celery 调用scrapy

　　我的环境：celery 3.1.25、python 3.6.9、Windows 10

celery tasks 代码如下，其中 QuotesSpider 是我的scrapy项目爬虫类名称

from celery_app import app

from scrapy.crawler import CrawlerProcess

from scrapy.utils.project import get_project_settings

from tutorial.spiders.quotes import QuotesSpider

def crawl_run():
    """Run ``QuotesSpider`` to completion inside a ``CrawlerProcess``.

    The spider is given ``'all'`` as its single positional argument and the
    project settings are loaded with ``get_project_settings()``.

    ``CrawlerProcess.start()`` starts the Twisted reactor and, with its
    default arguments, blocks until the scheduled crawl has finished, so no
    additional ``join()`` call is needed afterwards.

    NOTE: a Twisted reactor cannot be started a second time in the same
    process, so calling this function again from a long-lived worker process
    fails with ``ReactorNotRestartable``.

    Returns:
        None.
    """
    process = CrawlerProcess(settings=get_project_settings())
    process.crawl(QuotesSpider, 'all')
    # Blocks here until the crawl is done and the reactor has stopped.
    process.start()

@app.task(queue='default')
def execute_task():
    """Celery task, routed to the ``default`` queue, that runs the crawl.

    Returns:
        Whatever ``crawl_run()`` returns.
    """
    outcome = crawl_run()
    return outcome


后来发现，用上面这种写法重复执行定时任务时会报错：Twisted 的 reactor 不能重启（twisted.internet.error.ReactorNotRestartable）。改成下面这种写法就解决了。注意：这个类所在的文件要放在与项目 scrapy.cfg 同级的目录下。

from crawler.tutorial.crawler.tutorial.spiders.quotes import QuotesSpider

from scrapy.utils.project import get_project_settings

import scrapy.crawler as crawler

from crochet import setup

# Initialise crochet once, at import time, before any crawl is scheduled.
# NOTE(review): per crochet's documentation this runs the Twisted reactor in
# a background thread so it never has to be restarted -- confirm against the
# installed crochet version.
setup()

import os

class Scraper:
    """Schedule ``QuotesSpider`` crawls on a Scrapy ``CrawlerRunner``.

    ``CrawlerRunner`` does not start or stop the Twisted reactor itself, so
    this class relies on a reactor that is managed elsewhere in the process.
    """

    def crawl_run(self):
        """Schedule one crawl of ``QuotesSpider`` with the argument ``'all'``.

        The call only schedules the crawl and returns immediately; it does
        not wait for the spider to finish.

        NOTE(review): this is invoked from outside the reactor thread;
        crochet documents ``run_in_reactor`` / ``wait_for`` for calling
        Twisted APIs that way -- consider wrapping this method with one of
        them.

        Returns:
            None.
        """
        runner = crawler.CrawlerRunner(get_project_settings())
        # Hand over the spider *class*, not an instance: Scrapy instantiates
        # the spider itself, and current releases raise ValueError when they
        # are given a spider object.
        runner.crawl(QuotesSpider, 'all')

if __name__ == '__main__':
    # Manual entry point: schedule a single crawl when run as a script.
    Scraper().crawl_run()

巴特西

celery 调用scrapy

最新文章

热门文章