scrapy 采集数据存入excel
2024-09-02 01:32:19
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import csv
import xlwt


class GanjiPipeline(object):
    """Scrapy item pipeline that writes scraped Ganji (赶集) rental
    listings into an Excel .xls workbook via xlwt.

    Rows are appended one per item; row 0 holds the column headers.
    The workbook is saved to disk when the spider closes.
    """

    def __init__(self):
        # Next worksheet row to write; row 0 is the header row.
        self.line = 1
        self.file_name = "赶集.xls"
        self.book = xlwt.Workbook(encoding="utf8")
        self.sheet = self.book.add_sheet("租房数据")
        # xlwt column width is measured in 1/256 of a character width.
        self.sheet.col(0).width = 256 * 70
        self.sheet.col(1).width = 256 * 10
        self.sheet.col(2).width = 256 * 10
        self.sheet.col(3).width = 256 * 10
        self.sheet.col(4).width = 256 * 50
        # Taller row style applied to the header and every data row.
        self.tall_style = xlwt.easyxf('font:height 300')
        first_row = self.sheet.row(0)
        first_row.set_style(self.tall_style)
        head = ["标题", "价格", "面积", "朝向", "地址"]
        # enumerate replaces the original per-iteration head.index(h),
        # which was an O(n) scan and would misplace duplicate titles.
        for col, title in enumerate(head):
            self.sheet.write(0, col, title)

    def process_item(self, item, spider):
        """Write one scraped item as a worksheet row.

        Returns the item so subsequent pipelines in ITEM_PIPELINES
        still receive it (the original implementation returned None,
        which drops the item for every later pipeline stage).
        """
        # self.line is always an int; the original's sporadic
        # int(self.line) casts were redundant and are removed.
        self.sheet.write(self.line, 0, item['title'])
        self.sheet.write(self.line, 1, item['price'])
        self.sheet.write(self.line, 2, item['size'])
        self.sheet.write(self.line, 3, item['chaoxiang'])
        self.sheet.write(self.line, 4, item['address'])
        self.sheet.row(self.line).set_style(self.tall_style)
        self.line += 1
        return item

    def close_spider(self, spider):
        """Persist the workbook to disk when the spider finishes."""
        self.book.save(self.file_name)

    def optimizeContent(self, res):
        """Strip scraping artifacts from a raw text fragment.

        Removes bytes-repr quoting (b'...'), escaped newlines, single
        quotes, and literal backslash-dot sequences; renames 'style'
        to 'nouse' to neutralize inline style attributes.
        """
        res = res.replace('b\'', '')
        res = res.replace('\\n', '')
        res = res.replace('\'', '')
        res = res.replace('style', 'nouse')
        # r'\.' targets the same literal backslash-dot as the original
        # '\.' but without the invalid-escape SyntaxWarning.
        res = res.replace(r'\.', '')
        return res
最新文章
- CSS3 基于关系的选择器
- REDHAT一总复习1 禁用颜色
- python ide ---wing 注册机
- 修改oracle字符集
- USACO Section 3.1: Contact
- js获取元素transform参数得出的个人理解
- JavaScript学习总结【6】、JS BOM
- Qt入门(16)——组装窗口部件
- Android --Vibrator--震动服务
- 跨主机网络概述 - 每天5分钟玩转 Docker 容器技术(48)
- 【微软大法好】VS Tools for AI全攻略
- Imcash:一边大裁员,一边大扩招,你能否成为区块链人才中的7%?
- TCP的三次握手过程与四次挥手
- 如何高效地遍历 MongoDB 超大集合?
- redis集群部署+节点端口修改+数据恢复
- C语言权威指南和书单 - 专家级别
- redis sentinel集群的搭建
- [整]Android开发优化-布局优化
- 牛客网-《剑指offer》-斐波那契数列
- 把握这两点，抢占下一个电商风口|2016最新中国电商App排名&研究报告