scrapy 采集数据存入excel
2024-09-02 01:32:19
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import csv
import xlwt


class GanjiPipeline(object):
    """Scrapy item pipeline that writes scraped Ganji (赶集) rental
    listings into an Excel .xls workbook via xlwt.

    Rows are appended one per item; row 0 holds the column headers.
    The workbook is saved to disk when the spider closes.
    """

    def __init__(self):
        # Next worksheet row to write; row 0 is the header row.
        self.line = 1
        self.file_name = "赶集.xls"
        self.book = xlwt.Workbook(encoding="utf8")
        self.sheet = self.book.add_sheet("租房数据")
        # xlwt column width is measured in 1/256 of a character width.
        self.sheet.col(0).width = 256 * 70
        self.sheet.col(1).width = 256 * 10
        self.sheet.col(2).width = 256 * 10
        self.sheet.col(3).width = 256 * 10
        self.sheet.col(4).width = 256 * 50
        # Taller row style applied to the header and every data row.
        self.tall_style = xlwt.easyxf('font:height 300')
        first_row = self.sheet.row(0)
        first_row.set_style(self.tall_style)
        head = ["标题", "价格", "面积", "朝向", "地址"]
        # enumerate replaces the original per-iteration head.index(h),
        # which was an O(n) scan and would misplace duplicate titles.
        for col, title in enumerate(head):
            self.sheet.write(0, col, title)

    def process_item(self, item, spider):
        """Write one scraped item as a worksheet row.

        Returns the item so subsequent pipelines in ITEM_PIPELINES
        still receive it (the original implementation returned None,
        which drops the item for every later pipeline stage).
        """
        # self.line is always an int; the original's sporadic
        # int(self.line) casts were redundant and are removed.
        self.sheet.write(self.line, 0, item['title'])
        self.sheet.write(self.line, 1, item['price'])
        self.sheet.write(self.line, 2, item['size'])
        self.sheet.write(self.line, 3, item['chaoxiang'])
        self.sheet.write(self.line, 4, item['address'])
        self.sheet.row(self.line).set_style(self.tall_style)
        self.line += 1
        return item

    def close_spider(self, spider):
        """Persist the workbook to disk when the spider finishes."""
        self.book.save(self.file_name)

    def optimizeContent(self, res):
        """Strip scraping artifacts from a raw text fragment.

        Removes bytes-repr quoting (b'...'), escaped newlines, single
        quotes, and literal backslash-dot sequences; renames 'style'
        to 'nouse' to neutralize inline style attributes.
        """
        res = res.replace('b\'', '')
        res = res.replace('\\n', '')
        res = res.replace('\'', '')
        res = res.replace('style', 'nouse')
        # r'\.' targets the same literal backslash-dot as the original
        # '\.' but without the invalid-escape SyntaxWarning.
        res = res.replace(r'\.', '')
        return res
最新文章
- CSS3 基于关系的选择器
- REDHAT一总复习1 禁用颜色
- python ide ---wing 注册机
- 修改oracle字符集
- USACO Section 3.1: Contact
- js获取元素transform参数得出的个人理解
- JavaScript学习总结【6】、JS BOM
- Qt入门(16)——组装窗口部件
- Android --Vibrator--震动服务
- 跨主机网络概述 - 每天5分钟玩转 Docker 容器技术(48)
- 【微软大法好】VS Tools for AI全攻略
- Imcash:一边大裁员,一边大扩招,你能否成为区块链人才中的7%?
- TCP的三次握手过程与四次挥手
- 如何高效地遍历 MongoDB 超大集合?
- redis集群部署+节点端口修改+数据恢复
- C语言权威指南和书单 - 专家级别
- redis sentinel集群的搭建
- [整]Android开发优化-布局优化
- 牛客网-《剑指offer》-斐波那契数列
- 把握这两点，抢占下一个电商风口|2016最新中国电商App排名&研究报告