#!/usr/bin/env python3

import pymongo
import datetime
import os
import csv

# Default export date: the day before this module was imported ('YYYY-MM-DD').
yesterday = str(datetime.date.today() - datetime.timedelta(days=1))


def _apply_column_rule(value, rule):
    """Flatten one nested column value according to a ``column_handle`` rule.

    :param value: the raw value stored in the document for that column
    :param rule: ``'join'`` to turn a list into a ``|``-separated string
        (non-list values are returned unchanged); anything else is used as
        an index/key into ``value`` (``value[rule]``)
    :return: the flattened value, or ``''`` when ``value[rule]`` cannot be
        looked up (index out of range, key missing, value not subscriptable)
    """
    if rule == 'join':
        if isinstance(value, list):
            return '|'.join(str(x) for x in value)
        return value
    try:
        return value[rule]
    except (IndexError, KeyError, TypeError):
        # Incomplete records should produce an empty cell rather than abort
        # the whole export half-way through the file.
        return ''


def mongo2csv(db_name, ts_string=yesterday, column_handle=None, column_delete=None):
    """Export one day of documents from a ``rental`` collection to a CSV file.

    Documents whose ``ts_string`` field equals ``ts_string`` are read from
    the collection ``db_name`` and appended to ``<db_name>_<ts_string>.csv``
    in the current working directory. The header is the sorted key list of
    the first matching document and is written only when the file is new or
    empty. If no document matches, a message is printed and no file is
    touched.

    :param db_name: name of the collection inside the ``rental`` database
    :param ts_string: date string matched against each document's
        ``ts_string`` field; also used in the output file name
    :param column_handle: columns that need flattening, as ``{k: v}`` where
        ``k`` is the column name and ``v`` is one of:

        1. a number - a list index: take that element of the list
        2. a string - a dict key: take that value of the sub-dict
        3. ``'join'`` - join the list into a single ``|``-separated string

        Columns absent from a document are skipped.
    :param column_delete: columns to exclude from the export
    :return: None
    """
    print('starting...')
    host = '127.0.0.1'
    port = 5600
    user = 'root'
    password = ''
    url = 'mongodb://' + user + ':' + password + '@' + host + ':' + str(port) + '/'

    column_handle = column_handle or {}
    # Projection: always hide _id and ts, plus whatever the caller drops.
    projection = {'_id': 0, 'ts': 0}
    for name in column_delete or []:
        projection[name] = 0
    query = {'ts_string': ts_string}

    client = pymongo.MongoClient(url)
    try:
        collection = client.rental[db_name]

        # The first matching document defines the CSV columns.
        first = collection.find_one(query, projection)
        if first is None:
            print('no documents in %s for %s' % (db_name, ts_string))
            return
        title = sorted(first.keys())

        # Name the file after the date actually exported, not the default.
        file_name = db_name + '_' + ts_string + '.csv'
        # The file is opened for append, so only emit the header once.
        write_header = (not os.path.exists(file_name)
                        or os.path.getsize(file_name) == 0)

        # newline='' lets the csv module control line endings and quoting of
        # embedded newlines; lineterminator keeps the original '\n' endings.
        with open(file_name, 'a', encoding='utf8', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            if write_header:
                writer.writerow(title)
            for item in collection.find(query, projection):
                for column, rule in column_handle.items():
                    if column in item:
                        item[column] = _apply_column_rule(item[column], rule)
                # str() keeps the original rendering of values (e.g. None ->
                # 'None'); columns missing from this document become ''.
                writer.writerow([str(item.get(x, '')) for x in title])
    finally:
        client.close()


if __name__ == '__main__':
    mongo2csv('lianjia_detail')
    mongo2csv('mogu_detail', column_handle={'metroInfo': 0, 'rentType': 'value'})
    mongo2csv('qingke_detail', column_handle={})
    mongo2csv('xiangyu_detail', column_delete=['endDate', 'vacantStartDate', 'tabList', 'vacantEndDate'])
    mongo2csv('ziru_detail', column_handle={'subway_line_code': 'join', 'subway_station_code': 'join'})
    # wc prints the per-file line counts itself; the value printed afterwards
    # is the exit status returned by os.system.
    print(os.system('wc -l *.csv'))

最新文章

  1. c#解析xml
  2. PKU 1003解题
  3. 2015baidu复赛2 连接的管道(mst && 优先队列prim)
  4. Knockout 新版应用开发教程之创建view models与监控属性
  5. 无线路由器WDS简要
  6. Thread类详解
  7. C# HttpWebRequest类
  8. 51nod1240莫比乌斯函数
  9. UVA 11090 Going in Cycle!!
  10. unable to find valid certification path to requested target
  11. /调整button的title的位置
  12. vue+Element实现静态旅游网站
  13. Docker容器访问控制
  14. sql字符串包含单引号
  15. B. Creating the Contest(水题)
  16. BABOK概述
  17. raid1 raid2 raid5 raid6 raid10的优缺点和做各自raid需要几块硬盘
  18. python(字符串、列表、字典、元组、集合)的常用内置方法
  19. ORACLE 字段AES算法加密、解密
  20. 解决跨域No 'Access-Control-Allow-Origin' header is present on the requested resource.

热门文章

  1. codeforces444A
  2. 【坦克大战】Unity3D多人在线游戏(泰课的坦克大战--旋转的螺丝钉)
  3. C#、Java和JS实现SHA256+BASE64加密总结
  4. SpringCloud 过滤器
  5. 爬虫之Scrapy框架介绍
  6. Java【初识篇】语言概述
  7. (一)Qt5模块,QtCreator常用快捷键,命名规范
  8. Django--ORM相关操作
  9. 微信小程序之canvas 文字断行和省略号显示
  10. Centos7的目录结构