网址: https://ai.baidu.com/easydl/app/deploy/tee/public

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# @mail : lshan523@163.com
# @Time : 2022/9/7 11:53
# @Author : Sea
# @File : 文本抽取.py
# @history:
# ****************************
import time
import random
import pandas as pd
from datetime import datetime, timedelta # time: 2021-03-26 10:20:12 operator : Sea milestone : SAD bookingNo : 222231321212
# Candidate values for the randomly generated sample records.
_MILESTONES = ("BKD", "DEP", "RCF", "DDL", "DL1", "RCS", "OTH", "CCD", "EXP")
_OPERATORS = ("Sea", "Jeff", "Zero", "Dana", "stiff", "jack", "Ryan", "Tom",
              "Jerry", "happy", "Mini", "Syan", "Joan")
# Padding appended after every "label:value" pair in the annotated text.
_FIELD_GAP = " " * 3


def gen_no():
    """Return a 19-digit booking number.

    The number is the current local time formatted as ``YYYYmmddHHMMSS``
    followed by one random digit in 1-9 and a random integer in 1000-9999.
    """
    tail = f"{random.randint(1, 9)}{random.randint(1000, 9999)}"
    return time.strftime("%Y%m%d%H%M%S", time.localtime()) + tail


def gen_time():
    """Return a ``YYYY-mm-dd HH:MM:SS`` local timestamp near the present.

    The current time is shifted by a random whole number of seconds drawn
    from [-100000000, 100000000].
    """
    shift = random.randint(-100000000, 100000000)
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time() + shift))


def gen_miles():
    """Return one randomly chosen milestone code."""
    return random.choice(_MILESTONES)


def gen_operator():
    """Return one randomly chosen operator name."""
    return random.choice(_OPERATORS)


def prepare_data():
    """Return one plain-text sample record.

    Layout:
    ``time:<timestamp> operator:<name> milestone:<code> bookingNo:<number>``
    """
    return (
        f"time:{gen_time()} operator:{gen_operator()}"
        f" milestone:{gen_miles()} bookingNo:{gen_no()}"
    )


def write_data(path="xxx.txt", count=10):
    """Append ``count`` sample records, one per line, to ``path``.

    Args:
        path: Text file to append to (created if missing).
        count: Number of records to write.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(path, mode="a+", encoding="utf-8") as file:
        file.writelines(prepare_data() + "\n" for _ in range(count))


def _build_marked_row(fields):
    """Build one annotated text row from ``(label, value)`` pairs.

    Each pair is rendered as ``label:value`` followed by three spaces.

    Returns:
        A ``(text, annotations)`` tuple. ``annotations`` holds one string per
        field, formatted ``[start,end],label``, where ``start`` and ``end``
        are the 0-based, inclusive character offsets of the value in ``text``.
    """
    pieces = []
    annotations = []
    offset = 0
    for label, value in fields:
        prefix = label + ":"
        # Track the running length instead of searching with str.find():
        # find() returns the first match and would point at the wrong span
        # if the value also occurred earlier in the text.
        start = offset + len(prefix)
        annotations.append(f"[{start},{start + len(value) - 1}],{label}")
        segment = prefix + value + _FIELD_GAP
        pieces.append(segment)
        offset += len(segment)
    return "".join(pieces), annotations


def write_date_to_excel_marked(rows=1000):
    """Write ``rows`` annotated sample records to ``demo-YYYYmmdd.xlsx``.

    The date in the file name is yesterday's local date. The sheet has one
    text column and four entity-annotation columns (time, operator,
    milestone, booking number). Requires the ``xlsxwriter`` engine.

    Args:
        rows: Number of records to generate.
    """
    yesterday = datetime.now().date() - timedelta(days=1)
    out_path = "demo-%d%02d%02d.xlsx" % (yesterday.year, yesterday.month, yesterday.day)

    # Excel columns: the raw text plus one annotation column per entity.
    text_col = "文本内容"
    mark_cols = ["实体标注1", "实体标注2", "实体标注3", "实体标注4"]
    data_set = {name: [] for name in [text_col] + mark_cols}

    for _ in range(rows):
        fields = [
            ("时间", gen_time()),
            ("操作者", gen_operator()),
            ("里程碑", gen_miles()),
            ("单号", gen_no()),
        ]
        text, annotations = _build_marked_row(fields)
        data_set[text_col].append(text)
        for col, annotation in zip(mark_cols, annotations):
            data_set[col].append(annotation)

    # The context manager saves and closes the workbook on exit;
    # ExcelWriter.save() no longer exists in pandas 2.x.
    with pd.ExcelWriter(path=out_path, mode="w", engine="xlsxwriter") as writer:
        pd.DataFrame(data_set).to_excel(
            writer, sheet_name="Sheet1", index=False, header=True, startrow=0
        )
        workbook = writer.book
        worksheet1 = writer.sheets["Sheet1"]
        fmt = workbook.add_format({"font_name": u"宋体"})
        # Set column widths.
        worksheet1.set_column("B:F", 20, fmt)
        # NOTE(review): this range also includes column B, which was given
        # width 20 just above - confirm whether 'A:A' was intended.
        worksheet1.set_column("A:B", 70, fmt)


if __name__ == '__main__':
    write_date_to_excel_marked()

最新文章

  1. 【Java并发编程实战】-----“J.U.C”:ReentrantReadWriteLock
  2. Dubbo消费端错误: ClassNotFoundException: org.apache.zookeeper.proto.WatcherEvent
  3. Eclipse DDT
  4. 算法练习:寻找最小的k个数
  5. 【leetcode】Insertion Sort List (middle)
  6. 无状态Web应用集成——《跟我学Shiro》
  7. C# MessageBox 用法大全(转)
  8. HW5.6
  9. 《sed的流艺术之三》-linux命令五分钟系列之二十三
  10. 自己定义View之绘制圆环
  11. kafka producer生产数据到kafka异常:Got error produce response with correlation id 16 on topic-partition...Error: NETWORK_EXCEPTION
  12. svn checkout The XML response contains invalid XML
  13. Mysql锁机制--索引失效导致行锁变表锁
  14. sqlserver的坑
  15. [UWP]使用Popup构建UWP Picker
  16. Java Singleton的3种实现方式
  17. sql 存储时空格转成问号问题
  18. Vim 编辑文件时,突然断开链接
  19. 关于shell
  20. 省选模拟赛 LYK loves rabbits(rabbits)

热门文章

  1. const引用和指针
  2. axios上传excal方法
  3. lua中定义变量用and和or连接
  4. JS篇(010)-JavaScript 继承的方式和优缺点
  5. Leetcode习题集-链表
  6. Python基础数据类型-Dictionary(字典)
  7. Pytest+allure+requests接口自动化
  8. IDEA移除Maven依赖的方法
  9. vue项目浏览器ioc小图标
  10. android charles 抓不到https包,翻了2天资料总算是找到答案了