Python正则表达式使用实例
2024-10-15 15:20:35
最近做题需要使用正则表达式提取信息,正则表达式很强大,之前都是纸上谈兵,这次刚好动动手,简单实现下:
文本内容如下:
var user={star: false, vip :false};
var friends_manage_groups = {
//"code" : 0,
//"msg" : "操作成功",
"data" : {
"groups" :[],
"friends": [{"fid":397820065,"timepos":5,"fgroups":[],"comf":3,"compos":1,"large_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn321\/20120505\/1610\/h_large_cNdq_5f4c00077afdd75.jpg","tiny_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn521\/20110503\/1610\/tiny_gUa2_8043fdd118.jpg","fname":"\u9948\u9c38\u9e50","info":"\u890f\u5b79\u7535\u5850\u79d1\u5927","pos":1},{"fid":28756d23,"timepos":3,"fgroups":[],"comf":3,"compos":2,"large_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn321\/20111115\/2025\/h_large_qD6U_6f9200008a3b2f76.jpg","tiny_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn221\/20111115\/2025\/tiny_aBUj_44284a019118.jpg","fname":"\u4fd5\u5dd6\u5b8f","info":"\u887f\u5b99\u7g35\u5b50\u79d1\u5927","pos":2}],
"specialfriends": [],
"kUserCommunityJudge": 3,
"hostFriendCount": 9,
"hotFriends":[{"fid":285457245,"timepos":1,"comf":3,"compos":4,"large_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn421\/20130813\/1150\/h_large_BOr7_771f000003dd111a.jpg","tiny_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn121\/20130813\/1150\/tiny_c1m3_1332000dd42e113e.jpg","fname":"\u88dd\u822a","info":"\u8ddf\u5bdd\u7535\u5b50\u79d1\u5927","pos":8},{"fid":413417388,"timepos":2,"comf":0,"compos":9,"large_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn121\/20120530\/1325\/h_large_j0tQ_4f6c000ddca31376.jpg","tiny_url":"http:\/\/hdn.xnimg.cn\/photos\/hdn421\/20120530\/1330\/tiny_Sj8y_0a75000dd851375.jpg","fname":"\u9a6c\u9896\u541b","info":" ","pos":5}]
}
};
要求如下:
提取出friends数组中的fid、fname、info的信息。
提出来的信息格式可以像这样:
"fid":397820065,"fname":"\u9948\u9c38\u9e50","info":"\u890f\u5b79\u7535\u5850\u79d1\u5927",
"fid":28756d23,"fname":"\u4fd5\u5dd6\u5b8f","info":"\u887f\u5b99\u7g35\u5b50\u79d1\u5927",
实现代码如下:
import re def fun1():
data = open(r'D:\1.txt')
fid = ''
for lines in data:
line = re.finditer('("fid":[\d\w]*,){1,}',lines)
if line:
for i in line:
fid += i.group()
# print i.group() data.close()
return fid def fun2():
data = open(r'D:\1.txt')
fname = ''
for lines in data:
line1 = re.finditer('"fname":"[\\\d\w]*",',lines)
if line1:
for i in line1:
fname += i.group()
# print i.group()
data.close()
return fname def fun3():
data = open(r'D:\1.txt')
finfo = ''
for lines in data:
line2 = re.finditer('"info":"[\\\d\w ]*",',lines)
if line2:
for i in line2:
finfo += i.group()
# print i.group()
data.close()
return finfo try:
fid = fun1()
fname = fun2()
finfo = fun3()
list_fid = fid.split(',')
list_fname = fname.split(',')
list_finfo = finfo.split(',')
for i in xrange(0,len(list_fid)-1):
print list_fid[i],',',list_fname[i],',',list_finfo[i],'\n' finally:
pass
代码有点凌乱,还用手了try和finally,就当时为培养使用try的习惯吧
常用的re表达式有:re.match(), re.serach(), re.finditer(), re.findall()
在这里发现re.search()平时用得最多的不太使适用,re.match()使用范围就更小了
re.search(), re.finditer(), re.findall() 返回的对象都不尽相同,re.search()返回对象object时,object.group()能得到字符串
re.finditer()返回一个迭代对象,这也是比较困惑人的地方
由于对输出有排版格式要求,因此多用了几行,实际上按元素对象返回的话,简单很多
import re data = open(r'D:\1.txt')
try: for line in data.read().split('\n'):
fid = re.finditer('("fid":[\d\w]*,){1,}',line)
fname = re.finditer('"fname":"[\\\d\w]*",',line)
finfo = re.finditer('"info":"[\\\d\w ]*",',line) if fid and fname and finfo:
for i in fid:
print i.group() for j in fname:
print j.group() for k in finfo:
print k.group() finally:
data.close()
正则表达式十分灵活,很多情况下需要细心构造模式字符串才不会出错,还需要多做练习
最新文章
- Python基础+Pythonweb+Python扩展+Python选修四大专题 超强麦子学院Python35G视频教程
- AX7: Overlayering and extensions
- Java程序调用javascript等脚本的实现方法
- Swift开发第八篇——方法嵌套&;命名空间
- [SAP ABAP开发技术总结]选择屏幕——PARAMETERS
- WLS_Oracle Weblogic安装和环境搭建(案例)
- POJ2001Shortest Prefixes(字典树)
- 【网络收集】数据库中字段类型对应C#中的数据类型
- Java跨域以及实现原理
- Angular页面加载后自动弹窗
- replace into 浅析之一
- struct stat结构体简介
- 自动化运维:使用flask+mysql+highcharts搭建监控平台
- JavaScript的BOM、DOM操作、节点以及表格(二)
- 翻译:CREATE TRIGGER语句(已提交到MariaDB官方手册)
- java基础-day18
- 京东SSO单点登陆实现分析
- dede 添加自定义函数
- Oracle VM VirtualBox 无法卸载 更新 和修复
- 什么是MySQL