from lxml import etree
# Walk-through of common lxml XPath queries against small inline HTML snippets.
# Each section runs a query and prints the raw result list.

# Sample markup for the basic queries.
# NOTE: the link attribute is spelled "herf" in both the markup and the
# queries below, so the two match each other; keep them in sync.
text = '''
<div>
<ul>
<li class = "item-0"><a herf = "link1.html">first item</a></li>
<li class = "item-1"><a herf = "link2.html">second item</a></li>
<li class = "item-inactive"><a herf = "link3.html">third item</a></li>
<li class = "item-1"><a herf = "link4.html">fourth item</a></li>
<li class = "item-0"><a herf = "link5.html">fifth item</a></li>
</ul>
</div>
'''
html = etree.HTML(text)
# Serialize the parsed tree; the value is stored but not printed here.
result = etree.tostring(html)

# Select every node in the document.
code_all = html.xpath("//*")
# Select all <li> nodes.
code_li = html.xpath("//li")
# Select the <a> children of the <li> nodes.
code_a = html.xpath("//li/a")
# Starting from a known child (<a> with herf 'link4.html'), step up to its
# parent and read the parent's class attribute.
code_p = html.xpath("//a[@herf = 'link4.html']/../@class")
print(code_p)
print(code_li)
print("///")
print(code_all)
print("///")
print(code_a)

# Attribute matching: <li> nodes whose class is exactly 'item-0'.
attribute = html.xpath("//li[@class = 'item-0']")
print(attribute)

# Text retrieval: text nodes that are direct children of <li>.
# NOTE: this rebinds `text`, which held the HTML source above; `html` was
# already parsed from it, so the later queries on `html` are unaffected.
text = html.xpath("//li/text()")
print(text)

# Attribute retrieval: the herf value of every <a> under an <li>.
attribute_get = html.xpath("//li/a/@herf")
print(attribute_get)

# Matching an attribute that holds several values: contains() tests for
# 'li' inside the class string instead of requiring an exact match.
text1 = """
<li class = "li li-fist"><a href = "link.html">first item</a></li>
"""
html1 = etree.HTML(text1)
attribute_number = html1.xpath("//li[contains(@class,'li')]/a/text()")
print(attribute_number)

# Matching on several attributes at once, combined with `and`.
# NOTE: the attribute is spelled "calss" in both the markup and the query.
text2 = """
<li calss = "li li-first" name = "name"><a href = "link.html">first item</a></li>
"""
html2 = etree.HTML(text2)
attribute_text2 = html2.xpath("//li[contains(@calss,'li') and @name = 'name']/a/text()")
print(attribute_text2)

# Selecting by position.
# Sometimes a query matches several nodes at once but only one of them is
# wanted. In that case an index inside square brackets picks the node at a
# specific position.
text3 = '''
<div>
<ul>
<li class = "item-0"><a herf = "link1.html">first item</a></li>
<li class = "item-1"><a herf = "link2.html">second item</a></li>
<li class = "item-inactive"><a herf = "link3.html">third item</a></li>
<li class = "item-1"><a herf = "link4.html">fourth item</a></li>
<li class = "item-0"><a herf = "link5.html">fifth item</a></li>
</ul>
</div>
'''
html3 = etree.HTML(text3)
# First <li> node.
result = html3.xpath("//li[1]/a/text()")
print(result)
# Last <li> node.
result = html3.xpath("//li[last()]/a/text()")
print(result)
# <li> nodes whose position is less than 3.
result = html3.xpath("//li[position() < 3]/a/text()")
print(result)

# Axis selection.
# All ancestors of the first <li>; the trailing * matches every node.
result = html3.xpath("//li[1]/ancestor::*")
print(result)
# Only the <div> ancestor.
result = html3.xpath("//li[1]/ancestor::div")
print(result)
# All attributes of the first <li>.
result = html3.xpath("//li[1]/attribute::*")
print(result)
#运行结果
['item-1']
[<Element li at 0x7f72f489c888>, <Element li at 0x7f72f489c948>, <Element li at 0x7f72f489c9c8>, <Element li at 0x7f72f489ca08>, <Element li at 0x7f72f489ca88>]
///
[<Element html at 0x7f72f489c808>, <Element body at 0x7f72f489c788>, <Element div at 0x7f72f489c748>, <Element ul at 0x7f72f489c848>, <Element li at 0x7f72f489c888>, <Element a at 0x7f72f489c908>, <Element li at 0x7f72f489c948>, <Element a at 0x7f72f489c988>, <Element li at 0x7f72f489c9c8>, <Element a at 0x7f72f489c8c8>, <Element li at 0x7f72f489ca08>, <Element a at 0x7f72f489ca48>, <Element li at 0x7f72f489ca88>, <Element a at 0x7f72f489cac8>]
///
[<Element a at 0x7f72f489c908>, <Element a at 0x7f72f489c988>, <Element a at 0x7f72f489c8c8>, <Element a at 0x7f72f489ca48>, <Element a at 0x7f72f489cac8>]
[<Element li at 0x7f72f489c888>, <Element li at 0x7f72f489ca88>]
[]
['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']
['first item']
['first item']
['first item']
['fifth item']
['first item', 'second item']
[<Element html at 0x7f72f489cdc8>, <Element body at 0x7f72f489cec8>, <Element div at 0x7f72f489cf48>, <Element ul at 0x7f72f489cf08>]
[<Element div at 0x7f72f489cf48>]
['item-0']

最新文章

  1. easyUI时间控件 使用
  2. 借助cookie实现子网页修改父网页内容遇到的问题:同一个浏览器访问相同页面,会互相影响。 (已解决)
  3. [moka同学转载]Yii2 中国省市区三级联动
  4. +Load和+initialize方法解析
  5. ubutu之Navicat安装
  6. ASP.NET的一般处理程序对数据的基本操作
  7. java数据结构和算法------快速排序
  8. Ubuntu16.04.1 安装Redis-Cluster
  9. C#引用传递
  10. 隐藏 Status Bar
  11. 【MySQL】JDBC连接MySQL的一些问题以及解决办法
  12. Unity 动态加载 Prefab
  13. selenium+Python3.5获取验证码
  14. Android Studio 下载与安装配置
  15. 使用 vi/vim 时,粘贴进新创建文件或空文件的首行内容丢失的解决方法
  16. 安装OpenSSL缺失Microsoft Visual C++ 2008 Redistributables的解决方案
  17. idea插件 总结 自认用比较不错的插件的总结
  18. Spring Boot 学习笔记1---初体验之3分钟启动你的Web应用
  19. iscroll源码学习(1)
  20. Open vSwitch for CentOS

热门文章

  1. Qt开发Activex笔记(二):Qt调用Qt开发的Activex控件
  2. Java开发工程师最新面试题库系列——集合部分(附答案)
  3. 阿里巴巴java开发手册(2020版)
  4. image auto downloader
  5. how to remove git commit history
  6. switchable css dark theme in js & html custom element
  7. select notes mark
  8. learning free programming resources form top university of the world
  9. 小程序 in action
  10. 1. VUE介绍