Python-beautifulsoup库

 #beautifulsoup库的安装

 pip install beautifulsoup4

 python -m pip install --upgrade pip

 from bs4 import BeautifulSoup

 #----------------beautifulsoup库的使用--------------------------------------

 import requests

 from bs4 import BeautifulSoup

 # Fetch the demo page and parse it into a BeautifulSoup tree.
 url = "http://python123.io/ws/demo.html"

 # Bound the request so an unresponsive server cannot hang the script forever.
 r = requests.get(url, timeout=10)

 # Fail loudly on a 4xx/5xx response instead of parsing an error page as if
 # it were the demo document.
 r.raise_for_status()

 # print(r.text)

 demo = r.text

 soup = BeautifulSoup(demo, "html.parser")  # "cook the soup": build the parse tree

 # print(soup.prettify())  # pretty-print the parsed tree

 # Downward traversal: .contents, .children (for iteration), .descendants.
 # They are used below as plain attributes, without call parentheses.

 body = soup.body

 soup.head  # the <head> tag

 soup.head.contents  # child nodes of <head>; author's note: returned as a list

 body.contents

 len(body.contents)  # author's note: returns 5 for the demo page

 body.contents[2]

 print('以下输出子节点：')

 # Direct children of <body> only.
 for node in body.children:
     print('##', node)

 print('以下输出子孙节点：')

 # Every node nested anywhere below <body>.
 for node in body.descendants:
     print('**', node)

 # --- Upward traversal: .parent (single node) and .parents (for iteration) ---

 soup.title.parent  # author's note: <head><title>This is a python demo page</title></head>

 soup.html.parent  # author's note: returns the whole HTML document

 soup.parent  # author's note: empty (None) -- the document itself has no parent

 print('以下输出父节点：')

 # .parents walks up the ancestors and stops when there are none left; it
 # never yields None, so no None guard is needed before reading .name.
 for par in soup.a.parents:
     print('%', par.name)

 # ---- Sibling traversal ----

 # Forward: .next_sibling; backward: .previous_sibling.
 # The plural forms (.next_siblings / .previous_siblings) are for iteration.

 # Author's note: the <title> and <p> tags are not siblings of each other.

 # Author's note: returns ' and ', so the node following an <a> tag is not
 # necessarily another <a> tag -- check what you got before using it.
 soup.a.next_sibling

 soup.a.next_sibling.next_sibling  # author's note: returns <a ...</a>

 soup.a.previous_sibling

 soup.a.previous_sibling.previous_sibling

 # The headers below name what is actually printed: the following and the
 # preceding siblings of the first <a> tag (not downward/upward traversal).
 print('以下输出后续平行节点：')

 for sibling in soup.a.next_siblings:
     print('##', sibling)

 print('以下输出前续平行节点：')

 for sibling in soup.a.previous_siblings:
     print('**', sibling)
巴特西

Python-beautifulsoup库

最新文章

热门文章