Python web scraping - 用 Python 取得 NIPS oral paper 列表

 from lxml import html

 import requests

 # using xpath

 # page = requests.get('http://econpy.pythonanywhere.com/ex/001.html')

 # Presentation type to collect. It is inserted into the XPath class prefix
 # and into the output file name; set it to 'Oral' to list the oral papers
 # instead of the spotlight ones.
 paper_kind = 'Spotlight'

 # A timeout keeps the script from hanging forever on a stalled connection.
 page = requests.get('https://nips.cc/Conferences/2019/Schedule', timeout=30)

 # Stop on an HTTP error instead of silently parsing an error page.
 page.raise_for_status()

 tree = html.fromstring(page.content)

 doc = tree

 # Select every element whose class attribute starts with
 # "maincard narrower <paper_kind>".
 btags = doc.xpath(
     "//*[@class[starts-with(., 'maincard narrower %s') and string-length() > 3]]"
     % paper_kind
 )


 def _child_text(card, position):
     """Return the leading text of the ``position``-th child <div> of ``card``.

     An empty string is returned when that <div> is missing or has no leading
     text, so a single malformed card does not abort the whole run.
     """
     cells = card.xpath("div[%d]" % position)
     if not cells or cells[0].text is None:
         return ''
     return cells[0].text


 # The encoding is explicit so that non-ASCII characters (e.g. in author
 # names) are written correctly regardless of the platform's default locale.
 with open('nips_paperlist_%s.txt' % paper_kind.lower(), 'w', encoding='utf-8') as f:
     for idx, b in enumerate(btags, start=1):
         # Child <div> 1, 3 and 5 of a card are read as the presentation type,
         # the title and the author list respectively.
         kind = _child_text(b, 1)
         title = _child_text(b, 3)
         author = _child_text(b, 5)

         out_str = "%d, %s, %s, %s\n" % (idx, kind, title, author)

         print(out_str)

         # out_str is a single line, so write() is the right call;
         # writelines() would iterate over it character by character.
         f.write(out_str)

使用 XPath 解析页面；依赖的库：lxml、requests。

https://docs.python-guide.org/scenarios/scrape/

https://stackoverflow.com/questions/12393858/xpath-using-contains-with-a-wildcard

巴特西

python scraping webs - python取得NIPS oral paper列表

最新文章

热门文章