使用selenium实现站长素材图片采集

from selenium import webdriver

import requests,os

from lxml import etree

from selenium.webdriver.chrome.options import Options

from urllib import request

# Scrape one chinaz.com gallery page with headless Chrome and save every
# listed image into ./img, named after its alt text.
from urllib.parse import urljoin  # resolves relative / protocol-relative image URLs

chrome_options = Options()

chrome_options.add_argument('--headless')  # run Chrome without a visible window

chrome_options.add_argument('--disable-gpu')  # disable GPU acceleration

# NOTE(review): `executable_path` is kept as in the original; newer Selenium
# releases may expect a Service object instead -- confirm against the
# installed Selenium version.
pro = webdriver.Chrome(executable_path='./chromedriver.exe',options=chrome_options)

url = "http://sc.chinaz.com/tupian/haiyangshengwutupian.html"

try:
    pro.get(url)

    # Scroll to the bottom of the page before reading the rendered source.
    js = 'window.scrollTo(0,document.body.scrollHeight)'

    pro.execute_script(js)

    page_text = pro.page_source
finally:
    # Always shut the browser down, even if loading or scrolling failed,
    # so no headless Chrome / chromedriver process is left behind.
    pro.quit()

tree = etree.HTML(page_text)

# List of image URLs.
url_img = tree.xpath('//div[@id="container"]/div[@class="box picblock col3 masonry-brick"]/div/a/img/@src')

# List of image names (alt text), expected to line up with url_img.
names = tree.xpath('//div[@id="container"]/div[@class="box picblock col3 masonry-brick"]/div/a/@alt')

if len(url_img) != len(names):
    # Pairing is positional; a mismatch means some images will be skipped
    # below instead of crashing with IndexError.
    print('warning: found %d image urls but %d names' % (len(url_img), len(names)))

# Create the output folder; exist_ok avoids the check-then-create race.
os.makedirs('./img', exist_ok=True)

# Use a dedicated loop variable so the page `url` is not overwritten.
for img_url, img_name in zip(url_img, names):

    img_path = './img/' + img_name + '.jpg'  # target file named after the image

    # Absolute URLs pass through urljoin unchanged; relative ones are
    # resolved against the page URL so urlretrieve receives a full URL.
    request.urlretrieve(urljoin(url, img_url), img_path)

巴特西

使用selenium实现站长素材图片采集

最新文章

热门文章