from selenium import webdriver
import time
import random
from bs4 import *
# Imported here because the locator-based find_elements() call needs it;
# the find_elements_by_* helpers were removed in Selenium 4.3.
from selenium.webdriver.common.by import By

# Visible text of the "expand full text" links that must be clicked.
EXPAND_LINK_TEXT = '展开阅读全文 ∨'

browser = webdriver.Chrome()
url = 'https://so.gushiwen.org/shiwenv_ee16df5673bc.aspx'
browser.get(url)

# Number of expand links present right after the page loads; the scroll loop
# stops once at least this many clicks have succeeded.
ck_l_ori_len = len(browser.find_elements(By.LINK_TEXT, EXPAND_LINK_TEXT))
ck_l_ori_ok = 0
try:
    for isc in range(100):
        # ">=" rather than "==": if a link is clicked more than once the
        # counter overshoots and an equality test would never stop the loop.
        if ck_l_ori_ok >= ck_l_ori_len:
            break
        time.sleep(1)
        # Scroll down in 100px steps so each link is brought into view
        # before the click is attempted.
        js = 'window.scrollTo(0,100*{})'.format(isc)
        browser.execute_script(js)
        ck_l = browser.find_elements(By.LINK_TEXT, EXPAND_LINK_TEXT)
        for i in ck_l:
            try:
                i.click()
                ck_l_ori_ok += 1
            except Exception as e:
                # Best effort: a failed click is reported and retried on the
                # next scroll step.
                print(e)
except Exception as e:
    print('window.scrollTo-->', e)

# NOTE(review): xp_l is not used anywhere in the visible code.
xp_l = ['//*[@id="fanyi967"]/div/div[3]/a', ]

# Dump the expanded page to a randomly numbered temp file, then parse it back.
myhtml = 'D:\\myhtml\\{}gushiwen.tmp.html'.format(random.randint(123, 999))
with open(myhtml, 'w', encoding='utf-8') as fw:
    fw.write(browser.page_source)

# NOTE(review): sql is only a statement prefix and is not executed in the
# visible code.
sql = 'INSERT INTO parent_url (page_title,page_url,children_url) VALUES '
with open(myhtml, 'r', encoding='utf-8') as myhtml_o:
    bs = BeautifulSoup(myhtml_o, 'html.parser')

# Placeholder statement (presumably a debugger breakpoint anchor).
dd = 9
// Click every anchor whose text is the "expand full text" label.
// Wrapped in a function so the loop variables stay local instead of
// becoming implicit globals on window.
(function () {
    // Copy the live HTMLCollection into an array so that DOM changes caused
    // by a click cannot shift the indices while iterating.
    var anchors = Array.prototype.slice.call(document.getElementsByTagName('a'));
    for (var idx = 0; idx < anchors.length; idx++) {
        if (anchors[idx].text == '展开阅读全文 ∨') {
            anchors[idx].click();
        }
    }
})();

  

// Click every anchor whose text is the "expand full text" label, logging
// (instead of propagating) any error raised by an individual click.
// Wrapped in a function so the loop variables stay local instead of
// becoming implicit globals on window.
(function () {
    // Copy the live HTMLCollection into an array so that DOM changes caused
    // by a click cannot shift the indices while iterating.
    var anchors = Array.prototype.slice.call(document.getElementsByTagName('a'));
    for (var idx = 0; idx < anchors.length; idx++) {
        if (anchors[idx].text == '展开阅读全文 ∨') {
            try {
                anchors[idx].click();
            } catch (err) {
                console.log(err);
            }
        }
    }
})();

  

from selenium import webdriver
import time
import random
from bs4 import *
# Imported here because the locator-based find_elements() call needs it;
# the find_elements_by_* helpers were removed in Selenium 4.3.
from selenium.webdriver.common.by import By

# Visible text of the "expand full text" links that must be clicked.
EXPAND_LINK_TEXT = '展开阅读全文 ∨'

browser = webdriver.Chrome()
url = 'https://so.gushiwen.org/shiwenv_ee16df5673bc.aspx'
browser.get(url)

# First pass: click every expand link from inside the page via JavaScript.
# Errors from individual clicks are logged to the browser console by the
# script itself.
js = "a_=document.getElementsByTagName('a');le=a_.length;for(i=0;i<le;i++){if(a_[i].text=='展开阅读全文 ∨'){try{a_[i].click()}catch(err){console.log(err)}}}"
try:
    browser.execute_script(js)
except Exception as e:
    print(e)

# Second pass: scroll step by step and click whatever expand links remain.
ck_l_ori_len = len(browser.find_elements(By.LINK_TEXT, EXPAND_LINK_TEXT))
ck_l_ori_ok = 0
try:
    for isc in range(100):
        # ">=" rather than "==": if a link is clicked more than once the
        # counter overshoots and an equality test would never stop the loop.
        if ck_l_ori_ok >= ck_l_ori_len:
            break
        time.sleep(1)
        # Scroll down in 100px steps so each link is brought into view
        # before the click is attempted.
        js = 'window.scrollTo(0,100*{})'.format(isc)
        browser.execute_script(js)
        ck_l = browser.find_elements(By.LINK_TEXT, EXPAND_LINK_TEXT)
        for i in ck_l:
            try:
                i.click()
                ck_l_ori_ok += 1
            except Exception as e:
                # Best effort: a failed click is reported and retried on the
                # next scroll step.
                print(e)
except Exception as e:
    print('window.scrollTo-->', e)
from selenium import webdriver
import time
import random
from bs4 import *
from pyquery import PyQuery as pq
# Imported here because the locator-based find_elements() call needs it;
# the find_elements_by_* helpers were removed in Selenium 4.3.
from selenium.webdriver.common.by import By

# Visible text of the "expand full text" links that must be clicked.
EXPAND_LINK_TEXT = '展开阅读全文 ∨'

browser = webdriver.Chrome()
url = 'https://so.gushiwen.org/shiwenv_ee16df5673bc.aspx'
browser.get(url)

# First pass: click every expand link from inside the page via JavaScript.
# Errors from individual clicks are logged to the browser console by the
# script itself.
js = "a_=document.getElementsByTagName('a');le=a_.length;for(i=0;i<le;i++){if(a_[i].text=='展开阅读全文 ∨'){try{a_[i].click()}catch(err){console.log(err)}}}"
try:
    browser.execute_script(js)
except Exception as e:
    print(e)

# Second pass: scroll step by step and click whatever expand links remain.
ck_l_ori_len = len(browser.find_elements(By.LINK_TEXT, EXPAND_LINK_TEXT))
ck_l_ori_ok = 0
try:
    for isc in range(100):
        # ">=" rather than "==": if a link is clicked more than once the
        # counter overshoots and an equality test would never stop the loop.
        if ck_l_ori_ok >= ck_l_ori_len:
            break
        time.sleep(1)
        # Scroll down in 100px steps so each link is brought into view
        # before the click is attempted.
        js = 'window.scrollTo(0,100*{})'.format(isc)
        browser.execute_script(js)
        ck_l = browser.find_elements(By.LINK_TEXT, EXPAND_LINK_TEXT)
        for i in ck_l:
            try:
                i.click()
                ck_l_ori_ok += 1
            except Exception as e:
                # Best effort: a failed click is reported and retried on the
                # next scroll step.
                print(e)
except Exception as e:
    print('window.scrollTo-->', e)

# Parse the fully expanded page with PyQuery.
doc = pq(browser.page_source)

# The XHTML namespace attribute is stripped from every extracted fragment.
# NOTE(review): pq_r_d is not used in the visible code; r_k / r_v carry the
# same replacement pair.
pq_r_d = {'xmlns="http://www.w3.org/1999/xhtml"': ''}
r_k, r_v = 'xmlns="http://www.w3.org/1999/xhtml"', ''

# NOTE(review): .html() returns None when a selector matches nothing, in which
# case the chained .replace() raises AttributeError - confirm the selectors
# against the page layout.
article_ = doc('.left>:nth-child(2).sons>.cont>.contson').html().replace(r_k, r_v)
title_d = {'h1': doc('.left>:nth-child(2).sons>.cont>:nth-child(2)').html().replace(r_k, r_v)}
author_d = {'h3': doc('.left>:nth-child(2).sons>.cont>:nth-child(3)').text()}
translation_ = doc('.left>:nth-child(4)>.contyishang>:nth-child(2)').html().replace(r_k, r_v)
explanation_ = doc('.left>:nth-child(4)>.contyishang>:nth-child(3)').html().replace(r_k, r_v)
refer_ = doc('.left>:nth-child(4)>.cankao').html().replace(r_k, r_v)

# Take the text between the last 'src="' and the following '"' in the
# selected element's inner HTML.
author_img_url = doc('.left>.sonspic>.cont>.divimg>:nth-child(1)').html().split('src="')[-1].split('"')[0]

# Placeholder statement (presumably a debugger breakpoint anchor).
d = 4

  

最新文章

  1. IIS------无法打开登录所请求的数据库 "company"。登录失败。 用户 'IIS APPPOOL\AppPool 4.0' 登录失败。
  2. linux 管道命令 竖线 ‘ | ’
  3. MySQL的循环语句使用总结
  4. Maven —— 如何设置HTTP代理
  5. c# using 引用和别名的使用
  6. (转载)SQL语句,纵列转横列
  7. window redis 安装配置
  8. JAVA_build_ant_Tstamp
  9. git 快速使用(本地仓库同步到远程仓库)
  10. DevExpress ASP.NET 使用经验谈(3)-XPO对象的使用(使用指定数据连接)
  11. pomelo 协议
  12. linux添加到普通用户sudo才干
  13. MVC工作流程
  14. etcd raft library设计原理和使用
  15. Linux CentOS设置定时重启:crontab
  16. Maven安装问题
  17. 06 I/O重定向与管道符
  18. poi 读取使用 Strict Open XML 保存的 excel 文档
  19. <转>字节码指令
  20. 【文件上传】文件上传的form表单提交方式和ajax异步上传方式对比

热门文章

  1. IDEA打war包部署本地TOMCAT测试
  2. PHP读取超大的excel文件数据的方案
  3. 将文件大小kb转换成M
  4. LIBSVM使用方法及参数设置
  5. python之GUI图形用户界面 2014-4-7
  6. Node.js & Unix/Linux & NVM
  7. zoj3988 Prime Set
  8. java基础标识符,关键字,常量
  9. Check ini style config tool
  10. APPLE STORE