# -*- coding: utf-8 -*-
# @Time : 2018/03/08 10:32
# @Author : cxa
# @File : gethtmlandimg.py
# @Software: PyCharm
"""Download a KEGG pathway page and its pathway image into the working directory.

The page is saved as ``<name>.html`` with the image reference rewritten to
point at the locally saved ``<name>.png``. Any failure is reported by printing
the traceback; the script does not re-raise.
"""
import os
import traceback

import requests
from fake_useragent import UserAgent as UA
from lxml import html

url = "http://www.genome.jp/kegg-bin/show_pathway?1520394169137283/hsa01100.args"
# The last URL segment ("hsa01100.args") provides the base name of both outputs.
page_name = url.split("/")[-1]
img_name = page_name.replace("args", "png")
html_path = os.path.join(os.getcwd(), page_name.replace("args", "html"))
img_path = os.path.join(os.getcwd(), img_name)
headers = {
    'Accept': 'text/html, application/xhtml+xml, image/jxr, */*',
    # HTTP field names cannot contain spaces, so this must be spelled exactly.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
    'Connection': 'Keep-Alive',
    'User-Agent': UA().random,
}
# XPath selecting the `src` attribute of the pathway image inside the map div.
img_xapth = "//div[@class='map']/div[@class='image']/img[@name='pathwayimage']/@src"
main_url = "http://www.genome.jp"
# Seconds to wait on each HTTP request (page and image alike).
request_timeout = 20


def get_img(buff):
    """Write the raw image bytes *buff* to ``img_path``, replacing any existing file."""
    with open(img_path, "wb") as fs:
        fs.write(buff)


try:
    # The request is issued inside the try so that connection errors and
    # timeouts are reported the same way as every other failure.
    req = requests.get(url, timeout=request_timeout, headers=headers)
    if req.status_code == requests.codes.ok:
        get_html = req.text
        root = html.fromstring(get_html)
        # Evaluate the XPath once and fail with a clear message when the page
        # layout does not contain the expected image element.
        img_srcs = root.xpath(img_xapth)
        if not img_srcs:
            raise ValueError("pathway image not found in page: {}".format(url))
        img_src = img_srcs[0]
        imgurl = main_url + img_src
        # Write with the encoding requests used to decode the page so the text
        # round-trips; fall back to UTF-8 when the server declared none. This
        # avoids depending on the platform's locale default encoding.
        with open(html_path, "w", encoding=req.encoding or "utf-8") as fs:
            # Point the saved page at the local copy of the image.
            fs.write(get_html.replace(img_src, "./{}".format(img_name)))
        img_req = requests.get(imgurl, timeout=request_timeout, headers=headers)
        if img_req.status_code == requests.codes.ok:
            get_img(img_req.content)
        else:
            img_req.raise_for_status()
    else:
        req.raise_for_status()
except Exception:
    # Catch Exception rather than everything, so Ctrl-C and SystemExit still
    # terminate the script.
    print(traceback.format_exc())

  

最新文章

  1. linux下的一些操作(持续更新)
  2. 什么是js面向对象??
  3. 78 mount 挂载Linux系统外的文件。
  4. 调用未绑定的父类方法和使用 super 函数之间的选择.
  5. 2013 ACM-ICPC长沙赛区全国邀请赛——Bottles Arrangement
  6. Delphi TRichEdit加载word内容
  7. java 复习002
  8. Objective-C 数组、可变数组
  9. js数组去重,并统计最多项算法
  10. Html 定位position
  11. js 数字递增特效 仿支付宝我的财富 HTML5
  12. U型理论
  13. JDK动态代理深入理解分析并手写简易JDK动态代理(下)
  14. codeforces148----E. Porcelain
  15. MFC创建线程示例
  16. iOS添加pch文件
  17. system.data oracleClient 需要Oracle客户端8.1.7或high
  18. 转: H264码流分析 --264分析两大利器:264VISA和Elecard StreamEye Tools
  19. 玩转X-CTR100 l USB功能-HOST鼠标键盘
  20. QML类型说明-ParallelAnimation

热门文章

  1. Ubuntu 进阶命令——长期不定时更新
  2. pandas DataFrame的查询方法(loc,iloc,at,iat,ix的用法和区别)
  3. tomcat中配置JNDI方法
  4. lintcode-76-最长上升子序列
  5. C++编码规范101
  6. 网络编程:listen函数
  7. C语言的getopt
  8. bzoj 3513 [MUTC2013]idiots FFT 生成函数
  9. TOM的show_space
  10. Windows Time Client