抓取不得姐动图(报错)

# -*- coding:utf-8 -*-
#__author__ :kusy
#__content__:文件说明
#__date__:2018/7/23 17:01
import urllib.request
import re def getHtml(url):
page = urllib.request.urlopen(url)
html = page.read()
# print(html)
return html def getImg(reg,savePath):
iCnt = 0
def giveImg(html):
imgre = re.compile(reg)
imglist = re.findall(imgre, html.decode('utf-8'))
nonlocal iCnt
for imgurl in imglist:
urllib.request.urlretrieve(imgurl, savePath + '%s.gif' % iCnt)
iCnt += 1
return giveImg # html = getHtml("http://pic.sogou.com/")
# reg = r'"image":"(.+?)"' #sougou reg = r'data-original="(.+?\.gif)"'
savePath = 'image/gif/'
g = getImg(reg,savePath)
for i in range(10):
if i >1:
print("http://www.budejie.com/" + str(i))
html = getHtml("http://www.budejie.com/" + str(i))
else:
html = getHtml("http://www.budejie.com/")
g(html)

报错如下

E:\kusy\python\venv\Scripts\python.exe E:/kusy/python/getJpg.py
http://www.budejie.com/2
Traceback (most recent call last):
File "E:/kusy/python/getJpg.py", line 35, in <module>
html = getHtml("http://www.budejie.com/" + str(i))
File "E:/kusy/python/getJpg.py", line 9, in getHtml
page = urllib.request.urlopen(url)
File "C:\Users\jingjing\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "C:\Users\jingjing\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "C:\Users\jingjing\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "C:\Users\jingjing\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "C:\Users\jingjing\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "C:\Users\jingjing\AppData\Local\Programs\Python\Python36\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden Process finished with exit code 1

百度了下已解决:

# -*- coding:utf-8 -*-
#__author__ :kusy
#__content__:文件说明
#__date__:2018/7/23 17:01
import urllib.request
import re def getHtml(url):
# 如果不加上下面的这行出现会出现urllib.error.HTTPError: HTTP Error 403: Forbidden错误
# 主要是由于该网站禁止爬虫导致的,可以在请求加上头信息,伪装成浏览器访问User-Agent,具体的信息可以通过火狐的FireBug插件查询
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
req = urllib.request.Request(url=url,headers=headers)
page =
urllib.request.urlopen(req)
html = page.read()
# print(html)
return html def getImg(reg,savePath):
iCnt = 0
def giveImg(html):
imgre = re.compile(reg)
imglist = re.findall(imgre, html.decode('utf-8'))
nonlocal iCnt
for imgurl in imglist:
urllib.request.urlretrieve(imgurl, savePath + '%s.gif' % iCnt)
iCnt += 1
return giveImg # html = getHtml("http://pic.sogou.com/")
# reg = r'"image":"(.+?)"' #sougou reg = r'data-original="(.+?\.gif)"'
savePath = 'image/gif/'
g = getImg(reg,savePath)
for i in range(10):
if i >1:
print("http://www.budejie.com/" + str(i))
html = getHtml("http://www.budejie.com/" + str(i))
else:
html = getHtml("http://www.budejie.com/")
g(html)

下载成功

最新文章

  1. Excel转Html
  2. Android之assets资源
  3. ASP.Net MVC开发基础学习笔记(5):区域、模板页与WebAPI初步
  4. hdf第一周完了,突然时间静止.,醒了就早点去公司上班,再努力一点
  5. android 组合控件接收不到点击事件的问题
  6. cocos2d-x入门笔记(1)
  7. [BZOJ 2594] [Wc2006]水管局长数据加强版 【LCT】
  8. JavaWeb之cookie
  9. [php] in_array 判断问题(坑)
  10. 接触JS的变量
  11. JS中函数常见的表现形式以及立即执行函数
  12. 受到 1 万点暴击,二狗子被 DDoS 攻击的惨痛经历
  13. Javascript arguments.callee和caller的区别
  14. Akka详细介绍
  15. c#的一些快捷键
  16. vue.js学习:1.0到2.0的变化(区别)
  17. 前端基础开发之HTML
  18. L313 珊瑚裸鼠灭绝
  19. JQuery常用和很有用处的方法
  20. linux开机、重启和用户登陆注销

热门文章

  1. 解决国内 Pip 安装速度慢
  2. tab吸顶的神奇-- css粘性属性
  3. Xml解析作业与Xml建模andXml建模作业
  4. luoguP3964 [TJOI2013]松鼠聚会
  5. AcWing 95 费解的开关
  6. Golang 位向量
  7. saltstack自动化运维工具搭建个人笔记
  8. shell 命令行参数(基本)
  9. 如何在ProXmoX VE 下虚拟机安装 黑群晖 DSM 6.1.6
  10. jmeter(四十六)参数化与断言实战