记一次爬取LOL全皮肤原画并保存到本地的实例
2024-10-08 06:33:04
# Scrape the splash art of every LOL hero skin and save it to local disk.
import os
import re
import traceback  # exception tracing

import requests
from bs4 import BeautifulSoup

# Characters that are not allowed in Windows file names; skin names scraped
# from the page are sanitized with this before being used as a file name.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|]')


def get_url(url, hander):
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        hander: Dict of HTTP request headers sent with the request.

    Returns:
        The decoded page text, or "" when the request fails (the traceback
        is printed so the failure is still visible).
    """
    try:
        r = requests.get(url, headers=hander, timeout=30)
        r.raise_for_status()
        # Let requests guess the charset from the body instead of trusting
        # the (possibly missing) Content-Type header.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        traceback.print_exc()  # print the exception details
        return ""


def prasing_page(lst, html):
    """Append every hero-page link found in *html* to *lst*.

    Links are the ``href`` of each ``<a>`` inside an ``<li>`` whose class
    matches ``boxShadow``. Anchors without an ``href`` are skipped.

    Args:
        lst: List that the links are appended to (mutated in place).
        html: HTML text of an index page.

    Returns:
        The same *lst* object, also when parsing fails (the traceback is
        printed and whatever was collected so far is kept).
    """
    try:
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all('li', class_=re.compile('boxShadow')):
            for anchor in item('a'):
                href = anchor.get('href')
                if href:
                    lst.append(href)
    except Exception:
        # Best effort: a page that cannot be parsed must not stop the crawl.
        traceback.print_exc()
    return lst


def _collect_skins(lst, hander):
    """Return (skin_name, hero_name, image_url) tuples for the pages in *lst*."""
    skins = []
    for page_url in lst:
        html = get_url(page_url, hander)
        if not html:
            continue
        try:
            soup = BeautifulSoup(html, "html.parser")
            for box in soup.find_all('div', class_=re.compile('othersPifuBox')):
                # Pair the tags box by box so that one incomplete entry
                # cannot shift the names of every skin that follows it.
                for tag_p, tag_img, tag_span in zip(box('p'), box('img'), box('span')):
                    src = tag_img.get('src')
                    if src:
                        skins.append((
                            tag_p.get_text(strip=True),
                            tag_span.get_text(strip=True),
                            src,
                        ))
        except Exception:
            traceback.print_exc()  # print the exception details
            continue
    return skins


def _save_image(img_url, path, hander):
    """Download *img_url* and write its bytes to *path*."""
    r = requests.get(img_url, headers=hander, timeout=30)
    r.raise_for_status()
    # Open the file only after the download succeeded so that a failed
    # request does not leave an empty file behind.
    with open(path, 'wb') as f:
        f.write(r.content)


def getUrl_prasingpag(lst, hander, save_dir='O:/lol_hero_jpg/'):
    """Parse every hero page in *lst* and download its skin images.

    Each image is saved as ``<skin name>--<hero name>.jpg`` inside
    *save_dir*; characters that are illegal in Windows file names are
    replaced with ``_``. Failures on a single page or image are printed and
    skipped. A progress percentage is printed after every saved image.

    Args:
        lst: Iterable of hero-page URLs.
        hander: Dict of HTTP request headers sent with every request.
        save_dir: Directory the images are written to; created if missing.
    """
    skins = _collect_skins(lst, hander)
    if not skins:
        return

    try:
        os.makedirs(save_dir, exist_ok=True)
    except OSError:
        traceback.print_exc()
        return

    # Download to local disk.
    total = len(skins)
    for done, (skin_name, hero_name, img_url) in enumerate(skins, start=1):
        filename = _INVALID_FILENAME_CHARS.sub('_', skin_name + '--' + hero_name + '.jpg')
        path = os.path.join(save_dir, filename)
        try:
            _save_image(img_url, path, hander)
        except (requests.RequestException, OSError):
            traceback.print_exc()  # print the exception details
            continue
        # Progress is measured against the number of skins, not of pages.
        print("\r当前进度>>>>>>>>>>>>>>>>>>{:.0f}%>>>>>>>>>>>>>>>>>>".format(done * 100 / total), end="")


def main(deep=43):
    """Crawl *deep* index pages, then download every skin image once.

    Args:
        deep: Number of index pages to crawl (hero_1 .. hero_<deep>).
    """
    hander = {"User-Agent": "Mozilla/5.0"}
    links = []
    for page in range(1, deep + 1):
        url = "http://********/hero_" + str(page) + ".shtml"
        prasing_page(links, get_url(url, hander))
    # Collect all links first and drop duplicates (keeping order) so that
    # every hero page is fetched and downloaded exactly once.
    getUrl_prasingpag(list(dict.fromkeys(links)), hander)


if __name__ == "__main__":
    main()
最新文章
- 项目管理_FindBugs的使用
- DevExpress XtraTreeList的复选框 禁用
- Magento Service Temporarily Unavailable解决方法
- MySQL ibdata1撑爆占满磁盘空间
- C#_MySql 主从复制
- C#创建https请求并使用pfx证书
- CSS左中右布局,规范案例
- Gulp思维——Gulp高级技巧
- 原生js实现的轮播图,易用+可多用
- 用ToggleButton和ImageView实现不同状态下显示的切换
- Spring Boot,Spring Data JPA多数据源支持
- sql 根据字段查询不同表
- iOS回顾笔记(03) -- 自定义View的封装和xib文件的使用详解
- MySQL一对一:一对多:多对多: 实例!!!!
- Specify 的含义 ------ 转载
- 什么是Java序列化,如何实现java序列化
- javaweb开发1.环境配置(javaweb插件下载及Tomcat在Eclipse中配置)
- 借助强大的IDEA开发ide高效实现equals,hashcode以及toString方法
- Leetcode题库——38.报数
- Eslint 配置及规则说明(报错)