爬虫下载rockchip的规格书
2024-09-18 20:26:27
#file-name: pdf_download.py
import os
import requests
from bs4 import BeautifulSoup
def download_file(url, index):
    """Download ``url`` into the current working directory.

    The local file name is the last ``/``-separated segment of ``url`` with
    every ``%20`` escape replaced by an underscore.

    Args:
        url: Address of the file to fetch.
        index: Running number of the download, as a string. Not used at the
            moment (it only appears in a disabled file-name prefix); kept so
            that existing callers keep working.

    Returns:
        The name of the file that was written.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            that an error page is never stored under the file's name.
    """
    local_filename = url.split('/')[-1].replace('%20', '_')
    # stream=True keeps large files out of memory; the timeout prevents a
    # stalled server from hanging the script; the ``with`` releases the
    # connection even when writing fails.
    with requests.get(url, stream=True, timeout=30) as r:
        # Check the status before creating the local file.
        r.raise_for_status()
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename


root_link = "https://rockchip.fr/"
# Crawl each listed sub-directory of ``root_link`` and download every linked
# file with a wanted extension into a local directory of the same name.

# File extensions (case-sensitive) that are worth downloading.
wanted_suffixes = (".pdf", ".DSN", ".apk")

# Sub-directories of the site to mirror. An earlier version of this list also
# contained "main1"; it is currently disabled.
sub = ["ampak", "chipspark", "firefly", "geekbox", "marsboard", "radxa", "RK312X TRM", "RK3288 TRM"]

for i in sub:
    # Local directory name: same as the remote one, with spaces replaced.
    directory = i.replace(" ", "_")
    # Test the name that is actually created (not ``i``), otherwise entries
    # containing a space are never detected and os.mkdir() fails on a rerun.
    if os.path.exists(directory):
        continue

    r = requests.get(root_link + i, timeout=30)
    print(root_link + i)
    if r.status_code != 200:
        print("errors occur.")
        # Exit with a failure status; the directory has not been created yet,
        # so the next run retries this section instead of skipping it.
        raise SystemExit(1)

    os.mkdir(directory)
    # download_file() writes into the current working directory.
    os.chdir(directory)
    try:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(r.text, "html.parser")
        index = 1
        # href=True skips anchors without a target (their href would be None).
        for link in soup.find_all('a', href=True):
            new_link = root_link + i + '/' + link['href']
            if not new_link.endswith(wanted_suffixes):
                continue
            try:
                file_path = download_file(new_link, str(index))
            except requests.RequestException as err:
                # One broken link should not abort the remaining downloads.
                print("download failed:" + new_link + " (" + str(err) + ")")
                continue
            print("downloading:" + new_link + " -> " + file_path)
            index += 1
        print(i + " download finished")
    finally:
        # Always return to the starting directory before the next section.
        os.chdir("..")

print("all download finished")
最新文章
- 慕课网__css_ float
- Eclipse和PyDev搭建完美Python开发环境(Windows篇)
- django 架构点点滴滴
- Linux脚本执行过程重定向
- C# Enum Type
- hive-安装0.13.1(hadoop2.2.0)
- NPOI的版本查看
- 修改LiteIDE 编辑窗口的主题
- Cracking the coding interview
- C# 内存管理优化畅想----前言
- 1. Git 克隆代码
- R语言 决策树算法
- 现代3D图形编程学习--opengl使用不同的缓存对象(译者添加)
- 【读书笔记】【深入理解ES6】#5-解构:使数据访问更便捷
- iphone开发笔记目录
- java基础之修饰符和内部类
- 同时使用n和nvm踩到的坑
- Linux tmpwatch命令详解
- 转载 - java中接口的向上转型。和多态性
- Pytest+Allure环境的搭建
热门文章
- [编程基础] Python日志记录库logging总结
- CSP-S2022 游记
- Spark详解(09) - Spark调优
- python之路39 前端开始 各种标签
- 琐碎的想法(五)for 的前世今生
- pytorch 配置详细过程
- 「HNOI2019」校园旅行
- 前端基础知识-react(一)个人学习记录 _
- RA-Depth: Resolution Adaptive Self-Supervised Monocular Depth Estimation
- 12月22日内容总结——django中间件的三个了解要求的方法、基于django中间件的功能设计、cookie与session