python下载站长素材免费简历模板(xpath)
2024-10-20 07:54:12
import os.path
from urllib.parse import urljoin

import requests
from lxml import etree

# Script: download the free resume templates listed on the first two listing
# pages of sc.chinaz.com into ./jianli, one archive per template.
if __name__ == '__main__':
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./jianli', exist_ok=True)

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
    }
    # Seconds to wait on each request; without a timeout a stalled
    # connection would block the script indefinitely.
    timeout = 30

    # Download the first two pages of templates.
    for i in range(1, 3):
        # The first listing page has no numeric suffix; later pages use
        # free_<n>.html.
        if i == 1:
            url = 'https://sc.chinaz.com/jianli/free.html'
        else:
            url = 'https://sc.chinaz.com/jianli/free_' + str(i) + '.html'

        page = requests.get(url=url, headers=headers, timeout=timeout)
        # Fail loudly on an HTTP error instead of parsing an error page.
        page.raise_for_status()
        page.encoding = 'utf-8'
        tree = etree.HTML(page.text)
        free_jianli = tree.xpath('//div[@id="main"]/div/div')

        for free in free_jianli:
            # Link to the template's detail page and the template title.
            hrefs = free.xpath('./a/@href')
            titles = free.xpath('./a/img/@alt')
            if not hrefs or not titles:
                # Not every matched div is guaranteed to hold a template
                # entry; skip cells without a link or a titled image.
                continue
            # urljoin leaves absolute URLs untouched and resolves relative or
            # protocol-relative hrefs against the listing page URL.
            free_url = urljoin(url, hrefs[0])
            free_title = titles[0]

            # Fetch the detail page that carries the download links.
            detail = requests.get(url=free_url, headers=headers, timeout=timeout)
            detail.raise_for_status()
            free_content_tree = etree.HTML(detail.text)

            # Take the first download link offered on the detail page.
            down_links = free_content_tree.xpath('//div[@class="down_wrap"]/div[2]/ul/li[1]/a/@href')
            if not down_links:
                # No download link on this page; nothing to save.
                continue
            down_path = urljoin(free_url, down_links[0])

            # File name: template title plus the extension of the download URL.
            down_path_title = free_title + '.' + down_path.split('.')[-1]

            # Download the archive as raw bytes.
            download = requests.get(url=down_path, headers=headers, timeout=timeout)
            # Without this check an HTTP error body would be saved as the file.
            download.raise_for_status()
            down_path_content = download.content

            # Write the archive into the output directory.
            with open('./jianli/' + down_path_title, 'wb') as fp:
                fp.write(down_path_content)
            print(down_path_title, "下载成功")

        print("第{0}页下载成功".format(i))

    print('下载完成')
最新文章
- sync
- MD5 32位 小写加密和大写加密
- BZOJ 1124: [POI2008]枪战Maf
- FFMpeg写MP4文件例子分析
- 使用Nexus搭建Maven私服
- Java内存区域分析
- ArcGIS: version not specified. You must call RuntimeManager.Bind before creat
- Prime Land
- JNI学习&使用过程中的错误
- Bootstrap之Button.js
- Appium Android Bootstrap源码分析之命令解析执行
- java怎么连接mysql数据库
- 自己封装的一个js方法用于获取显示的星期和日期时间
- python 条件分支与循环
- C++ Primer 笔记——动态数组
- Java 异常处理的重要认识
- 软件工程实践_Task1
- Linux系统知识汇总
- Docker的简单介绍及使用
- UVa 11464 Even Parity (二进制法枚举)