requests_html使用asyncio
2024-09-05 08:38:47
import asyncio
import functools
from concurrent.futures.thread import ThreadPoolExecutor
from requests_html import HTMLSession
import sys
# One module-level HTMLSession; get_response() binds its .get method and runs
# it through an executor.
# NOTE(review): main() passes a ThreadPoolExecutor, so this single session is
# used from several worker threads — confirm that is safe for requests_html.
session = HTMLSession()
async def get_response(executor, *, url, loop: asyncio.AbstractEventLoop = None, ):
    """Schedule a blocking GET for ``url`` on ``executor``.

    ``session.get`` is handed to ``loop.run_in_executor`` and the resulting
    awaitable is returned without being awaited here, so
    ``await get_response(...)`` produces a future for the response rather
    than the response itself.

    Args:
        executor: Executor forwarded to ``loop.run_in_executor``.
        url: Address fetched through the module-level ``session``.
        loop: Event loop to schedule on; the currently running loop is used
            when this is falsy.

    Returns:
        The awaitable created by ``loop.run_in_executor``.
    """
    event_loop = loop or asyncio.get_running_loop()
    # run_in_executor only forwards positional arguments, so bind the URL up front.
    fetch = functools.partial(session.get, url)
    pending = event_loop.run_in_executor(executor, fetch)
    return pending
async def bulk_requests(executor, *,
                        urls,
                        loop: asyncio.AbstractEventLoop = None, ):
    """Asynchronously yield one scheduled request per entry of ``urls``.

    Each item is whatever ``await get_response(...)`` evaluates to for that
    URL; items are produced in the same order as ``urls``.

    Args:
        executor: Executor forwarded to ``get_response``.
        urls: Iterable of addresses to request.
        loop: Optional event loop forwarded to ``get_response``.
    """
    for address in urls:
        scheduled = await get_response(executor, url=address, loop=loop)
        yield scheduled
def filter_unsuccesful_requests(responses_and_exceptions):
    """Drop the entries whose value is an exception, keeping real responses.

    Args:
        responses_and_exceptions: Mapping of URL to either a response object
            or the exception captured for that URL.

    Returns:
        A lazy ``filter`` iterator over the ``(url, response)`` pairs whose
        value is not an exception instance.
    """

    def _is_response(url_and_result):
        # Test against BaseException rather than Exception: results collected
        # with ``asyncio.gather(..., return_exceptions=True)`` may contain
        # ``asyncio.CancelledError``, which derives from BaseException since
        # Python 3.8 and would otherwise be mistaken for a response.
        return not isinstance(url_and_result[1], BaseException)

    return filter(_is_response, responses_and_exceptions.items())
async def main():
    """Request a fixed list of pages concurrently and print their titles.

    Every URL is scheduled on a thread pool, the results are gathered with
    exceptions captured instead of raised, the ``<title>`` text of each
    successful page is printed to stdout, and each URL that produced no
    usable response is reported on stderr with the captured reason.
    """
    urls = [
        "https://baidu.com",
        "https://cnblogs.com",
        "https://163.com",
    ]
    # Run the pool as a context manager so its worker threads are released
    # as soon as all requests have completed (or gathering fails).
    with ThreadPoolExecutor(10) as executor:
        pending = [request async for request in bulk_requests(executor, urls=urls)]
        # return_exceptions=True keeps one failed request from aborting the rest.
        outcomes = await asyncio.gather(*pending, return_exceptions=True)

    responses_and_exceptions = dict(zip(urls, outcomes))
    responses = {
        url: resp.html
        for url, resp in filter_unsuccesful_requests(responses_and_exceptions)
    }

    for url, html in responses.items():
        titles = html.xpath("//head//title//text()")
        if titles:
            print(titles[0])
        else:
            # A page without title text used to raise IndexError here.
            print(f"No <title> text found at {url}.", file=sys.stderr)

    for url in urls:
        if url not in responses:
            print(f"No successful request could be made to {url}. Reason: {responses_and_exceptions[url]}",
                  file=sys.stderr)
# Start the demo only when this file is executed as a script, so that merely
# importing the module does not fire network requests.
if __name__ == "__main__":
    asyncio.run(main())
最新文章
- intel82599在centos6.5下编译安装
- Struts2学习:基础(一)
- 如何使用openssl生成RSA公钥和私钥对
- mesos 学习笔记1 -- mesos安装和配置
- 关于 jquery select2 多个关键字 模糊查询的解决方法
- 详细剖析电脑hosts文件的作用和修改
- 【Hadoop环境搭建】Centos6.8搭建hadoop伪分布模式
- pgAdminIII使用图解
- java.io.NotSerializableException
- 图片的 base64 编码
- spring boot使用profile来区分正式环境配置文件与测试环境配置文件
- WebRtc编译好的vs2015源码
- AtCoder Regular Contest 080 (ARC080) E - Young Maids 线段树 堆
- Java:JDBC的基本使用
- 把一个List<T>的数据复制至另一个List<T>
- Linux基础命令---tail显示文本
- libgdx学习记录17——照相机Camera
- Docker Swarm Mode无法增加管理节点
- 使用VS Code开发.Net Core 2.0 MVC Web应用程序教程之一
- 【Python3】【贪心】hdu4296 Buildings