爬IP代码

import requests
import re
import dauk
from bs4 import BeautifulSoup
import time
# Number of listing pages fetched per category; the banners announce 99 pages.
_PAGE_COUNT = 99

# URL template: first slot is the site's category code, second the page number.
_PAGE_URL = "http://www.xicidaili.com/{}/{}"

# Every scraper in this module appends to the same file, one address per line.
_OUTPUT_PATH = '采集到的IP.txt'

# Request headers (browser User-Agent strings) sent with each page request.
_FIREFOX_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:58.0) Gecko/20100101 Firefox/48.0'}
_CHROME_HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

# Dotted-quad IPv4 address.  The look-arounds stop the pattern from matching
# inside a longer run of digits (e.g. "1234.5.6.7" or "1.2.3.4567").
_IP_PATTERN = re.compile(
    r'(?<!\d)'
    r'(?:(?:25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)\.){3}'
    r'(?:25[0-5]|2[0-4]\d|[0-1]\d{2}|[1-9]?\d)'
    r'(?!\d)'
)


def _extract_ips(text):
    """Return every IPv4 address found in *text*, in order of appearance."""
    return [match.group(0) for match in _IP_PATTERN.finditer(text)]


def _save_ips(addresses):
    """Print each address and append it, newline-terminated, to the output file."""
    if not addresses:
        # Nothing found: do not create or touch the output file.
        return
    with open(_OUTPUT_PATH, 'a', encoding='utf-8') as out:
        for address in addresses:
            print(address)
            out.write(address + '\n')


def _crawl(category, headers, banner, cells_only=False):
    """Fetch every listing page of *category* and record the addresses found.

    category   -- category code placed in the URL path ('nt', 'nn', 'wn', 'wt').
    headers    -- mapping of HTTP request headers passed to requests.get.
    banner     -- message printed once before crawling starts.
    cells_only -- when true, search only the text of <td> cells (parsed with
                  BeautifulSoup) instead of the whole page source.
    """
    print(banner)
    # Pages are numbered from 1; +1 so that the last announced page is fetched.
    for page in range(1, _PAGE_COUNT + 1):
        response = requests.get(_PAGE_URL.format(category, page), headers=headers)
        if cells_only:
            soup = BeautifulSoup(response.content, 'html.parser')
            chunks = [cell.get_text() for cell in soup.find_all('td')]
        else:
            # response.text is str, so the str pattern can be applied to it;
            # response.content is bytes and would raise TypeError.
            chunks = [response.text]
        _save_ips([ip for chunk in chunks for ip in _extract_ips(chunk)])


def daili():
    """Collect proxy IPs from the 'nt' listing pages (table cells only)."""
    _crawl('nt', _FIREFOX_HEADERS, '[+]极速爬取代理IP,默认为99页', cells_only=True)


def dailigaoni():
    """Collect proxy IPs from the 'nn' listing pages."""
    _crawl('nn', _CHROME_HEADERS, '[+]极速爬取代理IP,默认为99页')


def dailihtp():
    """Collect proxy IPs from the 'wn' listing pages."""
    # The headers are passed as a real dict; the earlier string form is not a
    # valid value for requests' ``headers`` argument.
    _crawl('wn', _CHROME_HEADERS, '[+]极速爬取代理IP,默认为99页')


def dailihttps():
    """Collect proxy IPs from the 'wt' listing pages."""
    _crawl('wt', _CHROME_HEADERS, '[+]极速爬代理IP,默认为99页')


if __name__ == "__main__":
    # Run directly: crawl all four categories in the original order.  Guarded
    # so that importing this module to call a single scraper does not start
    # every crawl as a side effect of the import.
    daili()
    dailigaoni()
    dailihtp()
    dailihttps()

 端口代码

import requests
import re
from bs4 import BeautifulSoup

# Number of listing pages fetched per category; the banners announce 99 pages.
_PAGE_COUNT = 99

# URL template: first slot is the site's category code, second the page number.
_PAGE_URL = "http://www.xicidaili.com/{}/{}"

# Request headers (browser User-Agent strings) sent with each page request.
_FIREFOX_HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:58.0) Gecko/20100101 Firefox/48.0'}
_CHROME_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1 Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}

# A table cell whose whole content is a port number in 0-65535.  The last four
# alternatives together cover 60000-65535 exactly.
_PORT_PATTERN = re.compile(
    r'<td>([0-9]|[1-9]\d{1,3}|[1-5]\d{4}'
    r'|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])</td>'
)


def _extract_ports(html):
    """Return the port numbers (as strings) found in <td> cells of *html*."""
    # One capture group, so findall yields the port strings themselves.
    return _PORT_PATTERN.findall(html)


def _save_ports(ports, path):
    """Print each port and append it, newline-terminated, to *path*."""
    if not ports:
        # Nothing found: do not create or touch the output file.
        return
    with open(path, 'a', encoding='utf-8') as out:
        for port in ports:
            print(port)
            out.write(port + '\n')


def _crawl(category, headers, banner, path, cells_only=False):
    """Fetch every listing page of *category* and record the ports found.

    category   -- category code placed in the URL path ('nt', 'nn', 'wn', 'wt').
    headers    -- mapping of HTTP request headers passed to requests.get.
    banner     -- message printed once before crawling starts.
    path       -- file the ports are appended to.
    cells_only -- when true, parse the page with BeautifulSoup and search each
                  <td> element on its own instead of the whole page source.
    """
    print(banner)
    # Pages are numbered from 1; +1 so that the last announced page is fetched.
    for page in range(1, _PAGE_COUNT + 1):
        response = requests.get(_PAGE_URL.format(category, page), headers=headers)
        if cells_only:
            soup = BeautifulSoup(response.content, 'html.parser')
            # Serialise each cell with str(): get_text() would drop the <td>
            # tags the pattern is anchored on, so nothing could ever match.
            chunks = [str(cell) for cell in soup.find_all('td')]
        else:
            # response.text is str, so the str pattern can be applied to it;
            # response.content is bytes and would raise TypeError.
            chunks = [response.text]
        _save_ports([port for chunk in chunks for port in _extract_ports(chunk)], path)


# NOTE(review): three scrapers write to '采集到的端口.txt.txt' and one to
# '采集到的端口.txt'.  The file names are kept exactly as they were; confirm
# which name is intended before unifying them.

def daili():
    """Collect proxy ports from the 'nt' listing pages (table cells only)."""
    _crawl('nt', _FIREFOX_HEADERS, '[+]极速爬取代理IP端口,默认为99页',
           '采集到的端口.txt.txt', cells_only=True)


def dailigaoni():
    """Collect proxy ports from the 'nn' listing pages."""
    _crawl('nn', _CHROME_HEADERS, '[+]极速爬取代理IP的端口,默认为99页',
           '采集到的端口.txt.txt')


def dailihtp():
    """Collect proxy ports from the 'wn' listing pages."""
    # The headers are passed as a real dict; the earlier string form is not a
    # valid value for requests' ``headers`` argument.
    _crawl('wn', _CHROME_HEADERS, '[+]极速爬取代理IP,默认为99页',
           '采集到的端口.txt.txt')


def dailihttps():
    """Collect proxy ports from the 'wt' listing pages."""
    _crawl('wt', _CHROME_HEADERS, '[+]极速爬代理IP的端口,默认为99页',
           '采集到的端口.txt')


if __name__ == "__main__":
    # Run directly: crawl all four categories in the original order.  Guarded
    # so that importing this module to call a single scraper does not start
    # every crawl as a side effect of the import.
    daili()
    dailigaoni()
    dailihtp()
    dailihttps()

  调用代码

# Start-up banner.  This is a plain (non-raw) triple-quoted literal, so the
# backslash sequences in it are processed by Python exactly as written.
print('''
_ooOoo_
o8888888o
88" . "88
(| -_- |)
O\ = /O
____/`---'\____
.' \\| |// `.
/ \\||| : |||// \
/ _||||| -:- |||||- \
| | \\\ - /// | |
| \_| ''\---/'' | |
\ .-\__ `-` ___/-. /
___`. .' /--.--\ `. . __
."" '< `.___\_<|>_/___.' >'"".
| | : `- \`.;`\ _ /`;.`/ - ` : | |
\ \ `-. \_ __\ /__ _/ .-` / /
======`-.____`-.___\_____/___.-`____.-'======
`=---='
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
佛祖保佑 永无BUG
''')

# Warning plus the numbered menu, emitted one entry per line.
print(
    '[!]爬虫速度过快,导致IP被封请更换IP',
    '[*]极速爬取代理IP',
    '1.普通代理IP',
    '2.高匿代理IP',
    '3.http代理IP',
    '4.https代理IP',
    sep='\n',
)

# The user's menu choice; read by xs() below as a module-level name.
bk=input('请选择:')


def xs():
    """Run the scrapers matching the menu choice stored in ``bk``.

    Choices '1'-'4' call the same-named function first on the IP scraper and
    then on the port scraper, and then exit.  'q' exits immediately.  Any
    other input prints a "no such option" message and returns.
    """
    import 代理.daili
    import 代理.dauk

    # Menu key -> name of the function looked up on both scraper objects.
    targets = {
        '1': 'daili',
        '2': 'dailigaoni',
        '3': 'dailihtp',
        '4': 'dailihttps',
    }

    if bk=='q':
        exit()

    if bk not in targets:
        print('[-]没有找到你要的选项')
        return

    func_name = targets[bk]
    # NOTE(review): the IP scraper is reached through 代理.daili.daili.<name>.
    # Whether that attribute path resolves depends on the package layout,
    # which is not visible from here -- confirm.
    getattr(代理.daili.daili, func_name)()
    getattr(代理.dauk, func_name)()
    exit()


xs()

2018-02-17

最新文章

  1. 【JavaWeb】Spring+SpringMVC+MyBatis+SpringSecurity+EhCache+JCaptcha 完整Web基础框架(五)
  2. [译]Object.getPrototypeOf
  3. Html限制input输入框只能输入数字
  4. ssh项目java.lang.ClassNotFoundException: org.springframework.web.context.ContextLoade错误
  5. 一起来学习DOJO吧--序
  6. PHP的学习--生成器Generators
  7. 管理故事——和尚挑水的故事
  8. 年度十佳 DevOps 博客文章(后篇)
  9. 微软原版SQLHelper类
  10. pecl/mongo is already installed
  11. Square spiral
  12. Effective C++ Item 40 明智而审慎地使用多重继承
  13. [Apio2012]dispatching
  14. 深度学习之tensorflow (一)
  15. 100Mbps和100Mb/s有什么不同
  16. django基础 -- 10.form , ModelForm ,modelformset
  17. SpringMVC不能引入静态资源
  18. [转帖]IBM POWER系列处理器的前世今生
  19. concat layer
  20. 4.1 C++多态的概念及前提条件

热门文章

  1. java多线程补充:原子性和可见性
  2. LeetCode OJ:First Bad Version(首个坏版本)
  3. 【tensorflow:Google】三、tensorflow入门
  4. toString 和 valueOf 总结
  5. tomcat8启动慢原因及解决办法
  6. flask中cookie和session介绍
  7. ASP.NET中服务器控件的生命周期
  8. 十、python沉淀之路--高阶函数初识
  9. JUnit测试,获取Spring MVC环境
  10. 如何设计并使用FireMonkeyStyle