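# NOTE: stray web-page banner captured together with the source ("Code pull complete; the page will refresh automatically."). It is not part of the program.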
import parsel
import requests
import time
def chik_ip(proxies_list, test_url='http://www.baidu.com', timeout=1, session=None):
    """Return the proxies from ``proxies_list`` that can fetch ``test_url``.

    Every candidate is tried once. A candidate that raises (connection
    refused, timeout, malformed address, ...) or answers with a status other
    than 200 is skipped, and checking continues with the next one.

    Args:
        proxies_list: Iterable of proxy addresses as ``"ip:port"`` strings.
            An address that already carries a scheme (``"http://ip:port"``)
            is used as given.
        test_url: URL requested through each proxy.
        timeout: Per-request timeout in seconds.
        session: Optional object with a ``requests``-compatible ``get``
            method (for example a ``requests.Session``). When omitted, the
            module-level ``requests`` API is used.

    Returns:
        A list with the working addresses, in input order and exactly as
        they were passed in. Empty when nothing works.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    client = requests if session is None else session
    can_ip = []
    for proxies in proxies_list:
        try:
            proxy_url = proxies if '://' in proxies else f'http://{proxies}'
            respon = client.get(
                url=test_url,
                headers=headers,
                # requests selects a proxy by URL scheme ("http"/"https");
                # a key such as "http://" matches nothing, so the request
                # would silently bypass the proxy.
                proxies={'http': proxy_url, 'https': proxy_url},
                timeout=timeout,
            )
        except Exception:
            # Best effort: one dead or malformed proxy must not abort the
            # check of the remaining candidates.
            continue
        if respon.status_code == 200:
            print(f"{proxies}可用")
            can_ip.append(proxies)
    return can_ip
def ipinfo(pages=6, delay=0.5, timeout=10):
    """Scrape proxy addresses from the kuaidaili.com free-proxy listing.

    Fetches listing pages ``1..pages`` and, for every table row found under
    the element with id ``list``, reads cells 1, 2 and 4 (treated as IP,
    port and protocol type, as the variable names indicate). Each row with
    both an IP and a port contributes one ``"ip:port"`` string. Progress,
    the collected list, the count and the elapsed time are printed.

    Args:
        pages: Number of listing pages to fetch, starting at page 1.
        delay: Seconds to pause after each page request.
        timeout: Per-request timeout in seconds; without one a stalled
            connection would block forever.

    Returns:
        A list of ``"ip:port"`` strings in page and row order. Pages whose
        request fails are reported and skipped.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    proxies_list = []
    start_time = time.time()
    for page in range(1, pages + 1):
        print(f"正在爬取第{page}页{'#*#'*30}")
        url = f'https://www.kuaidaili.com/free/inha/{page}/'
        try:
            res = requests.get(url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # Keep what was collected so far instead of losing the whole run.
            print(f"第{page}页请求失败: {exc}")
            time.sleep(delay)
            continue
        html = parsel.Selector(res.text)
        rows = html.xpath('//*[@id="list"]/table/tbody/tr')
        # Pause once per page request; sleeping per parsed row makes no
        # request and only slows the run down.
        time.sleep(delay)
        for row in rows:
            http_type = row.xpath('./td[4]/text()').extract_first()
            ip_num = row.xpath('./td[1]/text()').extract_first()
            port_num = row.xpath('./td[2]/text()').extract_first()
            if not ip_num or not port_num:
                # extract_first() yields None for a missing cell; skip the
                # row rather than fail on None + str.
                continue
            address = f"{ip_num.strip()}:{port_num.strip()}"
            print({http_type: address})
            proxies_list.append(address)
        print(f"第{page}爬取完毕{'*+*'*30}")
    print(proxies_list)
    print('获取到的代理IP数量是:', len(proxies_list), "个")
    now_time = time.time() - start_time
    print(f"用时{now_time}秒")
    return proxies_list
def write_text(can_ip):
    """Write the given proxy addresses to ``./i_p.text``, one per line.

    The file is created or truncated on every call. A falsy ``can_ip``
    (empty list, ``None``, ``False``) produces an empty file instead of
    failing after the file has already been truncated.

    Args:
        can_ip: Iterable of proxy address strings.
    """
    # The context manager closes the handle even if a write raises.
    with open('./i_p.text', 'w', encoding='utf-8') as w:
        w.writelines(f"{item}\n" for item in can_ip or ())
def run():
    """Scrape candidate proxies, keep the ones that pass the check, save them."""
    scraped = ipinfo()
    usable = chik_ip(scraped)
    write_text(usable)


# Script entry point: the pipeline starts as soon as the module is executed.
run()
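# NOTE: stray web-page moderation notice captured together with the source; it is not part of the program. Translation: "This section may contain content that is unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing features."
# "If you confirm that the content does not involve inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, or content that violates relevant national laws and regulations, you may click submit to appeal, and we will handle it as soon as possible."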