master

分支 (1)

管理

管理

master

pan
/
collect11.py

# coding:utf-8
from bs4 import BeautifulSoup as bs
import requests
import pm as pm
import json
import time
import re
import sys


# Page collection: fetch one file page and store its share link.
def go(i):
    """Fetch file page ``i`` and store its share link in the ``pan`` table.

    Downloads ``https://www.57fx.com/file/<i>.html``, extracts the title
    (the ``<h1>`` inside ``div.content``) and the download link (the
    ``href`` of ``a.toyunDown``), and inserts a row through ``pm.insert``
    unless ``pm.getcount`` reports that the link is already stored.

    Pages that cannot be fetched, or that lack any of the expected
    elements, are skipped so that a single bad page does not abort a
    long crawl.

    Args:
        i: Numeric id of the file page to fetch.

    Returns:
        None.
    """
    import datetime

    page_url = "https://www.57fx.com/file/" + str(i) + ".html"
    # Sent verbatim as spoofed client-address headers. Note that this is
    # not a valid IPv4 address (two octets exceed 255).
    ip = "10.12.345.892"
    headers = {"CLIENT-IP": ip, "X-FORWARDED-FOR": ip}

    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(page_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print("skip %s: request failed (%s)" % (i, exc))
        return

    soup = bs(response.text, "html.parser")

    content = soup.find("div", {"class": "content"})
    if content is None:
        # No content container on this page: nothing to collect.
        return

    heading = content.find("h1")
    anchor = soup.find("a", {"class": "toyunDown"})
    href = anchor.get('href') if anchor is not None else None
    if heading is None or not href:
        # Title or download link missing: skip instead of raising
        # AttributeError on None.
        return

    # NOTE(review): the manual quote escaping suggests pm builds SQL by
    # string concatenation -- confirm, and prefer parameterized queries.
    title = heading.get_text().replace("'", "\\'")
    link = href.replace("&third=0", "")

    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    record = {
        'title': title,
        'link': link,
        'createtime': now,
        'updatetime': now,
        'key_id': 'pk_' + str(i),
    }

    # Insert only when no stored row already has this link.
    if pm.getcount('pan', {'link': link}) == 0:
        pm.insert('pan', record)


def start(first=300000, stop=310000, pause_every=50, pause_seconds=2):
    """Crawl a contiguous range of page ids, throttling periodically.

    For every id in ``range(first, stop)`` the id is printed and handed
    to ``go``. Before processing an id that is a multiple of
    ``pause_every`` the loop sleeps for ``pause_seconds``.

    The defaults reproduce the previously hard-coded behaviour
    (ids 300000..309999, a 2 second pause on every multiple of 50).

    Args:
        first: First page id to collect (inclusive).
        stop: Page id at which to stop (exclusive).
        pause_every: Sleep before each id divisible by this value; a
            falsy value (0 or None) disables the pause.
        pause_seconds: Length of each pause, in seconds.
    """
    for i in range(first, stop):
        print(i)
        # Guard on pause_every so that 0 disables throttling instead of
        # raising ZeroDivisionError.
        if pause_every and i % pause_every == 0:
            time.sleep(pause_seconds)
        go(i)


# Kick off the crawl immediately. NOTE: there is no
# ``if __name__ == "__main__"`` guard, so this also runs on import.
start()