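# picpicker: a multi-threaded scraper that reads ISBNs from isbn.txt,
# searches JD.com for each one, and saves the first cover image found on
# the search-result page to ./cover/<ISBN>.jpg.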
import threading
from time import ctime
import os
from urllib import request
from bs4 import BeautifulSoup
def down_image(url, file_name):
    # Fetch the image with a browser-like User-Agent and write it to disk.
    req = request.Request(url=url)
    req.add_header('User-Agent', user_agent_str)
    binary_data = request.urlopen(req).read()
    with open(file_name, 'wb') as image_file:
        image_file.write(binary_data)
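# A possible hardening, assuming a 10-second budget per request suits this
# workload: passing request.urlopen(req, timeout=10) here and in the search
# loop below would keep a stalled connection from hanging its worker thread.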
def fetch_covers(worker_no, start, end):
    # Worker body shared by all threads; each thread scrapes the slice
    # [start:end] of the ISBN list.
    print('=====>starting picpicker#%d at:' % worker_no, ctime())
    with open('isbn.txt', 'r', -1, 'utf-8') as f:
        lines = f.readlines()
    for line in lines[start:end]:
        isbn = line.strip()
        print('----->http://search.jd.com/Search?keyword=' + isbn)
        req = request.Request('http://search.jd.com/Search?keyword=' + isbn)
        req.add_header('User-Agent', user_agent_str)
        try:
            with request.urlopen(req) as resp:
                data = resp.read()
            soup = BeautifulSoup(data.decode('utf-8', 'ignore'), "html.parser")
            # Keep only the first product-image block on the result page.
            for div in soup.find_all('div', attrs={'class': 'p-img'}, limit=1):
                for img in div.find_all('img', limit=1):
                    try:
                        if img.get('src') is None:
                            print(img.get('src'))
                        else:
                            url = 'http:' + img.get('src')
                            print(url)
                            down_image(url, os.path.join(img_dir, isbn + '.jpg'))
                    except Exception as e:
                        print('=====>Error:', e)
        except Exception as e:
            print('=====>Error:', e)
user_agent_str = 'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) \
AppleWebKit/536.26 (KHTML, like Gecko) Version/8.0 Mobile/10A5376e Safari/8536.25'
NUM_WORKERS = 10

with open('isbn.txt', 'r', -1, 'utf-8') as isbn_file:
    lines_cnt = len(isbn_file.readlines())

# Split the ISBN list into NUM_WORKERS contiguous slices. Slice ends are
# exclusive, so consecutive slices sharing a boundary index cover every
# line exactly once, with no gaps at the boundaries and no dropped tail.
bounds = [lines_cnt * k // NUM_WORKERS for k in range(NUM_WORKERS + 1)]
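# Worked example: with NUM_WORKERS = 10 and lines_cnt = 103, bounds is
# [0, 10, 20, 30, 41, 51, 61, 72, 82, 92, 103]; consecutive pairs tile
# the whole file, so all 103 lines are processed exactly once.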
threads = []
for n in range(NUM_WORKERS):
    t = threading.Thread(target=fetch_covers, args=(n + 1, bounds[n], bounds[n + 1]))
    threads.append(t)
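# A minimal alternative sketch, not used here: concurrent.futures gives the
# same fan-out and performs the join automatically when the pool exits.
#
#   from concurrent.futures import ThreadPoolExecutor
#   with ThreadPoolExecutor(max_workers=NUM_WORKERS) as pool:
#       for n in range(NUM_WORKERS):
#           pool.submit(fetch_covers, n + 1, bounds[n], bounds[n + 1])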
if __name__ == '__main__':
    img_dir = os.path.join('.', 'cover')
    if not os.path.isdir(img_dir):
        os.mkdir(img_dir)
    print('----->starting picpicker at:', ctime())
    for t in threads:
        t.daemon = False  # non-daemon threads keep the process alive
        t.start()
    # Wait for every worker to finish before reporting completion.
    for t in threads:
        t.join()
    print("All over", ctime())