# picpicker/picpicker.py (branch: master)

import os
from urllib import request
from bs4 import BeautifulSoup


# Value used for the User-Agent request header; it presents the client as
# Mobile Safari on an iPhone.  Adjacent literals are concatenated by the
# parser, so the resulting string contains no line break.
user_agent_str = (
    'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) '
    'AppleWebKit/536.26 (KHTML, like Gecko) '
    'Version/8.0 Mobile/10A5376e Safari/8536.25'
)


def down_image(url, file_name):
    """Download the resource at *url* and write it to *file_name*.

    The request is sent with the module-level ``user_agent_str`` as its
    User-Agent header.  The complete response body is read before the
    destination is opened, so a failed download does not create the file.

    Args:
        url: Address of the resource to fetch.
        file_name: Path of the file to write in binary mode; an existing
            file is overwritten.

    Raises:
        Exception: Whatever ``urllib.request.urlopen`` raises for a failed
            request (e.g. ``urllib.error.URLError``) or ``open``/``write``
            raises for the destination (``OSError``) is propagated to the
            caller unchanged.
    """
    req = request.Request(url=url)
    req.add_header('User-Agent', user_agent_str)

    # Context managers release the connection and the file handle even when
    # read() or write() raises; the original leaked both in that case.
    with request.urlopen(req) as response:
        binary_data = response.read()
    with open(file_name, 'wb') as image_file:
        image_file.write(binary_data)


if __name__ == "__main__":
    # Batch cover downloader: for every ISBN listed in isbn.txt, query the
    # JD search page, take the first <img> inside the first 'p-img' <div>
    # and save it as <img_dir>/<isbn>.jpg.  Errors are printed and the run
    # moves on to the next ISBN.

    # Local import keeps this script section self-contained.
    from urllib.parse import quote, urljoin

    search_prefix = 'http://search.jd.com/Search?keyword='
    # Lines whose zero-based index exceeds this value are not processed.
    max_index = 400000

    # os.path.join keeps the path portable and avoids the invalid '\c'
    # escape of a hand-written backslash path; on Windows the result is
    # unchanged.
    img_dir = os.path.join(os.curdir, 'cover')
    os.makedirs(img_dir, exist_ok=True)

    # The with-block closes the ISBN list when the run ends, and iterating
    # the file object streams it instead of loading every line up front.
    with open('isbn.txt', 'r', -1, 'utf-8') as isbn_file:
        for index, line in enumerate(isbn_file):
            if index > max_index:
                break

            isbn = line.strip()
            if not isbn:
                # A blank line would search for nothing and be saved as
                # '.jpg', so it is skipped.
                continue

            # Quote the keyword so spaces or non-ASCII characters cannot
            # produce an invalid request URL.
            search_url = search_prefix + quote(isbn)
            print('----->' + search_url)
            req = request.Request(search_url)
            req.add_header('User-Agent', user_agent_str)

            # Broad catch on purpose: one bad ISBN must not stop the batch.
            try:
                with request.urlopen(req) as response:
                    data = response.read()

                soup = BeautifulSoup(
                    data.decode('utf-8', 'ignore'), "html.parser")
                cover_div = soup.find('div', attrs={'class': 'p-img'})
                img = cover_div.find('img') if cover_div is not None else None
                if img is None:
                    # No product image on the result page.
                    continue

                src = img.get('src')
                if src is None:
                    print(src)
                    continue

                try:
                    # urljoin resolves protocol-relative ('//host/x.jpg'),
                    # absolute and root-relative sources alike, whereas a
                    # plain 'http:' prefix only suits the first form.
                    my_url = urljoin(search_url, src)
                    print(my_url)
                    down_image(my_url, os.path.join(img_dir, isbn + '.jpg'))
                except Exception as e:
                    print('=====>Error:', e)
            except Exception as e:
                print('=-===>Error:', e)