加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
picpicker.py 1.88 KB
一键复制 编辑 原始数据 按行查看 历史
kzeng 提交于 2017-03-15 21:16 . a
import os
from urllib import request
from bs4 import BeautifulSoup
# User-Agent header value attached to the HTTP requests made in this file
# (an iPhone / Mobile Safari identification string).
user_agent_str = (
    'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) '
    'AppleWebKit/536.26 (KHTML, like Gecko) '
    'Version/8.0 Mobile/10A5376e Safari/8536.25'
)
def down_image(url, file_name):
    """Download the resource at *url* and save its raw bytes to *file_name*.

    The request carries the module-level ``user_agent_str`` as its
    ``User-Agent`` header.

    Args:
        url: Address of the resource to fetch.
        file_name: Path of the file to write; it is opened in ``'wb'`` mode,
            so an existing file is overwritten.

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the destination file cannot be opened or written.
    """
    req = request.Request(url=url)
    req.add_header('User-Agent', user_agent_str)
    # Read the whole body before touching the destination so a failed
    # download does not leave an empty file behind. The context manager
    # closes the HTTP response, which was previously left open.
    with request.urlopen(req) as response:
        binary_data = response.read()
    # `with` guarantees the file is closed even when write() raises.
    with open(file_name, 'wb') as image_file:
        image_file.write(binary_data)
if __name__ == "__main__":
    # Script entry point: for each ISBN listed in isbn.txt (one per line),
    # query the JD search page, take the first <img> inside the first
    # <div class="p-img"> of the result, and save it as <isbn>.jpg in the
    # cover directory.

    # Build paths with os.path.join so they are valid on every platform; the
    # previous ".\cover" literal relied on the invalid "\c" escape sequence
    # and on a Windows-only separator.
    img_dir = os.path.join(os.curdir, "cover")
    os.makedirs(img_dir, exist_ok=True)

    search_url = 'http://search.jd.com/Search?keyword='
    # Lines whose zero-based index exceeds this value are not processed.
    max_line_index = 400000

    # The context manager closes isbn.txt; previously the handle was never
    # closed and its name was rebound to the HTTP response inside the loop.
    with open('isbn.txt', 'r', -1, 'utf-8') as isbn_file:
        for line_index, line in enumerate(isbn_file):
            if line_index > max_line_index:
                break
            isbn = line.strip()
            if not isbn:
                # A blank line would search for nothing and save ".jpg".
                continue
            print('----->' + search_url + isbn)
            req = request.Request(search_url + isbn)
            req.add_header('User-Agent', user_agent_str)
            try:
                with request.urlopen(req) as response:
                    data = response.read()
                soup = BeautifulSoup(data.decode('utf-8', 'ignore'), "html.parser")
                # Only the first matching container and its first image are used.
                container = soup.find('div', attrs={'class': 'p-img'})
                img = container.find('img') if container is not None else None
                if img is None:
                    continue
                try:
                    src = img.get('src')
                    if src is None:
                        # The tag has no src attribute: report it and move on.
                        print(src)
                    else:
                        # A scheme is prepended only when src lacks one, so an
                        # already absolute URL is not corrupted.
                        if src.startswith(('http:', 'https:')):
                            my_url = src
                        else:
                            my_url = 'http:' + src
                        print(my_url)
                        down_image(my_url, os.path.join(img_dir, isbn + '.jpg'))
                except Exception as e:
                    # Best effort: a failed download must not stop the batch.
                    print('=====>Error:', e)
            except Exception as e:
                # Best effort: a failed search or parse must not stop the batch.
                print('=-===>Error:', e)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化