# (Web-page notice captured along with this file: "Code pull complete; the page will refresh automatically.")
import os
from urllib import request
from bs4 import BeautifulSoup
# User-Agent header value attached to the HTTP requests made by this script.
# Adjacent literals are joined by the parser, so the value is one single line.
user_agent_str = (
    'Mozilla/6.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) '
    'AppleWebKit/536.26 (KHTML, like Gecko) '
    'Version/8.0 Mobile/10A5376e Safari/8536.25'
)
def down_image(url, file_name):
    """Download ``url`` and write the raw response body to ``file_name``.

    The request carries the module-level ``user_agent_str`` as its
    User-Agent header.

    Args:
        url: Address of the resource to fetch.
        file_name: Path of the file to create (overwritten if it exists).

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the output file cannot be written.
    """
    req = request.Request(url=url)
    req.add_header('User-Agent', user_agent_str)
    # Read the whole body before touching the disk, so a failed request
    # never leaves an empty or truncated file behind.
    with request.urlopen(req) as response:
        binary_data = response.read()
    # The context manager closes the file even if write() raises.
    with open(file_name, 'wb') as temp_file:
        temp_file.write(binary_data)
if __name__ == "__main__":
    # For every ISBN listed in isbn.txt (one per line), search jd.com for
    # it, take the first <img> inside the first <div class="p-img"> of the
    # result page and save that image as <img_dir>/<isbn>.jpg.

    # Build the path with os.path.join instead of a hard-coded backslash so
    # the directory is created correctly on every platform.
    img_dir = os.path.join(".", "cover")
    os.makedirs(img_dir, exist_ok=True)

    search_prefix = 'http://search.jd.com/Search?keyword='
    # Lines with a zero-based index greater than this are not processed.
    max_index = 400000

    # The context manager guarantees the ISBN list is closed; the handle no
    # longer shares a name with the HTTP response object.
    with open('isbn.txt', 'r', -1, 'utf-8') as isbn_file:
        for n, line in enumerate(isbn_file):
            if n > max_index:
                break
            isbn = line.strip()
            if not isbn:
                # A blank line would trigger an empty search and a file
                # named ".jpg"; skip it.
                continue

            search_url = search_prefix + isbn
            print('----->' + search_url)
            req = request.Request(search_url)
            req.add_header('User-Agent', user_agent_str)

            # Step 1: fetch and parse the search page.
            try:
                with request.urlopen(req) as resp:
                    data = resp.read()
                soup = BeautifulSoup(data.decode('utf-8', 'ignore'), "html.parser")
                cover_div = soup.find('div', attrs={'class': 'p-img'})
                img = cover_div.find('img') if cover_div is not None else None
            except Exception as e:
                # Best effort: report the failure and move on to the next ISBN.
                print('=-===>Error:', e)
                continue

            if img is None:
                # No cover element on the result page; nothing to download.
                continue

            # Step 2: resolve the image URL and download it.
            try:
                src = img.get('src')
                if src is None:
                    # The tag has no src attribute.
                    # NOTE(review): the URL may live under another attribute
                    # (e.g. lazy loading) - confirm against the page markup.
                    print(src)
                else:
                    # Protocol-relative values ("//host/path") need a scheme;
                    # absolute URLs are used unchanged.
                    if src.startswith(('http://', 'https://')):
                        my_url = src
                    else:
                        my_url = 'http:' + src
                    print(my_url)
                    down_image(my_url, os.path.join(img_dir, isbn + '.jpg'))
            except Exception as e:
                print('=====>Error:', e)
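# (Web-page moderation notice captured along with this file: "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it using the relevant editing functions.
# If you confirm that the content does not involve inappropriate language, pure advertising, violence, vulgar or pornographic material, infringement, piracy, false or worthless content, or anything that violates national laws and regulations, you can click submit to appeal and we will handle it as soon as possible.")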