加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
task5.py 2.05 KB
一键复制 编辑 原始数据 按行查看 历史
打代码的小黄瓜 提交于 2024-04-22 09:39 . 爬取多页电影数据
import requests
import json
import time
import pymysql
# Paged "hot movies" JSON endpoint. A concrete link looks like:
#   https://api.json.wandou9.xyz:8036/movie/gethotmovie/page/1/rows/32.json
URL_TEMPLATE = "https://api.json.wandou9.xyz:8036/movie/gethotmovie/page/{page}/rows/32.json"
FIRST_PAGE = 1
LAST_PAGE = 7

# Build one link per page, FIRST_PAGE..LAST_PAGE inclusive
# (range() excludes its stop value, hence the + 1).
urls = [URL_TEMPLATE.format(page=page) for page in range(FIRST_PAGE, LAST_PAGE + 1)]
print(urls)
# Parameterized INSERT; the driver fills the %s placeholders, so values are
# never interpolated into the SQL text by hand.
INSERT_SQL = "INSERT INTO movies (name, pic, actor, score, detail_link, movie_id) VALUES (%s, %s, %s, %s, %s, %s)"
# Site root used to turn the API's relative "hot_url" into an absolute link.
SITE_ROOT = "https://wandou.la"
# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10


def _movie_row(movie):
    """Map one API movie record onto the column order used by INSERT_SQL.

    A sample record from the API:
    {"hot_movie_id":"166716","hot_name":"盟约","hot_url":"/detail/166716.html",
     "hot_img_url":"https://647bc185.szrtcpa.com/view/photo/s_ratio_poster/public/p2887388117.webp",
     "hot_rate":"7.1","movie_onclick":810932,"movie_director":"盖·里奇",
     "movie_tags":"动作,惊悚","movie_recommend":"0"}

    Raises KeyError if the record lacks one of the fields read below.
    """
    return (
        movie['hot_name'],
        movie['hot_img_url'],
        movie['movie_director'],  # the director is what gets stored in the "actor" column
        movie['hot_rate'],
        # hot_url already starts with "/", so strip it to avoid "wandou.la//detail/...".
        SITE_ROOT + "/" + movie['hot_url'].lstrip("/"),
        movie['hot_movie_id'],
    )


# Open the database connection.
# NOTE(review): host and credentials are hard-coded in source; consider loading
# them from environment variables or a config file kept out of version control.
conn = pymysql.connect(host='192.168.18.13', port=33046, user='root', password='123456', db='xuweijie', charset='utf8')
try:
    # The cursor is closed automatically when the with-block exits.
    with conn.cursor() as cursor:
        for url in urls:
            # Pause one second before each request so the crawl does not
            # overload the server or get flagged as an attack.
            time.sleep(1)
            try:
                resp = requests.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # Treat a network failure like a bad status: report it and
                # move on to the next page instead of aborting the whole run.
                print("请求失败,异常:", exc)
                continue
            if resp.status_code != 200:
                print("请求失败,状态码:", resp.status_code)
                continue
            # The endpoint returns JSON rather than HTML, so parse it as JSON.
            data = json.loads(resp.text)
            # "data" holds the list of movie records; tolerate it being absent or null.
            rows = [_movie_row(movie) for movie in (data.get('data') or [])]
            if rows:
                # Insert the whole page in one batch.
                cursor.executemany(INSERT_SQL, rows)
        # Commit once, after every page has been processed.
        conn.commit()
finally:
    # Always release the connection, even if a page failed midway.
    conn.close()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化