加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
TouTiaoCrawlers.py 2.68 KB
一键复制 编辑 原始数据 按行查看 历史
GuLu 提交于 2022-09-18 17:19 . request爬虫
import re
import subprocess
from pathlib import Path

import requests
# 今日头条视频爬虫
def _first_match(pattern, text, what):
    """Return the first capture group of *pattern* found in *text*.

    Raises IndexError -- the same exception type the previous bare
    ``re.findall(...)[0]`` lookups raised -- but with a message that names
    the piece of the page that could not be located.
    """
    found = re.search(pattern, text)
    if found is None:
        raise IndexError(f"could not locate {what} in the page")
    return found.group(1)


def _safe_filename(title):
    """Turn a page title into a string usable as a file name.

    Spaces become underscores (as before), and characters that Windows
    forbids in file names are replaced with underscores as well.
    """
    return re.sub(r'[\\/:*?"<>|]', '_', title.replace(' ', '_'))


def TouTCraw(url: str, *, base_dir=".", timeout=30) -> None:
    """Download a Toutiao video page's 720p video and audio and merge them.

    The page at *url* is fetched, the ``videoPlayInfo`` script payload is
    URL-decoded, and the 720p video ``main_url`` plus the first
    ``dynamic_audio_list`` ``main_url`` are extracted from it with regular
    expressions. Both streams are saved and then muxed by ``ffmpeg``:

    * ``<base_dir>/audio/<title>.mp3``        -- raw audio stream
    * ``<base_dir>/video/<title>.mp4``        -- raw video stream
    * ``<base_dir>/video/<title>output.mp4``  -- merged result

    Args:
        url: Address of the Toutiao video page.
        base_dir: Directory that holds the ``audio`` and ``video`` folders.
            Defaults to the current working directory, which is where the
            streams were always written. The merge step now reads from the
            same place instead of a hard-coded ``E:\\爬虫`` folder.
        timeout: Seconds passed to every ``requests.get`` call so a stalled
            connection cannot hang forever.

    Raises:
        IndexError: The title, play info, 720p entry or audio entry could
            not be found in the page.
        requests.RequestException: A request failed or timed out, or a
            media download returned an HTTP error status.
        FileNotFoundError: The ``ffmpeg`` executable is not on PATH.

    Note:
        ffmpeg's exit status is not inspected (unchanged from before).
    """
    # NOTE(review): this is a hard-coded browser session cookie; it will
    # expire and probably should not live in source control.
    headers = {
        'cookie': 'tt_webid=7139605604477388318; ttcid=aadd7eb74eb44a01b84a41b1a9b79ac634; _tea_utm_cache_24={'
                  '%22utm_source%22:%22copy_link%22%2C%22utm_medium%22:%22toutiao_android%22%2C%22utm_campaign%22'
                  ':%22client_share%22}; s_v_web_id=verify_l7npsvjl_GqaI64FF_6lGb_4UCu_9cqF_V1RQsEXiqB4H; '
                  'local_city_cache=%E5%8C%97%E4%BA%AC; _tea_utm_cache_1300={'
                  '%22utm_source%22:%22copy_link%22%2C%22utm_medium%22:%22toutiao_android%22%2C%22utm_campaign%22'
                  ':%22client_share%22}; csrftoken=043b3faa03fcf1b602eaa21751189f06; '
                  'msToken=EuKsGE-znZHJPqf78tWyhYPXmmoHLSTy_26uW5Zaym9IaO4TlXpcEasOm-BstVKnPKD0'
                  '-K8fDCtL2rKsTfDvpzYylXM53l6XoUp_Kd5oB0jt; MONITOR_WEB_ID=7139605604477388318; '
                  'ttwid=1%7CzJeA_hNSisnYWtfAdXu37DQ4Y-_vr-eRGyl03F23qbM%7C1662376254'
                  '%7C74f23fb8683a5eea15ab1f36724e67bc7add7270b4a36fdd290d3ee9c84b05bd; '
                  'tt_scid=U.wVvxl6xFthEs63E56aFWNT6Jg6mftTUY9opJDAe9amsXDG3u58Kp0mReukDAhm8146',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.27 '
    }

    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.encoding = 'utf-8'
    page = response.text

    title = _safe_filename(
        _first_match('<title>(.*?)</title>', page, 'the <title> element'))

    # The play info is embedded URL-encoded inside a <script> element.
    play_info = requests.utils.unquote(
        _first_match('videoPlayInfo(.*?)</script>', page, 'videoPlayInfo'))

    # Video address: the main_url of the 720p entry.
    meta_720p = _first_match(
        'video_meta":{"definition":"720p"(.*?),"backup_url',
        play_info, 'the 720p video entry')
    video_url = _first_match('main_url":"(.*?)"', meta_720p,
                             'the 720p main_url')

    # Audio address: the first entry of dynamic_audio_list.
    audio_url = _first_match(
        r'dynamic_audio_list":\[{"main_url":"(.*?)","backup_url"',
        play_info, 'the audio main_url')

    # Fetch both streams; fail loudly instead of saving an HTTP error body
    # as if it were media.
    audio_response = requests.get(url=audio_url, headers=headers,
                                  timeout=timeout)
    audio_response.raise_for_status()
    video_response = requests.get(url=video_url, headers=headers,
                                  timeout=timeout)
    video_response.raise_for_status()

    root = Path(base_dir)
    audio_path = root / 'audio' / f'{title}.mp3'
    video_path = root / 'video' / f'{title}.mp4'
    merged_path = root / 'video' / f'{title}output.mp4'

    # The output folders may not exist yet on a fresh checkout.
    audio_path.parent.mkdir(parents=True, exist_ok=True)
    video_path.parent.mkdir(parents=True, exist_ok=True)

    audio_path.write_bytes(audio_response.content)
    video_path.write_bytes(video_response.content)

    # Merge audio and video. The arguments are passed as a list with no
    # shell, so a title taken from the remote page cannot inject commands.
    cmd = [
        'ffmpeg',
        '-i', str(video_path),
        '-i', str(audio_path),
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        str(merged_path),
    ]
    subprocess.run(cmd)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化