# Hosting-page notice (not program code): code pull complete; the page will refresh automatically.
import json
import os
import re
import subprocess
from pprint import pprint

import requests
# Download a regular Bilibili video; the downloaded video is watermarked.
def BlibCraw(url):
    """Download the audio and video streams of a Bilibili video page and merge them.

    The page at ``url`` is fetched and its ``<title>`` becomes the base file
    name (spaces and characters that are unsafe in file names are replaced by
    ``_``). The JSON assigned to ``window.__playinfo__`` supplies the stream
    URLs. The first audio stream is saved to ``audio/<title>.mp3`` and the
    first video stream to ``video/<title>.mp4``, both relative to the current
    working directory; ffmpeg then writes the merged file to
    ``video/<title>output.mp4``.

    Args:
        url: Address of the Bilibili video page.

    Raises:
        IndexError: If the title or ``window.__playinfo__`` is not found in the page.
        KeyError: If the play info has no ``data.dash`` audio/video entries.
        requests.RequestException: On network errors, timeouts or HTTP error statuses.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    headers = {
        # Anti-hotlinking: the media URLs answer 403 unless this referer is sent.
        'referer': 'https://www.bilibili.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 '
                      'Safari/537.36 Edg/105.0.1343.27 '
    }
    timeout = 30  # seconds; without it a stalled connection would hang forever

    def first_match(pattern, text, what):
        # Still raises IndexError, as the former findall(...)[0] did, but says what is missing.
        found = re.search(pattern, text)
        if found is None:
            raise IndexError(f'{what} not found in page: {url}')
        return found.group(1)

    def download(media_url, path):
        # Stream to disk in chunks so a large file is never held fully in memory,
        # and refuse to save an HTTP error body (e.g. a 403 page) as media.
        with requests.get(url=media_url, headers=headers, stream=True, timeout=timeout) as media:
            media.raise_for_status()
            with open(path, mode='wb') as out:
                for chunk in media.iter_content(chunk_size=1024 * 1024):
                    out.write(chunk)

    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()

    title = first_match('<title data-vue-meta="true">(.*?)</title>', response.text, 'title')
    title = title.replace(' ', '_')
    # The title comes from the web page: drop characters that are illegal in file names.
    title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title)

    html_data = first_match('<script>window.__playinfo__=(.*?)</script>', response.text,
                            'window.__playinfo__')
    json_data = json.loads(html_data)

    # Bilibili serves the audio and the video as separate streams.
    dash = json_data['data']['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']

    os.makedirs('audio', exist_ok=True)
    os.makedirs('video', exist_ok=True)
    audio_path = os.path.join('audio', title + '.mp3')
    video_path = os.path.join('video', title + '.mp4')
    output_path = os.path.join('video', title + 'output.mp4')

    download(audio_url, audio_path)
    download(video_url, video_path)

    # Merge audio and video. ffmpeg is given exactly the files written above
    # (not a hard-coded absolute directory), and the arguments are passed as a
    # list without a shell so the page-supplied title cannot alter the command.
    cmd = [
        'ffmpeg',
        '-i', video_path,
        '-i', audio_path,
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        output_path,
    ]
    subprocess.run(cmd)
# Download a Bilibili bangumi episode; for members-only episodes only the
# three-minute preview can be fetched. The downloaded video is watermarked.
def BlibCrawForep(url):
    """Download a Bilibili bangumi episode, merging audio and video when they are separate.

    The page at ``url`` is fetched and the text of its ``<h1 title=...>``
    element becomes the base file name (spaces and characters that are unsafe
    in file names are replaced by ``_``). The JSON assigned to
    ``window.__playinfo__`` supplies the stream URLs:

    * If its ``data`` holds a ``durl`` entry (preview episode, audio and video
      in one stream), that stream is saved to ``video/<title>.mp4`` and
      nothing else is done.
    * Otherwise (freely watchable episode) the first audio stream is saved to
      ``audio/<title>.mp3``, the first video stream to ``video/<title>.mp4``,
      and ffmpeg writes the merged file to ``video/<title>output.mp4``.

    All paths are relative to the current working directory.

    Args:
        url: Address of the Bilibili bangumi episode page.

    Raises:
        IndexError: If the title or ``window.__playinfo__`` is not found in the page.
        KeyError: If the play info has neither ``durl`` nor ``dash`` entries.
        requests.RequestException: On network errors, timeouts or HTTP error statuses.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    headers = {
        # Anti-hotlinking: the media URLs answer 403 unless this referer is sent.
        'referer': 'https://www.bilibili.com/',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/105.0.0.0 '
                      'Safari/537.36 Edg/105.0.1343.27 '
    }
    timeout = 30  # seconds; without it a stalled connection would hang forever

    def first_match(pattern, text, what):
        # Still raises IndexError, as the former findall(...)[0] did, but says what is missing.
        found = re.search(pattern, text)
        if found is None:
            raise IndexError(f'{what} not found in page: {url}')
        return found.group(1)

    def download(media_url, path):
        # Stream to disk in chunks so a large file is never held fully in memory,
        # and refuse to save an HTTP error body (e.g. a 403 page) as media.
        with requests.get(url=media_url, headers=headers, stream=True, timeout=timeout) as media:
            media.raise_for_status()
            with open(path, mode='wb') as out:
                for chunk in media.iter_content(chunk_size=1024 * 1024):
                    out.write(chunk)

    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()

    title = first_match('<h1 title=".*?">(.*?)</h1>', response.text, 'title')
    title = title.replace(' ', '_')
    # The title comes from the web page: drop characters that are illegal in file names.
    title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title)

    html_data = first_match('<script>window.__playinfo__=(.*?)</script>', response.text,
                            'window.__playinfo__')
    data = json.loads(html_data)['data']

    os.makedirs('video', exist_ok=True)
    video_path = os.path.join('video', title + '.mp4')

    # Preview episode: audio and video arrive in a single stream, no merge needed.
    if 'durl' in data:
        download(data['durl'][0]['url'], video_path)
        return

    # Freely watchable (non-member) episode: audio and video are separate streams.
    dash = data['dash']
    audio_url = dash['audio'][0]['baseUrl']
    video_url = dash['video'][0]['baseUrl']

    os.makedirs('audio', exist_ok=True)
    audio_path = os.path.join('audio', title + '.mp3')
    output_path = os.path.join('video', title + 'output.mp4')

    download(audio_url, audio_path)
    download(video_url, video_path)

    # Merge audio and video. ffmpeg is given exactly the files written above
    # (not a hard-coded absolute directory), and the arguments are passed as a
    # list without a shell so the page-supplied title cannot alter the command.
    cmd = [
        'ffmpeg',
        '-i', video_path,
        '-i', audio_path,
        '-c:v', 'copy',
        '-c:a', 'aac',
        '-strict', 'experimental',
        output_path,
    ]
    subprocess.run(cmd)
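# Hosting-page notice (not program code): content that may be unsuitable for display was detected here, so the page does not show it. You can review and revise it yourself using the relevant editing features.
# If you confirm that the content involves no inappropriate language, pure advertising or traffic diversion, violence, vulgar or pornographic material, infringement, piracy, falsehoods, worthless content, or anything that violates applicable national laws and regulations, you can click Submit to file an appeal, and we will handle it for you as soon as possible.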