加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
geek_time.py 6.39 KB
一键复制 编辑 原始数据 按行查看 历史
northlight 提交于 2018-08-04 18:43 . fix duplicated / in the download path
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import json
import os
import regex as re
import browsercookie
mobile_headers = {
'Host': "time.geekbang.org",
'X-GEEK-APP-NAME': "time",
'X-GEEK-DEVICE-MODEL': "iPhone7,2",
'Accept': "*/*",
'X-GEEK-OS-VER': "11.3",
'Ticket': "AwgBAQQEIBwAAAEEYkEQAAoEDwAAAAUEgMYTAAkBAQsCAwAHBLx.4K8GBF5ck0gCBOq6uFoDBOJ74lo-",
'X-GEEK-VER-NAME': "1.1.4",
'Device-Id': "3C9553DD-611C-4EBE-B703-109AD179F848",
'Device-Token': "c839237dbd40706072532646df282c50ac0b8ebf60cd06fa5e54d70b041b5c77",
'Accept-Language': "zh-cn",
'Accept-Encoding': "br, gzip, deflate",
'Content-Type': "application/json",
'Content-Length': "57",
'User-Agent': "iPhone7,2(iOS/11.3) GeekbangApp(Zeus/1.1.4) Weex/0.16.1 ExternalUA 750x1334",
'X-GEEK-OS-PLATFORM': "iOS",
'Referer': "http://www.geekbang.org/",
'X-GEEK-OS-NAME': "iOS",
'Connection': "keep-alive",
'Cache-Control': "no-cache"
}
# 专栏
my_column_url = "https://time.geekbang.org/serv/v1/my/columns"
# 视频专栏
video_url = "https://time.geekbang.org/serv/v1/column/articles"
# 音频专栏
audio_url = "https://time.geekbang.org/serv/v1/column/audios"
# 所有专栏
all_column_url = "https://time.geekbang.org/serv/v1/columns"
# 获取 Chrome cookie
cj = browsercookie.chrome()
JSON_PATH = os.path.join(os.getcwd(), 'json/')
AUDIO_PATH = os.path.join(os.getcwd(), 'audio/')
VIDEO_PATH = os.path.join(os.getcwd(), 'video/')
# mkdir
def make_dir(path):
try:
os.mkdir(path)
except:
pass
make_dir(JSON_PATH)
make_dir(VIDEO_PATH)
make_dir(AUDIO_PATH)
# 所有专栏
def fetch_all_column():
payload = "{}"
response = requests.request("POST", all_column_url, data=payload, headers=mobile_headers)
json_data = json.loads(response.content.decode('utf-8'))
print(json_data)
data = json_data.get('code')
if data != 0:
return
list = json_data.get('data').get('list')
my_column = []
for item in list:
cid = item.get('id')
column_unit = item.get('column_unit')
column_title = item.get('column_title')
had_sub = item.get('had_sub')
is_include_audio = item.get('is_include_audio')
message = "id = %s has_audio = %s column_unit = %s column_title = %s" % (cid, is_include_audio, column_unit, column_title)
print(my_column)
if had_sub:
my_column.append(message)
print("-" * 40)
print("had sub:")
print(my_column)
def column_type_string(column_type):
return {
1 : "audio",
3 : "video",
}.get(column_type, "other")
# 获取我的专栏
def fetch_my_column():
payload = "{}"
response = requests.request("POST", my_column_url, data=payload, headers=mobile_headers)
json_data = json.loads(response.content.decode('utf-8'))
print(json_data)
data = json_data.get('code')
if data != 0:
return
list = json_data.get('data').get('list')
for item in list:
id = item.get('id')
column_type = item.get('column_type')
column_title = item.get('column_title')
print("id = %s type = %s column_title = %s" % (id, column_type_string(column_type), column_title))
# 根据专栏 id 获取视频
def download_video_by_cid(cid, size):
payload = " {\"cid\":%d,\"size\":%d,\"prev\":%d,\"order\":\"earliest\"}" % (cid, size, 0)
response = requests.request("POST", video_url, data=payload, cookies=cj, headers=mobile_headers)
print(response.content.decode('utf-8'))
download_videos(response)
def download_videos(response):
json_data = json.loads(response.content.decode('utf-8'))
print(json_data)
data = json_data.get('code')
if data != 0:
return
list = json_data.get('data').get('list')
for item in list:
video_media = item.get('video_media')
article_title = item.get('article_title')
pattern = ' |\|'
article_title = re.sub(pattern, '_', article_title)
print(article_title)
print(video_media)
if len(video_media) > 0:
video = json.loads(video_media)
video_path = VIDEO_PATH + article_title +'.mp4'
m3u8 = video.get('hd').get('url')
cmd = 'ffmpeg -y -i %s %s' % (m3u8, video_path)
os.system(cmd.encode('utf-8'))
# 根据专栏 id 获取音频
def download_audio_by_cid(cid, size):
payload = " {\"cid\":%d,\"size\":%d,\"prev\":%d,\"order\":\"newest\"}" % (cid, size, 0)
response = requests.request("POST", audio_url, data=payload, headers=mobile_headers)
download_audio(response)
def download_json(json_data, file_name):
json_path = JSON_PATH + file_name + '.json'
# save json file
with open(json_path, 'wt') as f:
# unicode
json_string = json.dumps(json_data, sort_keys=False, ensure_ascii=False, indent=4)
f.write(json_string)
print("save : {}.json".format(file_name))
f.close()
def download_one_audio(audio_download_url, article_sharetitle):
if len(audio_download_url) > 0:
print(audio_download_url)
mp3_path = AUDIO_PATH + article_sharetitle +'.mp3'
cmd = "wget -c %s -O %s" % (audio_download_url, mp3_path)
os.system(cmd.encode('utf-8'))
def download_audio(response):
json_data = json.loads(response.content.decode('utf-8'))
print(json_data)
data = json_data.get('code')
if data != 0:
return
list = json_data.get('data').get('list')
for item in list:
audio_download_url = item.get('audio_download_url')
article_sharetitle = item.get('article_sharetitle')
pattern = ' |\|'
article_sharetitle = re.sub(pattern, '_', article_sharetitle)
print(article_sharetitle)
download_one_audio(audio_download_url, article_sharetitle)
def exec_audio():
cid = int(input("input audio column id:\n> "))
download_audio_by_cid(cid, default_size)
def exec_video():
cid = int(input("input video column id:\n> "))
download_video_by_cid(cid, default_size)
print("login 'http://geekbang.org' from chrome first")
if __name__ == "__main__":
""" 下载音频
cid = 50
size = 100
download_audio_by_cid(cid, size)
fetch_my_column()
"""
""" 下载视频
cid = 77
size = 100
download_video_by_cid(cid, size)
"""
""" 查看所有专栏
"""
fetch_all_column()
""" 查看我的专栏
fetch_my_column()
"""
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化