# NOTE: web-page notices captured along with the script; not part of the program.
# 代码拉取完成,页面将自动刷新
# 同步操作将从 飞哥/爬虫demo 强制同步,此操作会覆盖自 Fork 仓库以来所做的任何修改,且无法恢复!!!
# 确定后同步将在后台操作,完成时将刷新页面,请耐心等待。
'''
Author: fg
Date: 2023-02-01 16:58:28
LastEditors: fg
LastEditTime: 2023-02-01 17:06:00
'''
import requests, json
import pandas as pd
import time
import re
# Text to search for.
# NOTE(review): `name` is not used to build `url` in the visible code; the
# `keyword` query parameter below is the same text, percent-encoded by hand.
name = '蛋仔派对bug'
# Full search endpoint URL with every query parameter baked in, including
# `offset=0`, `count=10`, and the `msToken` / `X-Bogus` values.
# NOTE(review): presumably these values were captured from a browser session
# and may expire -- confirm before relying on them.
url = "https://www.douyin.com/aweme/v1/web/general/search/single/?device_platform=webapp&aid=6383&channel=channel_pc_web&search_channel=aweme_general&sort_type=2&publish_time=0&keyword=%E8%9B%8B%E4%BB%94%E6%B4%BE%E5%AF%B9bug&search_source=tab_search&query_correct_type=1&is_filter_search=1&from_group_id=&offset=0&count=10&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1920&screen_height=1080&browser_language=zh-CN&browser_platform=Win32&browser_name=Chrome&browser_version=107.0.0.0&browser_online=true&engine_name=Blink&engine_version=107.0.0.0&os_name=Windows&os_version=10&cpu_core_num=12&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50&webid=7161298122589062667&msToken=s_gtpqKiciDNb27UA4kdYWxT080gccYjBRPfmCIMxMIDUyqKumI0kNw_-t7UCLDwMOC0jDBRuFOYojkETHnV5z6LEpNav6peEnWHFL-oKTi28zSSM4ddtg==&X-Bogus=DFSzswVL30hANtdJSpmkQKXAIQ5V"
# Empty request body; only passed as `data=` to the module-level request below.
payload={}
# HTTP headers sent with every request in this file.
# NOTE(review): the Cookie value is hard-coded session data; consider loading
# it from configuration instead of keeping it in source.
headers = {
'accept': 'application/json, text/plain, */*',
'accept-language': 'zh-CN,zh;q=0.9',
'referer': 'https://www.douyin.com/search/%E8%9B%8B%E4%BB%94%E6%B4%BE%E5%AF%B9bug?aid=86c9ffea-a071-407d-a5ce-b9ea5edf39ad&publish_time=0&sort_type=0&source=normal_search&type=general',
'sec-ch-ua': '"Google Chrome";v="107", "Chromium";v="107", "Not=A?Brand";v="24"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
'Cookie': 'ttcid=026e309b92cd415683d7c4ff4b64e3d918; ttwid=1%7CSH3ISRCt7PvjgSCMsX_c7pS2Xue4qOVGamuWDlhIw90%7C1667369661%7Cd001973747192f7f98d4de8835f3db938451af98da5b13e2eb48f8945024cfd3; s_v_web_id=verify_l9z8uj8o_oRAqPZUn_4avS_4utp_9E37_uTltWjM2yzDN; passport_csrf_token=e5907daccbe45661653ea40abd5b043d; passport_csrf_token_default=e5907daccbe45661653ea40abd5b043d; odin_tt=1708901e1d7efa00d20c3dcfc1ba258dc3a65d84d00e771a71df0d620ee1469a8ab94a59f50e39daa601ecc4bc57e7d790bfdfe3a4242f6f4f3eadc389305b3e6618848e495d048459db223c186c4962; xgplayer_user_id=167253398575; douyin.com; __ac_nonce=0638f44e0005a8c2c9ea0; __ac_signature=_02B4Z6wo00f01mNAAbwAAIDDAErB1qTtmq5jYAUAAPtu99MOq5RaA3oiDUAYE8QzL8fQHmY5BZXCz49tfSTz5f4PV7cV4UkcPP7YgVoW4zHWnvNvc9uUYMA0ALgGf-QL0-xqcndJaIt62Ec945; strategyABtestKey=%221670333667.771%22; csrf_session_id=2077140ced824678ea2862d7db92d4ae; SEARCH_RESULT_LIST_TYPE=%22single%22; home_can_add_dy_2_desktop=%221%22; msToken=s_gtpqKiciDNb27UA4kdYWxT080gccYjBRPfmCIMxMIDUyqKumI0kNw_-t7UCLDwMOC0jDBRuFOYojkETHnV5z6LEpNav6peEnWHFL-oKTi28zSSM4ddtg; tt_scid=mP2FFZhH5nuqWANWpCh5dmX4y8lv9akeMag26mDgkg9YIPXxikWJgQ9quB8SynPYf1e2; download_guide=%222%2F20221206%22',
'Host': 'www.douyin.com',
'Connection': 'keep-alive'
}
# Issues one GET request as soon as the module runs.
# NOTE(review): `response` is not read anywhere in the visible code, and
# getdata() sends its own request to the same URL.
response = requests.request("GET", url, headers=headers, data=payload)
# Accumulators for titles, links and publish times. In the visible code they
# are only referenced from the disabled (string-literal) export code near the
# end of the file, so they stay empty.
title_list, like_list, play_list = [],[],[]
def getdata(shu):
    """Fetch the search results at ``url`` and print up to ``shu`` videos.

    Sends a GET request to the module-level ``url`` with the module-level
    ``headers``, decodes the body as UTF-8 JSON and walks the list stored
    under its ``data`` key. For each entry that carries an ``aweme_info``
    object it prints the title, the video link and the publish time
    (formatted in local time), followed by an empty line.

    Args:
        shu: Maximum number of videos to print. Fewer are printed when the
            response holds fewer; zero or a negative value prints nothing.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            exceeds the timeout.
        json.JSONDecodeError: If the response body is not valid JSON.
        KeyError: If the decoded JSON has no ``data`` key, or a video entry
            lacks ``desc``, ``aweme_id`` or ``create_time``.
    """
    # A timeout keeps the script from blocking forever on a stalled connection.
    reply = requests.get(url, headers=headers, timeout=10)
    result = json.loads(reply.content.decode('utf-8'))

    if shu <= 0:
        return

    shown = 0
    # Iterate over the entries themselves instead of indexing with
    # range(shu), so a short result list cannot raise IndexError.
    for entry in result['data']:
        # Entries without an 'aweme_info' object are skipped rather than
        # aborting the whole run with KeyError.
        info = entry.get('aweme_info')
        if not info:
            continue

        print('标题:' + info['desc'])
        print('链接:https://www.douyin.com/video/' + info['aweme_id'])
        # Convert the Unix timestamp into a readable local-time string.
        published = time.strftime(
            "%Y-%m-%d %H:%M:%S", time.localtime(info['create_time'])
        )
        print('时间:' + published)
        print('')

        shown += 1
        if shown >= shu:
            break
# Fetch the search results and print the first three entries.
getdata(3)
# Disabled export code, kept as a bare string literal so it never executes:
# it would build a DataFrame from title_list / like_list / play_list and
# write it to an Excel file.
"""
Data = {
'标题': title_list,
'链接': like_list,
'发布时间': play_list
}
#写入excel
dataframe = pd.DataFrame(data=Data)
print(dataframe)
dataframe.to_excel('./数据2.xlsx', index=False, encoding='utf-8')"""
# Latest n videos -- also disabled: the loop below is inside a string literal.
""" for i in range(2):
print('标题:'+ title_list[i])
print('时间:'+ play_list[i])
print('链接:'+ like_list[i])
print('') """
# Marks the end of the run on standard output.
print("end")
# NOTE: web-page moderation notice captured along with the script; not part of the program.
# 此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
# 如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。