代码拉取完成,页面将自动刷新
import requests
from bs4 import BeautifulSoup
import csv
import time
def get_jd_products(keyword, max_page=1, *, delay=5, timeout=10):
    """Scrape JD.com search results and return the most-commented products.

    For every result page from 1 to ``max_page`` the search listing is
    fetched, each ``li.gl-item`` entry is read for its name, price and link,
    and the product's detail page is then fetched to pick up the product id,
    comment count and store name.  Detail fields that cannot be found (or a
    detail page that cannot be downloaded) are recorded as ``"N/A"``.
    Listing entries that lack a name, price or link are skipped.

    Args:
        keyword: Search term sent to ``https://search.jd.com/Search``.
        max_page: Number of result pages to walk (pages ``1..max_page``).
        delay: Seconds to pause between consecutive result pages.
        timeout: Timeout in seconds handed to every ``requests.get`` call.

    Returns:
        A list of at most 50 dicts with the string-valued keys ``name``,
        ``price``, ``product_id``, ``link``, ``comment_count`` and
        ``store_name``, ordered by comment count, highest first.  Products
        whose comment count cannot be read as a number sort as 0.

    Raises:
        requests.RequestException: If a search-result page cannot be fetched.
    """
    products = []
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'}
    for page in range(1, max_page + 1):
        # Let requests build the query string so that non-ASCII keywords and
        # reserved characters such as "&" are percent-encoded correctly.
        search_response = requests.get(
            "https://search.jd.com/Search",
            params={"keyword": keyword, "enc": "utf-8", "page": page},
            headers=headers,
            timeout=timeout,
        )
        soup = BeautifulSoup(search_response.text, "html.parser")
        for item in soup.select("li.gl-item"):
            name_node = item.select_one("div.p-name a em")
            price_node = item.select_one("div.p-price i")
            link_node = item.select_one("div.p-name a")
            href = link_node.get("href") if link_node is not None else None
            if name_node is None or price_node is None or not href:
                # An entry without the expected markup must not abort the
                # whole crawl; skip it and keep going.
                continue
            name = name_node.text.strip()
            price = price_node.text.strip()
            link = _absolute_link(href)

            try:
                detail_response = requests.get(link, headers=headers, timeout=timeout)
            except requests.RequestException:
                # Same best-effort policy as for missing fields: keep the
                # listing data and mark the detail fields as unavailable.
                product_id = comment_count = store_name = "N/A"
            else:
                details_soup = BeautifulSoup(detail_response.text, "html.parser")
                product_id = _extract_product_id(details_soup)
                comment_count = _extract_comment_count(details_soup)
                store_name = _node_text(details_soup.select_one(".name a"))

            products.append({"name": name, "price": price, "product_id": product_id, "link": link, "comment_count": comment_count, "store_name": store_name})
        # NOTE(review): the original indentation was lost, so it is unclear
        # whether the pause was meant per page or per product; it is applied
        # once per result page here -- confirm.
        if page < max_page:
            time.sleep(delay)
    # Sort the products by comment count in descending order and return the top 50
    sorted_products = sorted(products, key=lambda p: _comment_count_to_int(p["comment_count"]), reverse=True)
    return sorted_products[:50]


def _absolute_link(href):
    """Return *href* as an https URL, leaving already-absolute URLs alone."""
    href = href.strip()
    if href.startswith(("http://", "https://")):
        return href
    return 'https:' + href


def _node_text(node):
    """Return the stripped text of a parsed node, or "N/A" if it is missing."""
    return node.text.strip() if node is not None else "N/A"


def _extract_product_id(details_soup):
    """Return the value of the first entry in the parameter list, or "N/A"."""
    parameter_list = details_soup.find("ul", {"class": "parameter2 p-parameter-list"})
    first_entry = parameter_list.find("li") if parameter_list is not None else None
    if first_entry is None:
        return "N/A"
    # Try the ASCII colon first (original behaviour), then the full-width
    # colon in case the page uses that as the label/value separator.
    for separator in (":", "\uff1a"):
        parts = first_entry.text.split(separator)
        if len(parts) > 1:
            return parts[1].strip()
    return "N/A"


def _extract_comment_count(details_soup):
    """Return the second whitespace-separated token of ".comment-count", or "N/A"."""
    node = details_soup.select_one(".comment-count")
    tokens = node.text.split() if node is not None else []
    return tokens[1] if len(tokens) > 1 else "N/A"


def _comment_count_to_int(raw):
    """Turn a scraped comment count into an int usable as a sort key.

    Accepts plain digits as well as abbreviated values such as "2000+" or
    "1.5万+" (万 = 10,000); anything unparseable, including "N/A", yields 0.
    """
    cleaned = raw.strip().rstrip("+").replace(",", "")
    multiplier = 1
    if cleaned.endswith("万"):
        multiplier = 10000
        cleaned = cleaned[:-1]
    try:
        return int(cleaned) * multiplier
    except ValueError:
        pass
    try:
        return int(float(cleaned) * multiplier)
    except (ValueError, OverflowError):
        return 0
def save_products_to_csv(products, filename):
    """Write product records to a UTF-8 CSV file and print a confirmation.

    The file is created (or overwritten) with a header row followed by one
    row per product, in the order the products are given.

    Args:
        products: Iterable of dicts providing the keys ``name``, ``price``,
            ``product_id``, ``link``, ``comment_count`` and ``store_name``.
        filename: Path of the CSV file to write.

    Raises:
        KeyError: If a product dict lacks one of the keys listed above.
    """
    header = ["名称", "价格", "货号", "链接", "评论数", "店铺名称"]
    # Dict keys in the same left-to-right order as the header columns.
    columns = ("name", "price", "product_id", "link", "comment_count", "store_name")
    with open(filename, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        # A generator keeps the rows being produced and written one at a time.
        sheet.writerows([entry[column] for column in columns] for entry in products)
    print(f"商品信息已保存至{filename}!")
if __name__ == "__main__":
    # Script entry point: crawl ten result pages for a fixed search term and
    # store the returned products in a CSV file named after the search.
    search_term = "清风原木"
    page_total = 10
    output_path = f"{search_term}_{page_total}页_评论数前50.csv"
    save_products_to_csv(
        get_jd_products(search_term, max_page=page_total),
        output_path,
    )
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。