# NOTE: web-page residue, not part of the script ("Code pull complete; the page will refresh automatically.").
from bs4 import BeautifulSoup
import json
import sys
def process(htmlpath):
    """Convert the first HTML table in *htmlpath* into a JSON list of records.

    The file is parsed with BeautifulSoup's ``html.parser``. Of the rows in the
    first ``<table>``, the first three and the last one are skipped
    (presumably header/footer rows -- confirm against the page layout). Each
    remaining row must provide at least 13 ``<td>`` cells, which are mapped
    positionally onto the record fields below. Rows with too few cells are
    printed to stdout and left out of the output.

    The result is written next to the input, with ".html" replaced by ".json"
    in the path.

    Args:
        htmlpath: Path to the UTF-8 encoded HTML file to convert.

    Raises:
        IndexError: If the document contains no ``<table>`` element.
        OSError: If the input cannot be read or the output cannot be written.
    """
    jsonpath = htmlpath.replace(".html", ".json")
    if jsonpath == htmlpath:
        # The path holds no ".html": writing to the same name would destroy
        # the input file, so fall back to appending the extension instead.
        jsonpath = htmlpath + ".json"

    with open(htmlpath, "r", encoding='utf-8') as html_file:
        soup = BeautifulSoup(html_file.read(), "html.parser")

    first_table = soup.find_all("table")[0]
    rows = first_table.find_all("tr")[3:-1]

    logs = []
    for tr in rows:
        infos = tr.find_all('td')
        try:
            # Build a fresh dict per row; reusing one dict across iterations
            # would make every list entry alias the last row's values.
            log = {
                "log_name": infos[0].text,
                "url": infos[1].text,
                "MMD(hrs)": infos[2].text,
                "Latest STH(UTC)": infos[3].text,
                "Entries": {
                    "Tree Size": infos[4].text,
                    "Backlog": infos[5].text,
                    "Latest Entry Age": infos[6].text,
                },
                "Last get-sth call(UTC)": infos[7].text,
                "Google Uptime%": infos[8].text,
                "Chrome (Status)": infos[9].text,
                "Chrome Roots Missing": infos[10].text,
                "Apple (Status)": infos[11].text,
                "Apple Roots Missing": infos[12].text,
            }
        except IndexError:
            # Row has fewer cells than expected: report it and move on.
            print(infos)
            continue
        logs.append(log)

    # Open the output only after parsing succeeded, so a failure above never
    # leaves behind an empty or truncated JSON file.
    with open(jsonpath, 'w') as jsonfile:
        json.dump(logs, jsonfile)
if __name__ == "__main__":
    # Script entry point: expects the path of the HTML file as first argument.
    if len(sys.argv) < 2:
        # Exit with a readable usage message instead of an IndexError traceback.
        sys.exit("usage: {} <path-to-html-file>".format(sys.argv[0]))
    htmlpath = sys.argv[1]
    process(htmlpath)
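# NOTE: web-page residue, not part of the script (content-moderation notice from the hosting site):
# "Content that may be unsuitable for display was detected here, so the page does not show it.
# You can review and modify it through the relevant editing functions.
# If you confirm the content does not involve inappropriate language / pure advertising / violence /
# vulgar or pornographic material / infringement / piracy / false or worthless content, or content
# that violates national laws and regulations, you may click submit to appeal and it will be handled
# as soon as possible."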