# Scraped repository page header: branch "master" · Branches (1) · Manage
# File: template-code/process.py

from bs4 import BeautifulSoup
import json
import sys

def process(htmlpath):
    """Convert the first HTML table in *htmlpath* into a JSON list of records.

    The file is read as UTF-8 and parsed with BeautifulSoup. From the first
    ``<table>`` in the document, the first three ``<tr>`` rows and the last
    one are skipped (presumably header/footer rows -- confirm against the
    page layout). Each remaining row with at least 13 ``<td>`` cells becomes
    one dict; rows with fewer cells are printed and skipped.

    The result is written next to the input: a trailing ``.html`` is swapped
    for ``.json``; if the path does not end in ``.html``, ``.json`` is
    appended so the input file is never overwritten.

    Args:
        htmlpath: Path of the HTML file to convert.

    Raises:
        IndexError: If the document contains no ``<table>`` element.
        OSError: If the input cannot be read or the output cannot be written.
    """
    html_suffix = ".html"
    if htmlpath.endswith(html_suffix):
        json_path = htmlpath[:-len(html_suffix)] + ".json"
    else:
        # Never let the output path collide with the input path.
        json_path = htmlpath + ".json"

    with open(htmlpath, "r", encoding="utf-8") as html_file:
        soup = BeautifulSoup(html_file.read(), "html.parser")

    # Parse everything before touching the output file, so a parse failure
    # does not leave an empty/truncated JSON file behind.
    first_table = soup.find_all("table")[0]
    rows = first_table.find_all("tr")[3:-1]

    expected_cells = 13  # highest cell index read below is 12
    logs = []
    for row in rows:
        cells = row.find_all("td")
        if len(cells) < expected_cells:
            # Malformed/short row: report it and keep going.
            print(cells)
            continue
        # Build a fresh dict per row; reusing one dict would make every
        # list entry alias the last row parsed.
        logs.append({
            "log_name": cells[0].text,
            "url": cells[1].text,
            "MMD(hrs)": cells[2].text,
            "Latest STH(UTC)": cells[3].text,
            "Entries": {
                "Tree Size": cells[4].text,
                "Backlog": cells[5].text,
                "Latest Entry Age": cells[6].text,
            },
            "Last get-sth call(UTC)": cells[7].text,
            "Google Uptime%": cells[8].text,
            "Chrome (Status)": cells[9].text,
            "Chrome Roots Missing": cells[10].text,
            "Apple (Status)": cells[11].text,
            "Apple Roots Missing": cells[12].text,
        })

    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(logs, json_file)


if __name__ == "__main__":
    # The script needs the path of the HTML file as its first argument;
    # exit with a usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <htmlpath>".format(sys.argv[0]))
    process(sys.argv[1])