代码拉取完成,页面将自动刷新
#!/usr/bin/python
# coding:utf-8
"""
@author: 信长华
@contact: skywater@gmail.com
@software: PyCharm
@file: zhihu610.py
@time: 2022/3/26 15:03
"""
import openpyxl
import orjson
def read_excel(*, excel_name: str) -> list:
"""
读取excel, 生成数据的list
@param excel_name:
@return:
"""
wb = openpyxl.load_workbook(excel_name)
sheet_names = wb.get_sheet_names()
ws = wb.get_sheet_by_name(sheet_names[0])
info_list = []
for _index, row in enumerate(ws.rows):
if _index == 0:
continue
row_data = []
for cell in row:
if cell.value:
row_data.append(cell.value)
else:
row_data.append("")
print(_index, row_data)
# row_data = [item for item in row_data]
info_list.append(row_data)
return info_list
def read_keyword(*, keyword_name: str) -> dict:
"""
从文本文件中读取关键字
@param keyword_name:
@return:
"""
keyword_dict = dict()
with open(file=keyword_name, mode="r", encoding="utf-8") as f:
for line in f:
line = line.strip()
line_list = line.split(" ")
for word in line_list:
word = word.strip()
if not word:
continue
keyword_dict[word] = line
return keyword_dict
def make_count(*, item: str, keyword_dict: dict) -> str:
"""
判断频次
@param item:
@param keyword_dict:
@return:
"""
count = 0
if not item:
return "{}[{}]".format(count, "")
item = str(item)
line = []
for keyword in keyword_dict:
if keyword in item:
count += 1
line.append(keyword_dict[keyword])
return "{}{}".format(count, orjson.dumps(line).decode())
def make_excel(*, info_list: list, keyword_dict: dict, result_excel_name: str):
"""
@param info_list:
@param keyword_dict:
@return:
"""
name = "频次"
wb1 = openpyxl.Workbook()
sheet1 = wb1.active
sheet1.title = name
sheet1["A1"] = "ID"
sheet1["B1"] = "姓名"
sheet1["C1"] = "出现频率"
sheet1["D1"] = "性别"
sheet1["E1"] = "出现频次"
sheet1["F1"] = "住址"
sheet1["G1"] = "出现频次"
sheet1["H1"] = "身份证"
sheet1["I1"] = "出现频次"
sheet1["J1"] = "电话"
sheet1["K1"] = "出现频次"
sheet1["L1"] = "出生日期"
sheet1["M1"] = "出现频次"
sheet1["N1"] = "QQ"
sheet1["O1"] = "出现频次"
sheet1["P1"] = "email"
sheet1["Q1"] = "出现频次"
sheet1["R1"] = "msn"
sheet1["S1"] = "出现频次"
sheet1["T1"] = "附加关键字"
sheet1["U1"] = "出现频次"
sheet1["V1"] = "照片"
sheet1["W1"] = "出现频次"
excel_index = 2
for data in info_list:
sheet1["A{}".format(excel_index)] = data[0]
name = data[1]
sheet1["B{}".format(excel_index)] = name
sheet1["C{}".format(excel_index)] = make_count(
item=name, keyword_dict=keyword_dict
)
gender = data[2]
sheet1["D{}".format(excel_index)] = gender
sheet1["E{}".format(excel_index)] = make_count(
item=gender, keyword_dict=keyword_dict
)
address = data[3]
sheet1["F{}".format(excel_index)] = address
sheet1["G{}".format(excel_index)] = make_count(
item=address, keyword_dict=keyword_dict
)
person_id = data[4]
sheet1["H{}".format(excel_index)] = person_id
sheet1["I{}".format(excel_index)] = make_count(
item=person_id, keyword_dict=keyword_dict
)
mobile = data[5]
sheet1["J{}".format(excel_index)] = mobile
sheet1["K{}".format(excel_index)] = make_count(
item=mobile, keyword_dict=keyword_dict
)
birthday = data[6]
sheet1["L{}".format(excel_index)] = birthday
sheet1["M{}".format(excel_index)] = make_count(
item=mobile, keyword_dict=keyword_dict
)
qq = data[7]
sheet1["N{}".format(excel_index)] = qq
sheet1["O{}".format(excel_index)] = make_count(
item=qq, keyword_dict=keyword_dict
)
email = data[8]
sheet1["P{}".format(excel_index)] = email
sheet1["Q{}".format(excel_index)] = make_count(
item=email, keyword_dict=keyword_dict
)
msn = data[9]
sheet1["R{}".format(excel_index)] = msn
sheet1["S{}".format(excel_index)] = make_count(
item=msn, keyword_dict=keyword_dict
)
add_word = data[10]
sheet1["T{}".format(excel_index)] = add_word
sheet1["U{}".format(excel_index)] = make_count(
item=add_word, keyword_dict=keyword_dict
)
photo = data[11]
sheet1["V{}".format(excel_index)] = photo
sheet1["W{}".format(excel_index)] = make_count(
item=photo, keyword_dict=keyword_dict
)
excel_index += 1
wb1.save(result_excel_name)
info_list = read_excel(excel_name="userinfomation.xlsx")
keyword_dict = read_keyword(keyword_name="result.txt")
print(keyword_dict)
make_excel(
info_list=info_list,
keyword_dict=keyword_dict,
result_excel_name="{}-20220328.xlsx".format("频次"),
)
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。