加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
CollaborativeFiltering_WithConfig.py 20.92 KB
一键复制 编辑 原始数据 按行查看 历史
q981160455 提交于 2023-03-10 08:26 . windows端用这个
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
import csv
import os
import sys
import scipy.sparse
import numpy as np
import sklearn
from sklearn.metrics.pairwise import cosine_similarity
import pymysql
import re
import math
import time
from functools import reduce
print("please wait patiently...")
# Noise fragments (column headers, units, package labels, ...) that getChinese()
# strips from organisation names read out of the notice table.
delete_list = [
    "供应商名称", "中标金额", "元", "包名称", "联系电话", "详见公示", "二包", "标的名称",
    "统一社会信用代码:", "最终报价", "最后报价金额", "(元)", "投标总报价", "(万元)", "包一", "包二", "包三",
    "企业类型", "投标价", "成交价", "包号", "单价", "总价", "合同包", "(人民币元)", "(人民币:元)", "评审结果",
    "最后得分", "最后磋商报价", "最后成交总报价", "最后报价", "(下浮率)", "组织机构代码", "证号", "总得分", "得分",
    "综合得分", "详见公告正文", "某单位",
]
# The fragments are removed one after another, so a short fragment that is
# contained in a longer one (e.g. "元" inside "(元)", "得分" inside "综合得分")
# must not be applied first: it would destroy the longer match and leave
# residue such as "()" or "综合" behind. Sorting longest-first guarantees that
# no entry is a substring of a later entry (the sort is stable, so entries of
# equal length keep their original relative order).
delete_list.sort(key=len, reverse=True)
# regions = ["辽宁","吉林","黑龙江","河北","山西","陕西",
# "甘肃","青海","山东","安徽","江苏","浙江",
# "河南","湖北","湖南","江西","台湾","福建",
# "云南","海南","四川","贵州","广东",
# "内蒙古","新疆","广西","西藏","宁夏",
# "北京","上海","天津","重庆","香港","澳门"]#要加上”市“、”省“等等
#regions = ["重庆市","上海市"]
def getChinese(str1):
    """Clean an organisation name taken from the notice table.

    Every fragment listed in the module-level ``delete_list`` is removed from
    ``str1`` (in list order). If the remaining text contains both "(" and ")"
    it is returned unchanged; otherwise only the characters in the range
    U+4E00..U+9FA5 are kept and returned as one string.
    """
    cleaned = str1
    for noise in delete_list:
        # str.replace is a no-op when the fragment is absent.
        cleaned = cleaned.replace(noise, "")

    has_parentheses = "(" in cleaned and ")" in cleaned
    if has_parentheses:
        return cleaned

    chinese_chars = re.findall('[\u4e00-\u9fa5]', cleaned)
    return ''.join(chinese_chars)
# Everything that is neither a word character nor the decimal point
# (thousands separators, spaces, brackets, currency punctuation, ...).
_PRICE_NOISE = re.compile(r"[^\w.]")
# First decimal number in the cleaned text.
_PRICE_NUMBER = re.compile(r"\d+(?:\.\d+)?")


def get_score(float_num):
    """Turn a raw price string into a log-scaled relation score.

    The first number found in ``float_num`` is read as the price and mapped to
    ``10.04 + 4.34 * ln(price)``.

    The decimal point is preserved while other punctuation (e.g. thousands
    separators) is dropped, so "125000.0" is read as 125000 and not as
    1250000.

    Args:
        float_num: Price text, e.g. ``"1,250.50元"``. Non-string values are
            converted with ``str()`` first.

    Returns:
        The score, or the neutral value ``1`` when no number can be found,
        when the logarithm is undefined (price of zero), or when the score
        falls inside ``[-2, 2]``.
    """
    cleaned = _PRICE_NOISE.sub("", str(float_num))
    number = _PRICE_NUMBER.search(cleaned)
    if number is None:
        return 1

    try:
        price = float(number.group())
        score = 10.04 + 4.34 * math.log(price)
    except (ValueError, OverflowError):
        # math.log(0.0) raises ValueError; treat such prices as "no information".
        return 1

    if -2 <= score <= 2:
        return 1
    return score
def find_relation_byRegion(aim_region="('重庆')"):
    """Dump the buyer/agent/provider/price relations of a region to CSV.

    Queries the notice table (module-level ``notice_table_name``) through the
    module-level cursor ``cursor_notice`` and writes every returned row to
    ``xietongguolv/relations_<aim_region>.csv``.

    Args:
        aim_region: SQL tuple literal of region names, inserted verbatim after
            ``region in``. The special value ``"('全国')"`` disables the
            region filter.
    """
    print("Updating recommand system,please wait!")

    select_part = ("select call_unit,agent_unit_name,provide_unit,actual_price from %s"
                   " where call_unit != agent_unit_name AND locate('无', provide_unit )<=0") % notice_table_name
    common_tail = " AND locate('地址', provide_unit )<=0 AND actual_price != '' AND actual_price != '0.0';"
    # Nationwide runs carry no region restriction at all.
    region_filter = "" if aim_region == "('全国')" else " AND region in " + aim_region
    sql = select_part + region_filter + common_tail

    cursor_notice.execute(sql)
    relations_table = cursor_notice.fetchall()

    output_dir = "xietongguolv"
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    csv_path = output_dir + "/relations" + "_" + aim_region + ".csv"
    with open(csv_path, "w", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(list(record) for record in relations_table)
    print("Done")
#============================================================================================
#开始预处理
def _report_progress(stage, done, total):
    """Redraw the one-line console progress bar.

    The whole run is shown as five stages of 20 % each; ``stage`` (0-based)
    selects the stage and ``done / total`` the position inside it. The line is
    only redrawn about once per percent of the stage to avoid flooding the
    console. Reads the module-level ``command`` for the label.
    """
    if total <= 0:
        return
    step = max(1, total // 100)
    if done != total and done % step:
        return
    print("\r", end="")
    print(command + " progress: {:.2f}%: ".format(((done / total) * 0.2 + 0.2 * stage) * 100),
          "▋" * int((done * 100 // total) * 0.2 // 2 + 10 * stage), end="")


def _row_histories(csr):
    """Return, for every row of a CSR matrix, the list of its stored column indices."""
    indptr = csr.indptr
    indices = csr.indices
    return [indices[indptr[row]:indptr[row + 1]].tolist() for row in range(csr.shape[0])]


def get_simularity_matrix(aim_region="('重庆')"):
    """Build the buyer x agent score matrix and the buyer-buyer similarities.

    Reads ``xietongguolv/relations_<aim_region>.csv``, using the module-level
    column selectors ``buyer_col`` / ``agent_col`` and column 3 as the price.

    Module-level state written:
        buyers_names, agents_names: de-duplicated names (first-seen order);
            the list position is the matrix index.
        relations: one ``[buyer, agent, "", score]`` entry appended per row.
        buyer_history: ``buyer_history[b]`` lists the agent indices of buyer b.
        agent_history: ``agent_history[a]`` lists the buyer indices of agent a.

    Also writes all similarity tuples to
    ``xietongguolv/simularity_+<aim_region>.csv``.

    Returns:
        ``(buyers_sims, buyer_sims_buyers)`` where ``buyers_sims`` is a list of
        ``(index1, index2, similarity, name1, name2)`` sorted by ``index1`` and
        descending similarity (self-similarity is stored as 0), and
        ``buyer_sims_buyers[b]`` is the list of ``(index2, similarity)`` pairs
        of buyer ``b`` in that same order (empty list when there are none).
    """
    global buyers_names, agents_names, relations, buyer_history, agent_history

    print("reading .csv,please wait")
    # newline="" is what the csv module expects for files it wrote itself.
    with open("xietongguolv/relations" + "_" + aim_region + ".csv", "r", encoding="utf-8", newline="") as f:
        for record in csv.reader(f):
            agent_name = getChinese(record[agent_col])
            if agent_name == "":
                # Needed when recommending buyers to agents: skip unnamed agents.
                continue
            buyer_name = getChinese(record[buyer_col])
            if buyer_name == "" or buyer_name is None or buyer_name == " ":
                continue
            buyers_names.append(buyer_name)
            agents_names.append(agent_name)
            relations.append([buyer_name, agent_name, "", get_score(record[3])])

    # De-duplicate while keeping first-seen order so indices are reproducible.
    buyers_names = list(dict.fromkeys(buyers_names))
    agents_names = list(dict.fromkeys(agents_names))
    buyers_dict = {name: index for index, name in enumerate(buyers_names)}
    agents_dict = {name: index for index, name in enumerate(agents_names)}
    len_buyers = len(buyers_names)
    len_agents = len(agents_names)
    print("ceating matrix :" + str(len_buyers) + " x " + str(len_agents))

    # Stage 0: translate every relation into (row, column, score) triplets.
    buyers_indexs = []
    agents_indexs = []
    scores_indexs = []
    len_relations = len(relations)
    for done, relation in enumerate(relations, start=1):
        _report_progress(0, done, len_relations)
        buyers_indexs.append(buyers_dict[relation[0]])
        agents_indexs.append(agents_dict[relation[1]])
        scores_indexs.append(relation[3])

    b_a = scipy.sparse.coo_matrix((scores_indexs, (buyers_indexs, agents_indexs)),
                                  shape=(len_buyers, len_agents))
    # tocsr() sums duplicate (buyer, agent) pairs; drop entries that cancelled
    # out to zero so they do not count as an interaction.
    matrix_b_a = b_a.tocsr()
    matrix_b_a.eliminate_zeros()

    # Stages 1 and 2: interaction histories, read per row from the CSR index
    # arrays. The transpose of a CSR matrix is CSC (column-ordered), so it is
    # converted back to CSR before its rows (agents) are read; grouping the
    # column-ordered nonzero() output by row would split and misalign the
    # per-agent lists. Reading by row also keeps list position == index even
    # for a row without any entry.
    buyer_history = _row_histories(matrix_b_a)
    _report_progress(1, 1, 1)
    agent_history = _row_histories(matrix_b_a.transpose().tocsr())
    _report_progress(2, 1, 1)

    # Cosine similarity between buyers (rows). COO keeps row, column and value
    # aligned entry by entry, including any explicitly stored zero.
    sim_coo = cosine_similarity(matrix_b_a, dense_output=False).tocoo()
    buyers_sims = []
    for index1, index2, value in zip(sim_coo.row, sim_coo.col, sim_coo.data):
        if value == 0:
            continue
        # A buyer is never recommended because of its similarity to itself.
        similarity = 0 if index1 == index2 else value
        buyers_sims.append((index1, index2, similarity, buyers_names[index1], buyers_names[index2]))
    buyers_sims.sort(key=lambda x: (x[0], -x[2]))

    with open("xietongguolv/simularity_+" + aim_region + ".csv", "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        for entry in buyers_sims:
            writer.writerow(entry)

    # Stage 3: per-buyer neighbour lists, in the sorted order established above.
    buyer_sims_buyers = [[] for _ in buyers_names]
    len_buyers_sims = len(buyers_sims)
    for done, entry in enumerate(buyers_sims, start=1):
        _report_progress(3, done, len_buyers_sims)
        buyer_sims_buyers[entry[0]].append((entry[1], entry[2]))
    return buyers_sims, buyer_sims_buyers
def get_agent_buyers(agent_index):
    """Return the entry of the module-level ``agent_history`` at ``agent_index``.

    ``agent_history`` is filled by ``get_simularity_matrix``; each entry is the
    list of buyer indices collected for one agent.
    """
    return agent_history[agent_index]
def get_buyer_agents(buyer_index):
    """Return the entry of the module-level ``buyer_history`` at ``buyer_index``.

    ``buyer_history`` is filled by ``get_simularity_matrix``; each entry is the
    list of agent indices collected for one buyer.
    """
    agents_of_buyer = buyer_history[buyer_index]
    return agents_of_buyer
def get_simular_buyer_for_agent(buyer_index):
    """Return the similar-buyer list stored for ``buyer_index``.

    Looks the index up in the module-level ``buyer_sims_buyers`` (the second
    value returned by ``get_simularity_matrix``), whose entries hold
    ``(other_buyer_index, similarity)`` pairs.
    """
    similar_buyers = buyer_sims_buyers[buyer_index]
    return similar_buyers
def list_contant(list1, list2):
    """Concatenate two sequences with ``+`` and return the new sequence.

    Neither argument is modified; used as the folding function for ``reduce``.
    """
    combined = list1 + list2
    return combined
def textDistance(str1, str2):
    """Return the character-overlap ratio of ``str2`` with respect to ``str1``.

    The ratio is the number of distinct characters occurring in both strings
    divided by the length of ``str1`` (repeated characters in ``str1`` count
    towards the length).

    Args:
        str1: Reference text.
        str2: Text compared against the reference.

    Returns:
        A float in ``[0, 1]``; ``0.0`` when ``str1`` is empty (nothing can
        overlap, and the division would otherwise fail).
    """
    if not str1:
        return 0.0
    shared_chars = set(str1) & set(str2)
    return len(shared_chars) / len(str1)
def _to_builtin(value):
    """Convert a NumPy scalar to the equivalent Python scalar; pass anything else through."""
    if isinstance(value, np.generic):
        return value.item()
    return value


def tupleToJsonObject(tuple):
    """Convert a ``(id, score, name)`` recommendation tuple into a dict.

    The id and score usually arrive as NumPy scalars. They are converted to
    plain ``int`` / ``float`` so that the textual form of the dict (it is
    stored via ``str()``) stays ``{'pid': 3, ..., 'score': 0.5}`` regardless
    of the installed NumPy version; NumPy 2 would otherwise render them as
    ``np.int64(3)`` / ``np.float64(0.5)``.

    Args:
        tuple: Sequence whose items 0, 1, 2 are id, score and name.

    Returns:
        ``{"pid": id, "name": name, "score": score}``.
    """
    object_id = _to_builtin(tuple[0])
    object_score = _to_builtin(tuple[1])
    object_name = tuple[2]
    return {"pid": object_id, "name": object_name, "score": object_score}
# Read Controller.config: one "label:value" entry per line. Only the value is
# kept, and values are identified purely by their position in the file.
ii = []
with open("Controller.config", "r", encoding="utf-8") as f:
    controller_Data = f.readlines()
for i in controller_Data:
    if not i.strip():
        # Blank lines carry no parameter and must not shift the positions.
        continue
    try:
        # Split on the first ":" only, so values that contain ":" themselves
        # (passwords, for instance) are not truncated.
        ii.append(i.split(":", 1)[1].rstrip("\n"))
    except IndexError as e:
        print(e)
        print("wrong parameter:" + i)
try:
    contorller_id = ii[0]
    operater = ii[1]
    # Source (notice) database.
    input_ip = ii[2]
    input_port = int(ii[3])
    input_user = ii[4]
    input_password = ii[5]
    notice_database = ii[6]
    notice_table_name = ii[7]
    buyer_col = ii[8]
    agent_col = ii[9]
    provider_col = ii[10]
    price_col = ii[11]
    region_col = ii[12]
    regions = ii[13].split(",")
    # Render the regions as an SQL tuple literal, e.g. ('重庆', '上海').
    # A single-element Python tuple prints as ('重庆',); drop that trailing comma.
    regions_name_list = str(tuple(regions)).replace('"', "")
    if ",)" in regions_name_list:
        regions_name_list = regions_name_list.replace(",)", ")")
    topK = int(ii[14])
    Update = ii[15]
    # Result database.
    result_ip = ii[16]
    result_port = int(ii[17])
    result_user = ii[18]
    result_password = ii[19]
    result_database = ii[20]
    result_table_name = ii[21]
except Exception as e:
    print(e)
    print("input parameters wrong!")
    sys.exit()
# Main run: for each recommendation direction, rebuild the similarity data,
# compute the top-K recommendations for every "agent-side" entity, write them
# to a CSV file and insert them into the result table.
conn0 = None
conn1 = None
try:
    # Connect to the source (notice) database. A failure is only reported
    # here; it becomes fatal later if the relations actually have to be
    # refreshed from that database.
    try:
        conn0 = pymysql.connect(host=input_ip,
                                port=input_port,
                                user=input_user,
                                password=input_password,
                                database=notice_database,
                                charset="utf8")
        cursor_notice = conn0.cursor()  # cursor used to execute SQL statements
    except Exception as e:
        print(e)
        print(
            "Failed to reach database:" + notice_database + ",check input parameters or network connection or database state.")
    # Connect to the result database and find the highest result_id in use.
    try:
        conn1 = pymysql.connect(host=result_ip,
                                port=result_port,
                                user=result_user,
                                password=result_password,
                                database=result_database,
                                charset="utf8")
        cursor_result = conn1.cursor()  # cursor used to execute SQL statements
        sql_findMax = "SELECT MAX(result_id) FROM " + result_table_name
        cursor_result.execute(sql_findMax)
        maxid = cursor_result.fetchall()[0][0]
        if maxid is None:
            maxid = 0
    except Exception as e:
        print(e)
        print(
            "Failed to reach database:" + result_database + ",check input parameters or network connection or database state.")

    commands = ["GiveProvidersToBuyers", "GiveBuyersToProviders", "GiveAgentsToBuyers", "GiveBuyersToAgents", ]
    # command -> (buyer_col, agent_col, character): which CSV columns play the
    # "buyer" (recommended item) and "agent" (recommendation target) roles,
    # and the role label stored with each result.
    command_settings = {
        "GiveProvidersToBuyers": (2, 0, "采购方"),
        "GiveBuyersToProviders": (0, 2, "供应商"),
        "GiveAgentsToBuyers": (1, 0, "采购方"),
        "GiveBuyersToAgents": (0, 1, "代理机构"),
    }
    result_id = maxid + 1
    region = regions_name_list
    for com in commands:
        command = com
        buyer_col, agent_col, character = command_settings[command]
        # Reset the module-level state the helper functions read and fill.
        relations = []
        relations_dict = {}
        buyers_names = []
        agents_names = []
        providers_names = []
        buyer_history = []
        agent_history = []
        if Update == "是":
            find_relation_byRegion(region)
        buyers_sims, buyer_sims_buyers = get_simularity_matrix(region)

        # Start recommending.
        now = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        results = []
        now_forFile = now.replace(":", "_").replace(" ", "_")
        if not os.path.exists("xietongguolv/results"):
            os.makedirs("xietongguolv/results")
        with open("xietongguolv/results/" + command + "_in_" + region + "_" + now_forFile + ".csv", "w",
                  encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            len_agents_names = len(agents_names)
            for i in range(len_agents_names):
                process = i + 1
                print("\r", end="")
                print(command + " progress: {:.2f}%: ".format(((process / len_agents_names) * 0.2 + 0.8) * 100),
                      "▋" * int((process * 100 // len_agents_names) * 0.2 // 2 + 40), end="")

                # Take the buyers this agent already works with and add up, per
                # candidate, the similarity to each of those buyers. Buyers the
                # agent already has are never recommended.
                agents_buyers = get_agent_buyers(i)
                known_buyers = set(agents_buyers)
                similarity_totals = {}
                for known_buyer in agents_buyers:
                    for code, similarity in get_simular_buyer_for_agent(known_buyer):
                        if code in known_buyers:
                            continue
                        similarity_totals[code] = similarity_totals.get(code, 0) + similarity
                code_value = [(code, value, buyers_names[code])
                              for code, value in sorted(similarity_totals.items(), key=lambda item: item[0])]
                re_type = "成功推荐的"

                # The agent's buyers have no other similar buyers: fall back to
                # a random sample ranked by textual overlap with the agent's
                # buyer names.
                if len(code_value) == 0:
                    re_type = "随机赋值的"
                    agent_log = str(list(map(lambda x: buyers_names[x], agents_buyers))).replace("[", "").replace(
                        "]", "").replace("'", "")
                    buyer_count = len(buyers_names)
                    # 0.1 % of all buyers, but always enough to fill topK when
                    # that many buyers exist (a small data set would otherwise
                    # yield an empty sample).
                    sample_size = max(int(buyer_count * 0.001), min(topK, buyer_count))
                    # choice(..., replace=False) covers every index exactly
                    # once at most, so no buyer is recommended twice.
                    random_indexs = np.random.choice(buyer_count, size=sample_size, replace=False)
                    random_buyers = list(map(lambda x: buyers_names[x], random_indexs))
                    random_sims = [textDistance(agent_log, name) if agent_log else 0.0 for name in random_buyers]
                    code_value = list(zip(random_indexs, random_sims, random_buyers))

                code_value.sort(key=lambda x: x[1], reverse=True)
                recommand_give = code_value[:topK]
                recommand_list = list(map(tupleToJsonObject, recommand_give))
                recommand_JSON = {"Objects": recommand_list}
                recommand_str = str(recommand_JSON)
                results.append((result_id, command, character, agents_names[i], recommand_str, re_type, region, now,
                                contorller_id))
                result_id += 1
                writer.writerow([i, agents_names[i], code_value[:topK], re_type, region, now, contorller_id])
        print("\n")
        sql_result = 'insert into ' + result_table_name + ' values(%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        cursor_result.executemany(sql_result, results)
        conn1.commit()
    print("finished!!!")
except Exception as e:
    print("推荐失败")
    print(e)
finally:
    # Release the database connections whether or not the run succeeded.
    for connection in (conn0, conn1):
        if connection is not None:
            try:
                connection.close()
            except Exception as e:
                print(e)
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化