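# NOTE: web-page banner captured together with the source ("Code pull complete; the page will refresh automatically"). Not part of the program.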
import os
import sys
from tqdm import tqdm
import pickle
import random
import torch
from datetime import datetime
import math
from utils import *
from icecream import ic
import warnings
warnings.filterwarnings('ignore')
# Class used to represent the agent state.
# The state length depends on how many past nodes/relations are included.
class KGState(object):
    """Assemble the flat state vector fed to the policy.

    The state is a concatenation of embeddings; how many are included is
    controlled by ``history_len``:

    * 0 -> [head, tail]
    * 1 -> [head, tail, last_node, last_relation]
    * 2 -> [head, tail, last_node, last_relation, older_node, older_relation]

    Attributes:
        embed_size: size of a single embedding vector.
        history_len: number of past (node, relation) hops kept in the state.
        dim: length of the vector returned by ``__call__``.
    """

    # Number of embedding vectors concatenated for each supported history length.
    _PARTS_PER_HISTORY = {0: 2, 1: 4, 2: 6}

    def __init__(self, embed_size, history_len=1):
        """Validate ``history_len`` and pre-compute the state dimension.

        Raises:
            ValueError: if ``history_len`` is not 0, 1 or 2.
        """
        num_parts = self._PARTS_PER_HISTORY.get(history_len)
        if num_parts is None:
            raise ValueError('history length should be 0/1/2')
        self.embed_size = embed_size
        self.history_len = history_len
        self.dim = num_parts * embed_size

    def __call__(self, head_node_embed, tail_node_embed, last_node_embed,
                 last_relation_embed, older_node_embed, older_relation_embed):
        """Return the concatenated state vector for the configured history.

        All six embeddings are always passed; the ones beyond the configured
        history length are ignored.

        Raises:
            ValueError: if ``history_len`` was changed to an unsupported value
                after construction.
        """
        if self.history_len == 0:
            parts = [head_node_embed, tail_node_embed]
        elif self.history_len == 1:
            parts = [head_node_embed, tail_node_embed,
                     last_node_embed, last_relation_embed]
        elif self.history_len == 2:
            parts = [head_node_embed, tail_node_embed,
                     last_node_embed, last_relation_embed,
                     older_node_embed, older_relation_embed]
        else:
            # The previous message talked about an unrelated "mode" option.
            raise ValueError('history length should be 0/1/2')
        return np.concatenate(parts)
class BatchKGEnvironment(object):
    """Batched knowledge-graph walking environment.

    Every episode starts from a batch of product nodes. At each step the
    agent picks, for every path in the batch, one outgoing (relation, node)
    action; paths all have the same length and therefore finish together.

    A path is a list of ``(relation, node_type, node_id)`` tuples whose first
    element is ``(SELF_LOOP, PRODUCT, pid)``. An action is a
    ``(relation, next_node_id)`` tuple.

    ``train_time`` selects an experiment mode that decides which action
    generator, which pruning score and which reward source are used.
    """

    # Modes whose reward is read from the score matrix of ``self.relation``.
    _SINGLE_REWARD_MODES = (3, 4, 8)
    # Modes that load both the SUB and the COMP score matrices.
    _DUAL_MATRIX_MODES = (5, 7, 9, 13, 70, 71, 72)
    # Modes whose reward is the larger of the rescaled SUB / COMP scores.
    _MAX_REWARD_MODES = (5, 7, 9, 70, 71)
    # Modes that prune product candidates with max(SUB score, COMP score).
    _DUAL_PRUNE_MODES = (5, 7, 8, 9, 13, 70, 71, 72)
    # Modes that blend the target-product score with a current-node score.
    _BLENDED_PRUNE_MODES = (71, 72)
    # Relations skipped by ``_get_actions`` when ``train_time == 1``.
    _FIRST_STAGE_SKIPPED = ('purchase', 'described_as', 'belong_to', 'produced_by')
    # Location of the precomputed score matrices used when none is supplied.
    _DEFAULT_SCORE_ROOT = '/mnt/ssd/zjyang/KAPR/OnlyProduct/AAAITmp'

    def __init__(self, dataset_str, max_acts, max_path_len=3, state_history=1,
                 relation=('COMP',), type='whole', train_time=1, delrel='',
                 score_root=None):
        """Load the graph, embeddings, reward matrices and path patterns.

        Args:
            dataset_str: dataset name passed to ``load_kg`` / ``load_embed``
                and used as a sub-directory of ``score_root``.
            max_acts: maximum number of candidate actions kept per step.
            max_path_len: number of hops; a path holds ``max_path_len + 1`` nodes.
            state_history: history length forwarded to ``KGState``.
            relation: sequence whose first element is 'COMP' or 'SUB'.
            type: forwarded unchanged to ``load_kg`` / ``load_embed``.
            train_time: experiment mode (see class docstring).
            delrel: relation name excluded from the action space of
                ``_get_actions_all_relation``.
            score_root: directory holding ``<dataset>/KEIM/<REL>/score_numpy.npy``;
                defaults to the original hard-coded location.

        Raises:
            ValueError: if ``relation[0]`` is neither 'COMP' nor 'SUB'.
        """
        self.max_acts = max_acts
        self.act_dim = max_acts + 1
        self.max_num_nodes = max_path_len + 1

        # Entity and relation embeddings produced by TransE training.
        self.kg = load_kg(dataset_str, type=type)
        self.embeds = load_embed(dataset_str, type=type)
        self.embed_size = self.embeds[PRODUCT].shape[1]
        self.embeds[SELF_LOOP] = (np.zeros(self.embed_size), 0.0)
        self.state_gen = KGState(self.embed_size, history_len=state_history)
        self.state_dim = self.state_gen.dim

        self.relation = relation[0]
        self.train_time = train_time
        self.delrel = delrel
        if self.delrel != 'None':
            print('delrel:', self.delrel)

        # Without this check an unknown relation only surfaced later as an
        # AttributeError on ``relation_embedding``.
        if self.relation not in ('COMP', 'SUB'):
            raise ValueError(
                "relation should be 'COMP' or 'SUB', got {!r}".format(self.relation))

        # Precomputed product-product score matrices used as rewards.
        if score_root is None:
            score_root = self._DEFAULT_SCORE_ROOT
        if self.train_time in self._SINGLE_REWARD_MODES:
            score_path = os.path.join(score_root, dataset_str, 'KEIM',
                                      self.relation, 'score_numpy.npy')
            self.reward = np.load(score_path)
            print('reward shape', self.reward.shape)
        elif self.train_time in self._DUAL_MATRIX_MODES:
            score_path_sub = os.path.join(score_root, dataset_str, 'KEIM',
                                          'SUB', 'score_numpy.npy')
            score_path_comp = os.path.join(score_root, dataset_str, 'KEIM',
                                           'COMP', 'score_numpy.npy')
            self.reward_sub = np.load(score_path_sub)
            self.reward_comp = np.load(score_path_comp)
            print('reward shape', self.reward_sub.shape)

        # Embedding of the target relation; fall back to the other
        # product-product relation when the requested one is missing.
        if self.relation == 'COMP':
            self.relation_embedding = self._relation_vector(COMP, SUB)
        else:
            self.relation_embedding = self._relation_vector(SUB, COMP)

        # Per-product normaliser: best TransE score over all tail products,
        # computed in chunks so the full score matrix is never materialised.
        product_embeds = self.embeds[PRODUCT]
        product_size = len(product_embeds)
        chunk_size = math.ceil(product_size / 5)
        self.p_p_scales = []
        if chunk_size > 0:
            for start in range(0, product_size, chunk_size):
                heads = product_embeds[start:start + chunk_size] + self.relation_embedding
                self.p_p_scales.extend(np.max(np.dot(heads, product_embeds.T), axis=1))
        print('p_p_scales size:', len(self.p_p_scales), product_size)

        # Relation-sequence templates that a path must match to be rewarded.
        self.patterns = []
        if self.train_time == 5:
            # Mode 5 uses the union of both relations' templates.
            self.patterns.extend(self._relation_patterns(PATH_PATTERN_COMP))
            for pattern in self._relation_patterns(PATH_PATTERN_SUB):
                if pattern not in self.patterns:
                    self.patterns.append(pattern)
        elif self.relation == 'COMP':
            self.patterns.extend(self._relation_patterns(PATH_PATTERN_COMP))
        else:
            self.patterns.extend(self._relation_patterns(PATH_PATTERN_SUB))

        # Current episode information.
        self._batch_path = None  # list of paths of (relation, node_type, node_id)
        self._batch_curr_actions = None  # valid actions for the current step
        self._batch_curr_state = None
        self._batch_curr_reward = None
        # A single 'done' flag is enough: all paths have the same length and
        # finish at the same time.
        self._done = False

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    def _relation_vector(self, primary, fallback):
        """Return the embedding vector of ``primary``, or of ``fallback`` if absent."""
        try:
            return self.embeds[primary][0]
        except (KeyError, IndexError):
            return self.embeds[fallback][0]

    @staticmethod
    def _relation_patterns(pattern_table):
        """Turn a PATH_PATTERN_* table into tuples of relation names.

        The first hop of every template is replaced by SELF_LOOP, matching the
        first element of every path.
        """
        return [
            tuple([SELF_LOOP] + [hop[0] for hop in pattern_table[pattern_id][1:]])
            for pattern_id in pattern_table.keys()
        ]

    def _type_relation_vector(self, node_type):
        """Return the relation vector used to score a node of ``node_type``.

        Scores are always measured from the target product, so the relation
        is chosen by the type of the candidate node. Returns None for node
        types without a dedicated relation.
        """
        if node_type == PRODUCT:
            return self.relation_embedding
        if node_type == WORD:
            return self.embeds[DESCRIBED_AS][0]
        if node_type == BRAND:
            return self.embeds[PRODUCED_BY][0]
        if node_type == CATEGORY:
            return self.embeds[BELONG_TO][0]
        if node_type == USER:
            return self.embeds[PURCHASE][0]
        return None

    @staticmethod
    def _top_scored(candidates, scores, k):
        """Return the ``k`` best-scored candidates sorted by (relation, node id).

        ``k <= 0`` yields an empty list; a plain ``[-k:]`` slice would return
        every candidate when ``k`` is 0.
        """
        if k <= 0:
            return []
        keep = np.argsort(scores)[-k:]
        return sorted([candidates[i] for i in keep], key=lambda x: (x[0], x[1]))

    @staticmethod
    def _finalize_actions(self_loop, kept, keep_self_loop=False):
        """Return the action list: the self-loop is dropped once real actions exist."""
        if keep_self_loop or not kept:
            return [self_loop] + list(kept)
        return list(kept)

    def _unvisited_candidates(self, path, skip_relation):
        """Collect (relation, node_id) edges of the current node not yet on ``path``.

        ``skip_relation`` is a predicate; relations for which it returns True
        are ignored.
        """
        _, curr_node_type, curr_node_id = path[-1]
        relation_nodes = self.kg(curr_node_type, curr_node_id)
        visited_nodes = {(v[1], v[2]) for v in path}
        candidate_acts = []
        for r in relation_nodes:
            if skip_relation(r):
                continue
            next_node_type = KG_RELATION[curr_node_type][r]
            candidate_acts.extend(
                (r, n) for n in relation_nodes[r]
                if (next_node_type, n) not in visited_nodes)
        return candidate_acts

    # ------------------------------------------------------------------
    # Pattern checks
    # ------------------------------------------------------------------
    def _has_pattern(self, path):
        """Return True if the relation sequence of ``path`` is a known template."""
        pattern = tuple([v[0] for v in path])
        return pattern in self.patterns

    def _batch_has_pattern(self, batch_path):
        """Apply ``_has_pattern`` to every path of the batch."""
        return [self._has_pattern(path) for path in batch_path]

    # ------------------------------------------------------------------
    # Action generation
    # ------------------------------------------------------------------
    def _get_actions_all_relation(self, path, done):
        """Return the valid actions for the last node of ``path`` (all relations).

        Only ``self.delrel`` is excluded. When more than ``max_acts``
        candidates exist they are pruned by TransE score measured from the
        target product (``path[0]``). The self-loop is returned only when the
        episode is done or no other action exists.
        """
        _, curr_node_type, curr_node_id = path[-1]
        self_loop = (SELF_LOOP, curr_node_id)
        if done:
            return [self_loop]

        candidate_acts = self._unvisited_candidates(path, lambda r: r == self.delrel)

        # Few enough candidates: keep them all.
        if len(candidate_acts) <= self.max_acts:
            kept = sorted(candidate_acts, key=lambda x: (x[0], x[1]))
            return self._finalize_actions(self_loop, kept)

        # Too many candidates: score each one and keep the best.
        product_embed = self.embeds[PRODUCT][path[0][-1]]
        dual_prune = self.train_time in self._DUAL_PRUNE_MODES
        blended = self.train_time in self._BLENDED_PRUNE_MODES
        sub_src = comp_src = None  # built lazily, only if a product candidate shows up
        scores = []
        for r, next_node_id in candidate_acts:
            next_node_type = KG_RELATION[curr_node_type][r]
            target_embed = self.embeds[next_node_type][next_node_id]
            if dual_prune and next_node_type == PRODUCT:
                # Product candidates: the better of the SUB and COMP scores.
                if sub_src is None:
                    sub_src = product_embed + self._relation_vector(SUB, COMP)
                    comp_src = product_embed + self._relation_vector(COMP, SUB)
                score = max(np.matmul(sub_src, target_embed),
                            np.matmul(comp_src, target_embed))
            else:
                relation_vec = self._type_relation_vector(next_node_type)
                if relation_vec is None:
                    relation_vec = self.embeds[r][0]
                score = np.matmul(product_embed + relation_vec, target_embed)
            if blended:
                # Mix in a score measured from the current node along relation r.
                cur_embed = self.embeds[curr_node_type][curr_node_id] + self.embeds[r][0]
                score = 0.7 * score + 0.3 * np.matmul(cur_embed, target_embed)
            scores.append(score)

        kept = self._top_scored(candidate_acts, scores, self.max_acts)
        return self._finalize_actions(self_loop, kept)

    def _get_actions(self, path, done):
        """Return the valid actions for the last node of ``path`` (first-stage variant).

        With ``train_time == 1`` the purchase / described_as / belong_to /
        produced_by relations are skipped. When pruning is needed, 'sub' and
        'comp' product edges get priority: up to ``max_acts`` of them are
        kept and only the remaining slots go to other edges. The self-loop is
        kept next to real actions only when ``train_time == 77``.
        """
        _, curr_node_type, curr_node_id = path[-1]
        self_loop = (SELF_LOOP, curr_node_id)
        if done:
            return [self_loop]

        first_stage = self.train_time == 1
        candidate_acts = self._unvisited_candidates(
            path, lambda r: first_stage and r in self._FIRST_STAGE_SKIPPED)

        # No candidate at all: only the self-loop is possible.
        if not candidate_acts:
            return [self_loop]

        keep_self_loop = self.train_time == 77
        if len(candidate_acts) <= self.max_acts:
            kept = sorted(candidate_acts, key=lambda x: (x[0], x[1]))
            return self._finalize_actions(self_loop, kept, keep_self_loop)

        # Too many candidates: score against the target product and prune in
        # two groups (sub/comp product edges first, everything else second).
        product_embed = self.embeds[PRODUCT][path[0][-1]]
        product_acts, product_scores = [], []
        other_acts, other_scores = [], []
        for r, next_node_id in candidate_acts:
            next_node_type = KG_RELATION[curr_node_type][r]
            relation_vec = self._type_relation_vector(next_node_type)
            if relation_vec is None:
                # Node types without a scoring relation are not kept.
                continue
            score = np.matmul(product_embed + relation_vec,
                              self.embeds[next_node_type][next_node_id])
            if next_node_type == PRODUCT and (r == 'sub' or r == 'comp'):
                product_acts.append((r, next_node_id))
                product_scores.append(score)
            else:
                other_acts.append((r, next_node_id))
                other_scores.append(score)

        num_product = min(self.max_acts, len(product_acts))
        num_other = self.max_acts - num_product
        kept = self._top_scored(product_acts, product_scores, num_product)
        kept.extend(self._top_scored(other_acts, other_scores, num_other))
        return self._finalize_actions(self_loop, kept, keep_self_loop)

    def _batch_get_actions(self, batch_path, done):
        """Return the action list of every path, using the generator for the mode."""
        if self.train_time == 1:
            return [self._get_actions(path, done) for path in batch_path]
        return [self._get_actions_all_relation(path, done) for path in batch_path]

    # ------------------------------------------------------------------
    # State
    # ------------------------------------------------------------------
    def _get_state(self, path):
        """Return the state vector of ``path``.

        Layout: [product_embed, curr_node_embed, last_node_embed,
        last_relation_embed, older_node_embed, older_relation_embed],
        truncated by ``KGState`` according to its history length; missing
        history is zero-filled.
        """
        product_embed = self.embeds[PRODUCT][path[0][-1]]
        zero_embed = np.zeros(self.embed_size)
        # Initial state: the current node is the target product itself.
        if len(path) == 1:
            return self.state_gen(product_embed, product_embed, zero_embed,
                                  zero_embed, zero_embed, zero_embed)

        older_relation, last_node_type, last_node_id = path[-2]
        last_relation, curr_node_type, curr_node_id = path[-1]
        curr_node_embed = self.embeds[curr_node_type][curr_node_id]
        last_node_embed = self.embeds[last_node_type][last_node_id]
        last_relation_embed, _ = self.embeds[last_relation]  # this can be self-loop!
        if len(path) == 2:
            return self.state_gen(product_embed, curr_node_embed, last_node_embed,
                                  last_relation_embed, zero_embed, zero_embed)

        _, older_node_type, older_node_id = path[-3]
        older_node_embed = self.embeds[older_node_type][older_node_id]
        older_relation_embed, _ = self.embeds[older_relation]
        return self.state_gen(product_embed, curr_node_embed, last_node_embed,
                              last_relation_embed, older_node_embed,
                              older_relation_embed)

    def _batch_get_state(self, batch_path):
        """Stack the state vectors of all paths into one 2-D array."""
        return np.vstack([self._get_state(path) for path in batch_path])

    def _batch_get_cur_node_type(self, batch_path):
        """Return the current node type of every path as a column array."""
        return np.vstack([self._get_cur_node_type(path) for path in batch_path])

    def _get_cur_node_type(self, path):
        """Return the node type of the last element of ``path``."""
        return path[-1][1]

    def _batch_get_cur_node_id(self, batch_path):
        """Return the current node id of every path as a column array."""
        return np.vstack([self._get_cur_node_id(path) for path in batch_path])

    def _get_cur_node_id(self, path):
        """Return the node id of the last element of ``path``."""
        return path[-1][2]

    # ------------------------------------------------------------------
    # Action embeddings
    # ------------------------------------------------------------------
    def batch_action_embedding(self, batch_cur_node_type, batch_cur_node_id,
                               batch_curr_actions):
        """Return the stacked action-embedding matrices of the batch.

        ``batch_cur_node_type`` / ``batch_cur_node_id`` are the column arrays
        produced by ``reset`` / ``batch_step``; only element 0 of each row is used.
        """
        batch_act_emb = [
            self._get_act_emb(Type[0], Id[0], Act)
            for Type, Id, Act in zip(batch_cur_node_type, batch_cur_node_id,
                                     batch_curr_actions)
        ]
        return np.stack(batch_act_emb, axis=0)

    def _get_act_emb(self, Type, Id, Act):
        """Return a (max_acts + 1, 2 * embed_size) matrix describing ``Act``.

        Each row is [relation_embedding, next_node_embedding]; rows beyond
        ``len(Act)`` are zero padding. ``Id`` is accepted for interface
        compatibility and not used.
        """
        rows = []
        for act in Act:
            relation, node_id = act[0], act[1]
            if relation == 'self_loop':
                next_node_type = Type
            else:
                next_node_type = KG_RELATION[Type][relation]
            relation_emb = self.embeds[relation][0]
            node_embed = self.embeds[next_node_type][node_id]
            rows.append(np.hstack((relation_emb, node_embed)))
        # Pad up to the fixed action dimension; a single vstack avoids
        # re-copying the matrix for every appended row.
        zero_embed = np.zeros(self.embed_size * 2)
        rows.extend([zero_embed] * (self.max_acts - len(Act) + 1))
        return np.vstack(rows)

    # ------------------------------------------------------------------
    # Reward
    # ------------------------------------------------------------------
    @staticmethod
    def _rescaled(score):
        """Map a score through (Nmax - Nmin) * score + Nmin with Nmax=1, Nmin=-1."""
        n_max, n_min = 1, -1
        return (n_max - n_min) * score + n_min

    def _get_reward(self, path):
        """Return the non-negative reward of ``path``.

        The reward is 0.0 unless the path has more than one node, matches a
        known pattern and currently ends on a product. Otherwise the score
        comes from the precomputed matrices or from normalised TransE,
        depending on ``train_time``, and is clipped at zero.

        Raises:
            ValueError: in mode 72 when ``self.relation`` is neither 'SUB'
                nor 'COMP'.
        """
        if len(path) <= 1:
            return 0.0
        # Rewards are only given to paths that follow a known pattern.
        if not self._has_pattern(path):
            return 0.0
        _, curr_node_type, curr_node_id = path[-1]
        if curr_node_type != PRODUCT:
            return 0.0

        pid = path[0][-1]
        if self.train_time in self._SINGLE_REWARD_MODES:
            score = self.reward[pid, curr_node_id]
        elif self.train_time in self._MAX_REWARD_MODES:
            score = max(self._rescaled(self.reward_sub[pid, curr_node_id]),
                        self._rescaled(self.reward_comp[pid, curr_node_id]))
        elif self.train_time == 72:
            if self.relation == 'SUB':
                score = self._rescaled(self.reward_sub[pid, curr_node_id])
            elif self.relation == 'COMP':
                score = self._rescaled(self.reward_comp[pid, curr_node_id])
            else:
                # Previously surfaced as an UnboundLocalError on ``score``.
                raise ValueError(
                    "relation should be 'COMP' or 'SUB', got {!r}".format(self.relation))
        else:
            head_p_vec = self.embeds[PRODUCT][pid] + self.relation_embedding
            tail_p_vec = self.embeds[PRODUCT][curr_node_id]
            score = np.dot(head_p_vec, tail_p_vec) / self.p_p_scales[pid]
        return max(score, 0.0)

    def _batch_get_reward(self, batch_path):
        """Return the rewards of all paths as a 1-D array."""
        return np.array([self._get_reward(path) for path in batch_path])

    # ------------------------------------------------------------------
    # Episode control
    # ------------------------------------------------------------------
    def _is_done(self):
        """Return True once the paths have reached the maximum number of nodes."""
        return self._done or len(self._batch_path[0]) >= self.max_num_nodes

    def reset(self, pids=None):
        """Start a new episode from ``pids`` (one random product if None).

        Returns:
            (batch_state, batch_cur_node_type, batch_cur_node_id)
        """
        if pids is None:
            all_pids = list(self.kg(PRODUCT).keys())
            pids = [random.choice(all_pids)]
        self._batch_path = [[(SELF_LOOP, PRODUCT, pid)] for pid in pids]
        self._done = False
        self._refresh_step_info()
        return self._batch_curr_state, self._cur_node_type, self._cur_node_id

    def batch_step(self, batch_act_idx):
        """Advance every path by the action index chosen for it.

        Returns:
            (batch_state, batch_reward, done, batch_path,
             batch_cur_node_type, batch_cur_node_id)
        """
        assert len(batch_act_idx) == len(self._batch_path)
        for path, actions, act_idx in zip(self._batch_path,
                                          self._batch_curr_actions,
                                          batch_act_idx):
            _, curr_node_type, curr_node_id = path[-1]
            relation, next_node_id = actions[act_idx]
            if relation == SELF_LOOP:
                next_node_type = curr_node_type
            else:
                next_node_type = KG_RELATION[curr_node_type][relation]
            path.append((relation, next_node_type, next_node_id))

        self._done = self._is_done()
        self._refresh_step_info()
        return (self._batch_curr_state, self._batch_curr_reward, self._done,
                self._batch_path, self._cur_node_type, self._cur_node_id)

    def _refresh_step_info(self):
        """Recompute state, actions, reward and current node info for the batch."""
        self._batch_curr_state = self._batch_get_state(self._batch_path)
        self._batch_curr_actions = self._batch_get_actions(self._batch_path, self._done)
        self._batch_curr_reward = self._batch_get_reward(self._batch_path)
        self._cur_node_type = self._batch_get_cur_node_type(self._batch_path)
        self._cur_node_id = self._batch_get_cur_node_id(self._batch_path)

    def batch_action_mask(self, dropout=0.0):
        """Return a boolean (batch, act_dim) mask of the currently valid actions.

        With ``dropout > 0`` and at least 5 actions, a random subset of the
        actions after the first one is kept; the first action always stays valid.
        """
        batch_mask = []
        for actions in self._batch_curr_actions:
            act_idxs = list(range(len(actions)))
            if dropout > 0 and len(act_idxs) >= 5:
                keep_size = int(len(act_idxs[1:]) * (1.0 - dropout))
                tmp = np.random.choice(act_idxs[1:], keep_size, replace=False).tolist()
                act_idxs = [act_idxs[0]] + tmp
            # ``np.bool`` was removed in NumPy 1.24; the builtin is equivalent.
            act_mask = np.zeros(self.act_dim, dtype=bool)
            act_mask[act_idxs] = True
            batch_mask.append(act_mask)
        return np.vstack(batch_mask)

    def print_path(self):
        """Print every path of the batch in a readable arrow notation."""
        for path in self._batch_path:
            msg = 'Path: {}({})'.format(path[0][1], path[0][2])
            for node in path[1:]:
                msg += ' =={}=> {}({})'.format(node[0], node[1], node[2])
            print(msg)
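# NOTE: the two lines below were a content-moderation notice from the code-hosting web page, captured together with the source. Not part of the program.
# Original notice (translated): "This section may contain content unsuitable for display and is hidden; if you believe the content does not violate the platform rules or applicable law, you can submit an appeal."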