def generate_kg_batch(self, kg_dict, batch_size, highest_neg_idx):
    """Sample one training batch of knowledge-graph triples.

    Draws ``batch_size`` head entities from the keys of ``kg_dict`` (without
    replacement when enough distinct heads exist, otherwise with
    replacement). For every drawn head it requests one (relation, positive
    tail) pair from ``self.sample_pos_triples_for_h`` and one negative tail
    for that same head and relation from ``self.sample_neg_triples_for_h``.

    Args:
        kg_dict: Mapping keyed by head-entity id. The keys must be integers
            accepted by ``torch.LongTensor``; the values are only forwarded
            to the sampling helpers.
        batch_size: Number of head entities to draw.
        highest_neg_idx: Forwarded unchanged to
            ``self.sample_neg_triples_for_h``.

    Returns:
        A tuple ``(batch_head, batch_relation, batch_pos_tail,
        batch_neg_tail)`` of ``torch.LongTensor`` objects.

    Raises:
        IndexError: If ``kg_dict`` is empty while ``batch_size`` is positive.
    """
    # Materialize the head entities as a list: a dict view is not a sequence,
    # so random.choice() cannot index it and random.sample() rejects it on
    # recent Python versions.
    exist_heads = list(kg_dict)

    if batch_size <= len(exist_heads):
        # Enough distinct heads: pick batch_size of them without replacement.
        batch_head = random.sample(exist_heads, batch_size)
    else:
        # Fewer heads than requested: draw with replacement instead.
        batch_head = [random.choice(exist_heads) for _ in range(batch_size)]

    batch_relation, batch_pos_tail, batch_neg_tail = [], [], []
    for h in batch_head:
        # Exactly one relation and one positive tail are requested per head.
        relation, pos_tail = self.sample_pos_triples_for_h(kg_dict, h, 1)
        batch_relation += relation
        batch_pos_tail += pos_tail

        # The negative tail is sampled for the same head and the relation
        # that was just drawn for the positive triple.
        neg_tail = self.sample_neg_triples_for_h(
            kg_dict, h, relation[0], 1, highest_neg_idx
        )
        batch_neg_tail += neg_tail

    batch_head = torch.LongTensor(batch_head)
    batch_relation = torch.LongTensor(batch_relation)
    batch_pos_tail = torch.LongTensor(batch_pos_tail)
    batch_neg_tail = torch.LongTensor(batch_neg_tail)
    return batch_head, batch_relation, batch_pos_tail, batch_neg_tail
时间: 2024-04-02 17:37:29 浏览: 26
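这段代码用于生成知识图谱(KG)训练所需的batch数据。它先从kg_dict的键(头实体)中随机采样batch_size个头实体:当头实体数量不少于batch_size时做无放回采样,否则做有放回采样。然后针对每个头实体,调用sample_pos_triples_for_h采样一个正例(一个关系和一个正例尾实体),再调用sample_neg_triples_for_h针对同一头实体和该关系采样一个负例尾实体(即负例与正例共享头实体和关系,只是尾实体不同)。最终返回四个LongTensor类型的数据,分别是batch中的头实体、关系、正例尾实体和负例尾实体。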
相关问题
generate_kg_batch
`generate_kg_batch` 函数的作用是生成训练时所需的知识图谱数据。它从知识图谱中随机选择 $batch\_size$ 个三元组作为正例三元组,并针对每个正例三元组采样 $n\_sample\_neg\_triples$ 个负例三元组,将它们合并成一个大小为 $(batch\_size \times (n\_sample\_neg\_triples+1))$ 的三元组张量。在生成三元组时,函数还会为每个三元组生成对应的标签,标签值为 1 表示正例,标签值为 0 表示负例。函数还会为每个三元组生成一个掩码,掩码值为 1 表示该位置是有效的,掩码值为 0 表示该位置是无效的。最后,函数会将生成的三元组张量、标签张量和掩码张量返回给调用者。这些数据将用于训练知识图谱嵌入模型。
def generate_cf_batch(self, user_dict, batch_size):
    """Sample one collaborative-filtering training batch.

    Draws ``batch_size`` users from the keys of ``user_dict`` (without
    replacement when enough distinct users exist, otherwise with
    replacement). For every drawn user it requests one positive item from
    ``self.sample_pos_items_for_u`` and one negative item from
    ``self.sample_neg_items_for_u``.

    Args:
        user_dict: Mapping keyed by user id. The keys must be integers
            accepted by ``torch.LongTensor``; the values are only forwarded
            to the sampling helpers.
        batch_size: Number of users to draw.

    Returns:
        A tuple ``(batch_user, batch_pos_item, batch_neg_item)`` of
        ``torch.LongTensor`` objects.

    Raises:
        IndexError: If ``user_dict`` is empty while ``batch_size`` is
            positive.
    """
    # Materialize the users as a list: a dict view is not a sequence, so
    # random.choice() cannot index it and random.sample() rejects it on
    # recent Python versions.
    exist_users = list(user_dict)

    if batch_size <= len(exist_users):
        # Enough distinct users: pick batch_size of them without replacement.
        batch_user = random.sample(exist_users, batch_size)
    else:
        # Fewer users than requested: draw with replacement instead.
        batch_user = [random.choice(exist_users) for _ in range(batch_size)]

    batch_pos_item, batch_neg_item = [], []
    for u in batch_user:
        # One positive and one negative item are requested per user.
        batch_pos_item += self.sample_pos_items_for_u(user_dict, u, 1)
        batch_neg_item += self.sample_neg_items_for_u(user_dict, u, 1)

    batch_user = torch.LongTensor(batch_user)
    batch_pos_item = torch.LongTensor(batch_pos_item)
    batch_neg_item = torch.LongTensor(batch_neg_item)
    return batch_user, batch_pos_item, batch_neg_item
这是一个函数,它的作用是生成一个三元组(batch_user, batch_pos_item, batch_neg_item),三者均为LongTensor。其中batch_user的长度为batch_size,表示从用户字典(user_dict)中随机选择的batch_size个用户(用户数量不少于batch_size时为无放回采样,否则为有放回采样);batch_pos_item的长度为(batch_size * 1),表示对于每个用户,随机选择一个该用户喜欢的物品;batch_neg_item的长度为(batch_size * 1),表示对于每个用户,随机选择一个该用户不喜欢的物品。其中sample_pos_items_for_u和sample_neg_items_for_u是两个函数,用于从用户字典(user_dict)中为指定用户(u)随机选择一个喜欢的物品和一个不喜欢的物品。