def evaluate(self, obs):
    """Return the best attack and defence point scores found on the board.

    Every non-zero cell of ``obs`` is scored with ``self.evaluate_point``.
    Cells equal to ``self.color`` feed the attack score; every other
    non-zero cell feeds the defence score.

    Args:
        obs: 2-D board array indexable as ``obs[x][y]``; zero cells are
            skipped.

    Returns:
        A ``(score_atk, score_def)`` tuple with the maximum point score per
        side. Both values start at 0, so a side without any scored cell
        stays at 0.
    """
    score_atk, score_def = 0, 0
    # np.nonzero returns one index array per axis; zip pairs them into (x, y).
    for x, y in zip(*np.nonzero(obs)):
        pt_score = self.evaluate_point(obs, (x, y))
        if obs[x][y] == self.color:
            score_atk = max(score_atk, pt_score)
        else:
            score_def = max(score_def, pt_score)
    return score_atk, score_def
时间: 2023-06-18 16:04:11 浏览: 87
这是一个评估函数,它以一个观察值 obs 作为输入,然后计算出攻击方和防守方的分数。函数首先找出观察值中所有非零元素的位置,然后对于每个位置,调用 evaluate_point 函数来计算该位置的得分。如果该位置代表的颜色不是自己的颜色,则将该得分与当前的防守方得分进行比较并取较大值;否则将该得分与当前的攻击方得分进行比较并取较大值。最终返回的是攻击方和防守方的得分。
相关问题
def generate(self, obs, all=False):
    """Return candidate moves next to existing stones, highest score first.

    Each empty in-bounds neighbour of a reference stone is tried once: it is
    scored with ``self.evaluate_point`` as a stone of ``self.color`` (attack)
    and as a stone of ``-self.color`` (defence), and the larger value becomes
    its score. ``obs`` is modified while a cell is being scored and is set
    back to 0 before the next cell is considered.

    Args:
        obs: 2-D board indexable as ``obs[x][y]`` where 0 marks an empty
            cell. Coordinates are limited to ``range(0, 15)`` on both axes.
        all: If true, every non-zero cell of ``obs`` is a reference stone;
            otherwise the last seven entries of ``self._last_move_list``
            are used.

    Returns:
        List of ``(x, y)`` positions ordered by descending score (equal
        scores appear in reverse discovery order). When the best candidate
        reaches ``score_4``, only candidates scoring at least
        ``score_3_live`` are returned.
    """
    # NOTE(review): score_3_live, score_4 and score_5 are module-level
    # thresholds defined outside this block.
    board_size = 15
    dir_set = [(1, 0), (1, -1), (0, -1), (-1, -1),
               (-1, 0), (-1, 1), (0, 1), (1, 1)]

    if all:
        check_list = list(zip(*np.nonzero(obs)))
    else:
        # Slicing already copes with lists shorter than seven entries.
        check_list = self._last_move_list[-7:]

    good_pts, good_scores = [], []
    pts, scores = [], []
    seen = set()  # O(1) "already evaluated" check instead of scanning pts
    for x0, y0 in check_list:
        for dx, dy in dir_set:
            x, y = x0 + dx, y0 + dy
            if not (0 <= x < board_size and 0 <= y < board_size):
                continue
            pos = (x, y)
            if obs[x][y] != 0 or pos in seen:
                continue
            seen.add(pos)

            obs[x][y] = self.color
            score_atk = self.evaluate_point(obs, pos)
            obs[x][y] = -self.color
            score_def = self.evaluate_point(obs, pos)
            # Restore the cell right away so the early `break` below can
            # never leave a trial stone on the caller's board.
            obs[x][y] = 0

            score = max(score_atk, score_def)
            if score >= score_3_live:
                good_pts.append(pos)
                good_scores.append(score)
                if score_atk == score_5:
                    # Presumably an immediately winning move: stop scanning
                    # the remaining directions around this reference stone.
                    break
            pts.append(pos)
            scores.append(score)

    if good_pts and max(good_scores) >= score_4:
        pts, scores = good_pts, good_scores

    # Plain-Python ranking: a stable ascending sort followed by a reversal.
    # Packing tuples and scalars into one np.array builds a ragged array,
    # which recent NumPy releases reject with ValueError.
    order = sorted(range(len(scores)), key=scores.__getitem__)
    pos_list = [pts[i] for i in reversed(order)]
    return pos_list
这段代码是一个五子棋AI的 generate 函数实现。该函数根据当前的观察数据 obs,生成AI下一步应该下的位置 pos_list。
首先,该函数会生成一个方向集合 dir_set,包含了八个方向。然后,如果参数 all 为 True,则以棋盘上所有已落子(非零)的位置作为检查对象;否则,只检查最近的七个落子位置(即 _last_move_list 的最后七项)。
接下来,对于每个遍历到的位置,都会向八个方向扩展,找到尚未评估过的空位 pos。然后,分别计算在 pos 位置下己方棋子后的攻击分数 score_atk 和下对方棋子后的防御分数 score_def,取两者中的最大值作为该位置的得分 score。每个评估过的位置及其得分都会被添加到 pts 和 scores 列表中;如果 score 大于等于特定的分数 score_3_live,该位置还会同时被添加到 good_pts 和 good_scores 列表中。
如果 good_pts 非空,并且其中的最高得分大于等于特定值 score_4,则只保留 good_pts 中的位置(及其 good_scores)作为候选,其余位置被丢弃。另外,如果某个位置的攻击分数 score_atk 等于特定值 score_5,则会立即停止检查当前棋子周围剩余的方向。
最后,将候选位置按照得分从高到低排列(得分最高的位置排在最前面),作为 pos_list 返回。
class AbstractGreedyAndPrune():
    """Greedy multi-round tour assignment followed by a pruning pass.

    ``solution`` repeatedly asks ``local_optimal_choice`` for the next
    (UAV, tour) pair until ``greedy_stop_condition`` holds, then prunes the
    built solution. Neither hook is defined in this class; subclasses are
    expected to supply them.
    """

    def __init__(self, aoi: AoI, uavs_tours: dict, max_rounds: int, debug: bool = True):
        """Store the problem data and index the candidate tours by UAV position.

        Args:
            aoi: Area of interest; ``graph`` and ``n_targets`` are read from it.
            uavs_tours: Mapping from UAV to its candidate tours.
            max_rounds: Number of tours that may be assigned to each UAV.
            debug: Stored on the instance; not used in the visible code.
        """
        self.aoi = aoi
        self.max_rounds = max_rounds
        self.debug = debug
        self.graph = aoi.graph
        self.nnodes = self.aoi.n_targets
        self.uavs = list(uavs_tours.keys())
        self.nuavs = len(self.uavs)
        # Re-key the tours by UAV position (0..nuavs-1) instead of by UAV.
        self.uavs_tours = {i: uavs_tours[uav] for i, uav in enumerate(self.uavs)}
        # NOTE(review): __check_depots and __reachable_points are not defined
        # in the visible code; they must exist on this class for __init__ to run.
        self.__check_depots()
        self.reachable_points = self.__reachable_points()

    def __pruning(self, mr_solution: MultiRoundSolution) -> MultiRoundSolution:
        """Delegate pruning of the built solution to the ``utility`` module."""
        return utility.pruning_multiroundsolution(mr_solution)

    def solution(self) -> MultiRoundSolution:
        """Build a solution greedily and return its pruned version."""
        mrs_builder = MultiRoundSolutionBuilder(self.aoi)
        for uav in self.uavs:
            mrs_builder.add_drone(uav)

        residual_ntours_to_assign = {i: self.max_rounds for i in range(self.nuavs)}
        tour_to_assign = self.max_rounds * self.nuavs
        visited_points = set()

        while not self.greedy_stop_condition(visited_points, tour_to_assign):
            itd_uav, ind_tour = self.local_optimal_choice(visited_points, residual_ntours_to_assign)
            residual_ntours_to_assign[itd_uav] -= 1
            tour_to_assign -= 1
            opt_tour = self.uavs_tours[itd_uav][ind_tour]
            visited_points |= set(opt_tour.targets_indexes)  # update visited points
            mrs_builder.append_tour(self.uavs[itd_uav], opt_tour)

        return self.__pruning(mrs_builder.build())


class CumulativeGreedyCoverage(AbstractGreedyAndPrune):
    """Greedy strategy that scores a tour by its not-yet-visited targets."""

    def local_optimal_choice(self, visited_points, residual_ntours_to_assign):
        """Return the ``(ind_uav, ind_tour)`` pair with the highest quality.

        Only UAVs with remaining rounds are considered. When several tours
        share the highest quality, the last one examined wins.

        Raises:
            ValueError: If no UAV has any round left or no tour is available.
        """
        best_choice = None
        best_quality = None
        for ind_uav in range(self.nuavs):
            uav_residual_rounds = residual_ntours_to_assign[ind_uav]
            if uav_residual_rounds <= 0:
                continue
            uav_tours = self.uavs_tours[ind_uav]
            # Index-based access: only len() and integer indexing are assumed
            # of the tour container.
            for ind_tour in range(len(uav_tours)):
                quality_tour = self.evaluate_tour(uav_tours[ind_tour], uav_residual_rounds, visited_points)
                # ">=" keeps the later candidate on ties.
                if best_quality is None or quality_tour >= best_quality:
                    best_quality = quality_tour
                    best_choice = (ind_uav, ind_tour)
        if best_choice is None:
            raise ValueError("no tour can be assigned: every UAV is out of rounds or has no tours")
        return best_choice

    def evaluate_tour(self, tour: Tour, round_count: int, visited_points: set):
        """Return ``round_count`` times the number of unvisited targets of ``tour``."""
        new_points = set(tour.targets_indexes) - visited_points
        return round_count * len(new_points)
如何改写上述程序,使其能返回所有已经探索过的目标点visited_points的数量,请用代码表示
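可以在 `solution()` 方法中添加一个变量 `explored_points` 来记录已经探索过的目标点数量,并在每次更新 `visited_points` 时把新增目标点的数量累加进去(因此该计数始终等于 `len(visited_points)`)。注意:修改后 `solution()` 返回的是 `(MultiRoundSolution, int)` 元组,调用方需要相应地解包。下面是修改后的代码: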
```
class AbstractGreedyAndPrune():
    """Greedy multi-round tour assignment followed by a pruning pass.

    ``solution`` repeatedly asks ``local_optimal_choice`` for the next
    (UAV, tour) pair until ``greedy_stop_condition`` holds, then prunes the
    built solution. ``greedy_stop_condition`` is not defined in the visible
    code and ``local_optimal_choice`` is not defined in this class.
    """

    def __init__(self, aoi: AoI, uavs_tours: dict, max_rounds: int, debug: bool = True):
        """Store the problem data and index the candidate tours by UAV position.

        Args:
            aoi: Area of interest; ``graph`` and ``n_targets`` are read from it.
            uavs_tours: Mapping from UAV to its candidate tours.
            max_rounds: Number of tours that may be assigned to each UAV.
            debug: Stored on the instance; not used in the visible code.
        """
        self.aoi = aoi
        self.max_rounds = max_rounds
        self.debug = debug
        self.graph = aoi.graph
        self.nnodes = self.aoi.n_targets
        self.uavs = list(uavs_tours.keys())
        self.nuavs = len(self.uavs)
        # Re-key the tours by UAV position (0..nuavs-1) instead of by UAV.
        self.uavs_tours = {i: uavs_tours[uav] for i, uav in enumerate(self.uavs)}
        # NOTE(review): __check_depots and __reachable_points are not defined
        # in the visible code; they must exist on this class for __init__ to run.
        self.__check_depots()
        self.reachable_points = self.__reachable_points()

    def __pruning(self, mr_solution: MultiRoundSolution) -> MultiRoundSolution:
        """Delegate pruning of the built solution to the ``utility`` module."""
        return utility.pruning_multiroundsolution(mr_solution)

    # NOTE(review): Tuple must be imported from typing; no import is visible
    # in this chunk.
    def solution(self) -> Tuple[MultiRoundSolution, int]:
        """Build a solution greedily; return it pruned with the visited count.

        Returns:
            ``(pruned_solution, explored_points)`` where ``explored_points``
            is the number of distinct target indexes covered by the tours
            chosen during the greedy phase (counted before pruning).
        """
        mrs_builder = MultiRoundSolutionBuilder(self.aoi)
        for uav in self.uavs:
            mrs_builder.add_drone(uav)

        residual_ntours_to_assign = {i: self.max_rounds for i in range(self.nuavs)}
        tour_to_assign = self.max_rounds * self.nuavs
        visited_points = set()
        explored_points = 0

        while not self.greedy_stop_condition(visited_points, tour_to_assign):
            itd_uav, ind_tour = self.local_optimal_choice(visited_points, residual_ntours_to_assign)
            residual_ntours_to_assign[itd_uav] -= 1
            tour_to_assign -= 1
            opt_tour = self.uavs_tours[itd_uav][ind_tour]
            # Count only targets that were not covered by an earlier tour.
            new_points = set(opt_tour.targets_indexes) - visited_points
            explored_points += len(new_points)
            visited_points |= new_points  # update visited points
            mrs_builder.append_tour(self.uavs[itd_uav], opt_tour)

        return self.__pruning(mrs_builder.build()), explored_points
class CumulativeGreedyCoverage(AbstractGreedyAndPrune):
    """Greedy strategy that scores a tour by its not-yet-visited targets."""

    def evaluate_tour(self, tour: Tour, round_count: int, visited_points: set):
        """Return ``round_count`` times the number of unvisited targets of ``tour``."""
        new_points = set(tour.targets_indexes) - visited_points
        return round_count * len(new_points)

    def local_optimal_choice(self, visited_points, residual_ntours_to_assign):
        """Return the ``(ind_uav, ind_tour)`` pair with the highest quality.

        Only UAVs with remaining rounds are considered. When several tours
        share the highest quality, the last one examined wins.

        Args:
            visited_points: Set of target indexes already covered.
            residual_ntours_to_assign: Mapping from UAV position to the
                number of tours that UAV may still receive.

        Raises:
            ValueError: If no UAV has any round left or no tour is available.
        """
        best_choice = None
        best_quality = None
        for ind_uav in range(self.nuavs):
            uav_residual_rounds = residual_ntours_to_assign[ind_uav]
            if uav_residual_rounds <= 0:
                continue
            uav_tours = self.uavs_tours[ind_uav]
            # Index-based access: only len() and integer indexing are assumed
            # of the tour container.
            for ind_tour in range(len(uav_tours)):
                quality_tour = self.evaluate_tour(uav_tours[ind_tour], uav_residual_rounds, visited_points)
                # ">=" keeps the later candidate on ties.
                if best_quality is None or quality_tour >= best_quality:
                    best_quality = quality_tour
                    best_choice = (ind_uav, ind_tour)
        if best_choice is None:
            raise ValueError("no tour can be assigned: every UAV is out of rounds or has no tours")
        return best_choice
阅读全文