def evaluate_bellman(env, policy, gamma=1.): # eye()创建4x12单位1对角线矩阵,zeros创建nS大小0矩阵 a, b = np.eye(env.nS), np.zeros(env.nS) for state in range(env.nS - 1): # 表示47个状态(0-46) for action in range(env.nA): pi = policy[state][action] for p, next_state, reward, done in env.P[state][action]: a[state, next_state] -= (pi * gamma * p) b[state] += (pi * reward * p) v = np.linalg.solve(a, b) q = np.zeros((env.nS, env.nA)) for state in range(env.nS - 1): for action in range(env.nA): for p, next_state, reward, done in env.P[state][action]: q[state][action] += ((reward + gamma * v[next_state]) * p) return v, q
时间: 2024-04-10 07:32:49 浏览: 71
这是一个评估贝尔曼方程的函数。它使用给定的环境和策略评估状态值函数和动作值函数。在循环中,对于每个状态和动作组合,计算状态转移概率、奖励以及是否结束的信息。然后,使用这些信息更新矩阵a和向量b。最后,使用线性求解器求解a和b的线性方程组,得到状态值函数v。接下来,计算动作值函数q,通过对每个状态和动作组合进行循环,使用状态转移概率、奖励和更新后的状态值函数计算动作值函数。最终返回状态值函数v和动作值函数q。
相关问题
class AbstractGreedyAndPrune(): def __init__(self, aoi: AoI, uavs_tours: dict, max_rounds: int, debug: bool = True): self.aoi = aoi self.max_rounds = max_rounds self.debug = debug self.graph = aoi.graph self.nnodes = self.aoi.n_targets self.uavs = list(uavs_tours.keys()) self.nuavs = len(self.uavs) self.uavs_tours = {i: uavs_tours[self.uavs[i]] for i in range(self.nuavs)} self.__check_depots() self.reachable_points = self.__reachable_points() def __pruning(self, mr_solution: MultiRoundSolution) -> MultiRoundSolution: return utility.pruning_multiroundsolution(mr_solution) def solution(self) -> MultiRoundSolution: mrs_builder = MultiRoundSolutionBuilder(self.aoi) for uav in self.uavs: mrs_builder.add_drone(uav) residual_ntours_to_assign = {i : self.max_rounds for i in range(self.nuavs)} tour_to_assign = self.max_rounds * self.nuavs visited_points = set() while not self.greedy_stop_condition(visited_points, tour_to_assign): itd_uav, ind_tour = self.local_optimal_choice(visited_points, residual_ntours_to_assign) residual_ntours_to_assign[itd_uav] -= 1 tour_to_assign -= 1 opt_tour = self.uavs_tours[itd_uav][ind_tour] visited_points |= set(opt_tour.targets_indexes) # update visited points mrs_builder.append_tour(self.uavs[itd_uav], opt_tour) return self.__pruning(mrs_builder.build()) class CumulativeGreedyCoverage(AbstractGreedyAndPrune): choice_dict = {} for ind_uav in range(self.nuavs): uav_residual_rounds = residual_ntours_to_assign[ind_uav] if uav_residual_rounds > 0: uav_tours = self.uavs_tours[ind_uav] for ind_tour in range(len(uav_tours)): tour = uav_tours[ind_tour] quality_tour = self.evaluate_tour(tour, uav_residual_rounds, visited_points) choice_dict[quality_tour] = (ind_uav, ind_tour) best_value = max(choice_dict, key=int) return choice_dict[best_value] def evaluate_tour(self, tour : Tour, round_count : int, visited_points : set): new_points = (set(tour.targets_indexes) - visited_points) return round_count * len(new_points) 如何改写上述程序,使其能返回所有已经探索过的目标点visited_points的数量,请用代码表示
可以在 `solution()` 方法中添加一个变量来记录已经探索过的目标点数量,然后在每次更新 `visited_points` 后更新这个变量。下面是修改后的代码:
```
class AbstractGreedyAndPrune():
def __init__(self, aoi: AoI, uavs_tours: dict, max_rounds: int, debug: bool = True):
self.aoi = aoi
self.max_rounds = max_rounds
self.debug = debug
self.graph = aoi.graph
self.nnodes = self.aoi.n_targets
self.uavs = list(uavs_tours.keys())
self.nuavs = len(self.uavs)
self.uavs_tours = {i: uavs_tours[self.uavs[i]] for i in range(self.nuavs)}
self.__check_depots()
self.reachable_points = self.__reachable_points()
def __pruning(self, mr_solution: MultiRoundSolution) -> MultiRoundSolution:
return utility.pruning_multiroundsolution(mr_solution)
def solution(self) -> Tuple[MultiRoundSolution, int]:
mrs_builder = MultiRoundSolutionBuilder(self.aoi)
for uav in self.uavs:
mrs_builder.add_drone(uav)
residual_ntours_to_assign = {i : self.max_rounds for i in range(self.nuavs)}
tour_to_assign = self.max_rounds * self.nuavs
visited_points = set()
explored_points = 0
while not self.greedy_stop_condition(visited_points, tour_to_assign):
itd_uav, ind_tour = self.local_optimal_choice(visited_points, residual_ntours_to_assign)
residual_ntours_to_assign[itd_uav] -= 1
tour_to_assign -= 1
opt_tour = self.uavs_tours[itd_uav][ind_tour]
new_points = set(opt_tour.targets_indexes) - visited_points
explored_points += len(new_points)
visited_points |= new_points # update visited points
mrs_builder.append_tour(self.uavs[itd_uav], opt_tour)
return self.__pruning(mrs_builder.build()), explored_points
class CumulativeGreedyCoverage(AbstractGreedyAndPrune):
def evaluate_tour(self, tour : Tour, round_count : int, visited_points : set):
new_points = set(tour.targets_indexes) - visited_points
return round_count * len(new_points)
def local_optimal_choice(self, visited_points, residual_ntours_to_assign):
choice_dict = {}
for ind_uav in range(self.nuavs):
uav_residual_rounds = residual_ntours_to_assign[ind_uav]
if uav_residual_rounds > 0:
uav_tours = self.uavs_tours[ind_uav]
for ind_tour in range(len(uav_tours)):
tour = uav_tours[ind_tour]
quality_tour = self.evaluate_tour(tour, uav_residual_rounds, visited_points)
choice_dict[quality_tour] = (ind_uav, ind_tour)
best_value = max(choice_dict, key=int)
return choice_dict[best_value]
import time import tensorflow.compat.v1 as tf tf.disable_v2_behavior() from tensorflow.examples.tutorials.mnist import input_data import mnist_inference import mnist_train tf.compat.v1.reset_default_graph() EVAL_INTERVAL_SECS = 10 def evaluate(mnist): with tf.Graph().as_default() as g: #定义输入与输出的格式 x = tf.compat.v1.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input') y_ = tf.compat.v1.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input') validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels} #直接调用封装好的函数来计算前向传播的结果 y = mnist_inference.inference(x, None) #计算正确率 correcgt_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1)) accuracy = tf.reduce_mean(tf.cast(correcgt_prediction, tf.float32)) #通过变量重命名的方式加载模型 variable_averages = tf.train.ExponentialMovingAverage(0.99) variable_to_restore = variable_averages.variables_to_restore() saver = tf.train.Saver(variable_to_restore) #每隔10秒调用一次计算正确率的过程以检测训练过程中正确率的变化 while True: with tf.compat.v1.Session() as sess: ckpt = tf.train.get_checkpoint_state(minist_train.MODEL_SAVE_PATH) if ckpt and ckpt.model_checkpoint_path: #load the model saver.restore(sess, ckpt.model_checkpoint_path) global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1] accuracy_score = sess.run(accuracy, feed_dict=validate_feed) print("After %s training steps, validation accuracy = %g" % (global_step, accuracy_score)) else: print('No checkpoint file found') return time.sleep(EVAL_INTERVAL_SECS) def main(argv=None): mnist = input_data.read_data_sets(r"D:\Anaconda123\Lib\site-packages\tensorboard\mnist", one_hot=True) evaluate(mnist) if __name__ == '__main__': tf.compat.v1.app.run()对代码进行改进
以下是对代码的改进建议:
1. 在代码开头添加注释,简要说明代码功能和使用方法。
2. 将导入模块的语句放在代码开头。
3. 将模型保存路径和评估时间间隔定义为常量,并使用有意义的变量名。
4. 将计算正确率和加载模型的过程封装为函数。
5. 在主函数中调用评估函数。
改进后的代码如下:
```
# 该代码实现了使用已训练好的模型对 MNIST 数据集进行评估
import time
import tensorflow.compat.v1 as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference
import mnist_train
# 定义常量
MODEL_SAVE_PATH = 'model/'
EVAL_INTERVAL_SECS = 10
def evaluate(mnist):
"""
计算模型在验证集上的正确率
"""
with tf.Graph().as_default() as g:
# 定义输入和输出格式
x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
y_ = tf.placeholder(tf.float32, [None, mnist_inference.OUTPUT_NODE], name='y-input')
# 直接调用封装好的函数计算前向传播结果
y = mnist_inference.inference(x, None)
# 计算正确率
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# 加载模型
variable_averages = tf.train.ExponentialMovingAverage(mnist_train.MOVING_AVERAGE_DECAY)
variables_to_restore = variable_averages.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
# 在验证集上计算正确率
with tf.Session() as sess:
ckpt = tf.train.get_checkpoint_state(MODEL_SAVE_PATH)
if ckpt and ckpt.model_checkpoint_path:
saver.restore(sess, ckpt.model_checkpoint_path)
global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
accuracy_score = sess.run(accuracy, feed_dict={x: mnist.validation.images, y_: mnist.validation.labels})
print("After %s training steps, validation accuracy = %g" % (global_step, accuracy_score))
else:
print('No checkpoint file found')
def main(argv=None):
# 读取数据集
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
# 每隔一定时间评估模型在验证集上的正确率
while True:
evaluate(mnist)
time.sleep(EVAL_INTERVAL_SECS)
if __name__ == '__main__':
tf.app.run()
```
阅读全文
相关推荐
![rar](https://img-home.csdnimg.cn/images/20241231044955.png)
![rar](https://img-home.csdnimg.cn/images/20241231044955.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)