2.1 编写MAB的Softmax算法。2.2 编写评估方法,对(1)各算法的累积reward、(2)各算法每轮的reward、(3)各算法选取最优arm的概率变化情况、(4)各算法选取arm的方差情况进行可视化呈现。2.3 编写程序产生一个数据集,文件名为data.txt。本数据集为10000行110列、由数字组成的文本文件:第1列为1-10之间的随机整数,第2列为取值0或1的随机整数,第3列至第110列为由0-99的整数构成的稀疏矩阵,其中0占80%。2.4 用该数据集对上述算法进行测试,计算出四种算法的平均reward值,绘制单次模拟后每轮的累计reward,测试轮数设置为1000。
时间: 2023-11-27 20:54:41 浏览: 105
2.1 Softmax算法:
```python
def softmax_prob(rewards, temperature):
    """Return softmax (Boltzmann) selection probabilities for each arm.

    Args:
        rewards: 1-D array-like of per-arm value estimates.
        temperature: Non-zero scaling factor. Small values make the
            distribution peak on the best arm; large values flatten it
            towards uniform.

    Returns:
        A 1-D float array of the same length as ``rewards`` whose entries
        sum to 1. An empty input yields an empty array.
    """
    scaled = np.asarray(rewards, dtype=float) / temperature
    if scaled.size == 0:
        return scaled
    # Softmax is invariant to adding a constant to every input, so shifting
    # by the maximum leaves the result unchanged while keeping np.exp from
    # overflowing to inf (which would turn the probabilities into NaN).
    exp_rewards = np.exp(scaled - np.max(scaled))
    return exp_rewards / np.sum(exp_rewards)
def softmax_mab(num_arms, temperature):
    """Create a Softmax (Boltzmann exploration) multi-armed bandit policy.

    Args:
        num_arms: Number of arms; arms are indexed ``0 .. num_arms - 1``.
        temperature: Softmax temperature passed to ``softmax_prob``.

    Returns:
        A tuple ``(select_arm, total_reward)``.

        ``select_arm`` is a zero-argument callable. Each call samples an arm
        from the softmax distribution over the current per-arm mean rewards,
        obtains the reward through ``get_reward(arm)``, updates the running
        statistics and returns ``(arm, reward)``.

        ``total_reward`` is the running total at construction time, which is
        always 0. It is a plain number, not a live view, and is kept only so
        existing callers that unpack two values keep working.

    Note:
        ``get_reward`` and ``softmax_prob`` are looked up in the enclosing
        module; ``get_reward(arm)`` must be defined there before
        ``select_arm`` is called.
    """
    arm_rewards = np.zeros(num_arms)  # cumulative reward collected per arm
    arm_counts = np.zeros(num_arms)   # number of pulls per arm
    total_reward = 0

    def select_arm():
        # Only total_reward is rebound; the arrays are mutated in place.
        nonlocal total_reward
        # Score arms by mean reward, not by the cumulative sum: sums grow
        # without bound, which overflows the exponential and collapses
        # exploration. Unpulled arms keep an estimate of 0.
        estimates = arm_rewards / np.maximum(arm_counts, 1)
        prob = softmax_prob(estimates, temperature)
        arm = np.random.choice(num_arms, p=prob)
        reward = get_reward(arm)
        total_reward += reward
        arm_rewards[arm] += reward
        arm_counts[arm] += 1
        return arm, reward

    return select_arm, total_reward
```
2.2 评估方法:
```python
# Evaluate the cumulative reward
def evaluate_cumulative_reward(select_arm, num_steps):
    """Run the policy for ``num_steps`` rounds and return the summed reward.

    Args:
        select_arm: Zero-argument callable returning ``(arm, reward)``.
        num_steps: Number of rounds to play.

    Returns:
        The sum of the rewards returned by ``select_arm``; 0 when
        ``num_steps`` is 0.
    """
    cumulative_reward = 0
    for _ in range(num_steps):
        _, reward = select_arm()
        cumulative_reward += reward
    return cumulative_reward
# Evaluate the reward of every round
def evaluate_step_reward(select_arm, num_steps):
    """Run the policy for ``num_steps`` rounds and return each round's reward.

    Args:
        select_arm: Zero-argument callable returning ``(arm, reward)``.
        num_steps: Number of rounds to play.

    Returns:
        A list of length ``num_steps`` holding the rewards in play order.
    """
    # Index [1] picks the reward out of the (arm, reward) pair.
    return [select_arm()[1] for _ in range(num_steps)]
# Evaluate how the probability of picking the optimal arm evolves
def evaluate_optimal_arm_prob(select_arm, num_steps, optimal_arm):
    """Return the running fraction of rounds in which the optimal arm was chosen.

    Args:
        select_arm: Zero-argument callable returning ``(arm, reward)``.
        num_steps: Number of rounds to play.
        optimal_arm: Index of the arm counted as optimal.

    Returns:
        A 1-D float array of length ``num_steps``; entry ``t`` is the share
        of the first ``t + 1`` rounds that selected ``optimal_arm``.
    """
    optimal_arm_counts = np.zeros(num_steps)
    for step in range(num_steps):
        arm, _ = select_arm()
        if arm == optimal_arm:
            optimal_arm_counts[step] = 1
    # Running hit count divided by the number of rounds played so far.
    return np.cumsum(optimal_arm_counts) / np.arange(1, num_steps + 1)
# Evaluate the variance across the selected arms
def evaluate_arm_variance(select_arm, num_steps):
    """Track the variance of the per-arm mean rewards after every round.

    Args:
        select_arm: Zero-argument callable returning ``(arm, reward)``.
        num_steps: Number of rounds to play.

    Returns:
        A list of length ``num_steps``; entry ``t`` is the population
        variance (``np.var``) of the mean rewards of all arms pulled at
        least once during the first ``t + 1`` rounds.
    """
    # Statistics are keyed by the arm indices actually observed, so the
    # function needs no global ``num_arms``, and unpulled arms never cause a
    # 0/0 division that would turn the variance into NaN.
    reward_sums = {}
    pull_counts = {}
    arm_variances = []
    for _ in range(num_steps):
        arm, reward = select_arm()
        reward_sums[arm] = reward_sums.get(arm, 0.0) + reward
        pull_counts[arm] = pull_counts.get(arm, 0) + 1
        arm_means = [reward_sums[a] / pull_counts[a] for a in reward_sums]
        arm_variances.append(np.var(arm_means))
    return arm_variances
```
2.3 产生数据集:
```python
# Generate data.txt: 10000 rows x 110 space-separated integer columns.
#   column 1      : random integer in 1..10
#   column 2      : random integer in {0, 1}
#   columns 3-110 : sparse values in 0..99, with 0 drawn 80% of the time
import numpy as np

np.random.seed(42)

n_rows = 10000
n_sparse_cols = 108
zero_fraction = 0.8

# np.random.choice requires the probabilities to sum to 1: give 0 its 80%
# share and spread the remaining 20% evenly over the 99 non-zero values.
value_probs = np.full(100, (1.0 - zero_fraction) / 99)
value_probs[0] = zero_fraction

with open("data.txt", "w") as f:
    for _ in range(n_rows):
        first_col = np.random.randint(1, 11)  # upper bound is exclusive
        second_col = np.random.randint(2)
        sparse_row = np.random.choice(
            100, size=n_sparse_cols, replace=True, p=value_probs
        )
        fields = [str(first_col), str(second_col)]
        fields.extend(str(x) for x in sparse_row)
        f.write(" ".join(fields) + "\n")
```
2.4 对算法进行测试:
```python
# Experiment: compare Softmax bandits at several temperatures and plot the
# four evaluation metrics.
# NOTE: this snippet assumes numpy (np) and matplotlib.pyplot (plt) are already
# imported, and that get_reward, softmax_mab and the evaluate_* helpers are
# defined.
num_arms = 10
optimal_arm = 4
num_steps = 1000
num_simulations = 10
temperatures = [0.1, 0.5, 1, 5, 10]

# Result buffers, indexed [temperature, simulation(, step)].
rewards_cumulative = np.zeros((len(temperatures), num_simulations))
rewards_step = np.zeros((len(temperatures), num_simulations, num_steps))
optimal_arm_probs = np.zeros((len(temperatures), num_simulations, num_steps))
arm_variances = np.zeros((len(temperatures), num_simulations, num_steps))

for i, temperature in enumerate(temperatures):
    for j in range(num_simulations):
        # Each evaluator plays num_steps rounds itself, so give every metric
        # a fresh bandit; otherwise the later metrics would describe a policy
        # that has already been trained by the earlier evaluators.
        select_arm, total_reward = softmax_mab(num_arms, temperature)
        rewards_cumulative[i, j] = evaluate_cumulative_reward(select_arm, num_steps)
        select_arm, total_reward = softmax_mab(num_arms, temperature)
        rewards_step[i, j, :] = evaluate_step_reward(select_arm, num_steps)
        select_arm, total_reward = softmax_mab(num_arms, temperature)
        optimal_arm_probs[i, j, :] = evaluate_optimal_arm_prob(select_arm, num_steps, optimal_arm)
        select_arm, total_reward = softmax_mab(num_arms, temperature)
        arm_variances[i, j, :] = evaluate_arm_variance(select_arm, num_steps)

# Aggregate over simulations only (axis=1). The per-step metrics keep their
# step axis, giving shape (len(temperatures), num_steps), which is what the
# per-step plots below need.
mean_rewards_cumulative = np.mean(rewards_cumulative, axis=1)
std_rewards_cumulative = np.std(rewards_cumulative, axis=1)
mean_rewards_step = np.mean(rewards_step, axis=1)
std_rewards_step = np.std(rewards_step, axis=1)
mean_optimal_arm_probs = np.mean(optimal_arm_probs, axis=1)
std_optimal_arm_probs = np.std(optimal_arm_probs, axis=1)
mean_arm_variances = np.mean(arm_variances, axis=1)
std_arm_variances = np.std(arm_variances, axis=1)

# Cumulative reward as a function of temperature.
plt.figure(figsize=(10, 6))
plt.errorbar(temperatures, mean_rewards_cumulative, yerr=std_rewards_cumulative, fmt="o-", label="Cumulative Reward")
plt.xlabel("Temperature")
plt.ylabel("Reward")
plt.legend()
plt.show()

# Per-step metrics: one figure per metric, one curve per temperature.
steps = np.arange(num_steps)
# Draw an error bar only every few steps so the curves stay readable.
error_stride = max(1, num_steps // 20)
per_step_plots = [
    (mean_rewards_step, std_rewards_step, "Average Reward", "Reward"),
    (mean_optimal_arm_probs, std_optimal_arm_probs, "Optimal Arm Probability", "Probability"),
    (mean_arm_variances, std_arm_variances, "Arm Variance", "Variance"),
]
for metric_mean, metric_std, metric_label, y_label in per_step_plots:
    plt.figure(figsize=(10, 6))
    for k, temperature in enumerate(temperatures):
        plt.errorbar(
            steps,
            metric_mean[k],
            yerr=metric_std[k],
            errorevery=error_stride,
            label=f"{metric_label} (T={temperature})",
        )
    plt.xlabel("Step")
    plt.ylabel(y_label)
    plt.legend()
    plt.show()
```
阅读全文