import numpy as np
import gym
class MultiAgentCoverageEnv(gym.Env):
    """Multi-agent coverage task on a square 2-D world.

    ``num_agents`` point agents move inside the square
    ``[-world_size / 2, world_size / 2]`` on both axes. At every step each
    agent picks one of four discrete actions:

    * ``0`` -- move up    (+y)
    * ``1`` -- move down  (-y)
    * ``2`` -- move right (+x)
    * ``3`` -- move left  (-x)

    Any other action value leaves that agent where it is.

    The reward of a step is the number of octagon vertices (see
    ``OCTAGON_VERTICES``) that have at least one agent strictly closer than
    ``coverage_radius``, divided by ``num_agents``.

    The class follows the classic Gym API: ``reset()`` returns the
    observation and ``step()`` returns ``(obs, reward, done, info)``.

    Args:
        num_agents: Number of agents; must be at least 1.
        world_size: Side length of the square world; must be positive.
        step_size: Distance an agent moves per action.
        coverage_radius: A vertex counts as covered when some agent is
            strictly closer than this distance.
        max_steps: Number of steps after which ``step()`` reports
            ``done=True``. Pass ``None`` for episodes that never end.

    Raises:
        ValueError: If ``num_agents``, ``world_size`` or ``max_steps`` is out
            of range.
    """

    # Vertices of the octagon the agents are rewarded for covering.
    OCTAGON_VERTICES = np.array([
        [-2.5, -1.5], [-1.5, -2.5], [1.5, -2.5], [2.5, -1.5],
        [2.5, 1.5], [1.5, 2.5], [-1.5, 2.5], [-2.5, 1.5],
    ])

    def __init__(self, num_agents, world_size, step_size=0.1,
                 coverage_radius=0.5, max_steps=200):
        super().__init__()
        if num_agents < 1:
            raise ValueError("num_agents must be at least 1")
        if world_size <= 0:
            raise ValueError("world_size must be positive")
        if max_steps is not None and max_steps < 1:
            raise ValueError("max_steps must be at least 1 or None")

        self.num_agents = num_agents
        self.world_size = world_size
        self.step_size = step_size
        self.coverage_radius = coverage_radius
        self.max_steps = max_steps

        half_extent = world_size / 2
        # One 4-way discrete choice per agent.
        self.action_space = gym.spaces.MultiDiscrete([4] * num_agents)
        # Observation is the (num_agents, 2) array of x/y positions.
        self.observation_space = gym.spaces.Box(
            low=-half_extent,
            high=half_extent,
            shape=(num_agents, 2),
            dtype=np.float64,
        )

        self.agent_positions = np.zeros((num_agents, 2))
        self._steps_taken = 0

    def reset(self):
        """Place every agent uniformly at random and start a new episode.

        Returns:
            A ``(num_agents, 2)`` array with the new agent positions. It is a
            copy, so later steps do not modify it.
        """
        half_extent = self.world_size / 2
        self.agent_positions = np.random.uniform(
            -half_extent, half_extent, size=(self.num_agents, 2)
        )
        self._steps_taken = 0
        return self.agent_positions.copy()

    def step(self, actions):
        """Move every agent by its action and score the resulting coverage.

        Args:
            actions: Indexable sequence holding one action code per agent;
                only the first ``num_agents`` entries are read.

        Returns:
            A tuple ``(obs, reward, done, info)``: ``obs`` is a copy of the
            ``(num_agents, 2)`` position array, ``reward`` is the coverage
            reward described on the class, ``done`` is ``True`` once
            ``max_steps`` steps have been taken since the last reset, and
            ``info`` is an empty dict.
        """
        for i in range(self.num_agents):
            action = actions[i]
            if action == 0:  # move up
                self.agent_positions[i, 1] += self.step_size
            elif action == 1:  # move down
                self.agent_positions[i, 1] -= self.step_size
            elif action == 2:  # move right
                self.agent_positions[i, 0] += self.step_size
            elif action == 3:  # move left
                self.agent_positions[i, 0] -= self.step_size

        # Keep every agent inside the world; agents move independently, so
        # clipping all of them at once matches clipping each after its move.
        half_extent = self.world_size / 2
        np.clip(self.agent_positions, -half_extent, half_extent,
                out=self.agent_positions)

        self._steps_taken += 1
        reward = self._coverage_reward()
        done = (self.max_steps is not None
                and self._steps_taken >= self.max_steps)
        # Return a copy so observations stored by the caller stay unchanged
        # when the environment advances.
        return self.agent_positions.copy(), reward, done, {}

    def _coverage_reward(self):
        """Return the number of covered octagon vertices over ``num_agents``."""
        # offsets[v, a] is the vector from agent a to vertex v.
        offsets = (self.OCTAGON_VERTICES[:, np.newaxis, :]
                   - self.agent_positions[np.newaxis, :, :])
        nearest_agent = np.linalg.norm(offsets, axis=2).min(axis=1)
        covered = np.count_nonzero(nearest_agent < self.coverage_radius)
        return covered / self.num_agents
# Demo rollout: run 10 episodes with uniformly random actions and print the
# reward accumulated in each one.

# Safety cap on episode length so that a rollout always ends, even if the
# environment never reports done=True.
MAX_STEPS_PER_EPISODE = 200

env = MultiAgentCoverageEnv(num_agents=3, world_size=5)
for episode in range(10):
    obs = env.reset()
    done = False
    total_reward = 0
    steps = 0
    while not done and steps < MAX_STEPS_PER_EPISODE:
        # Sample one of the four moves for each agent; the count is read
        # from the environment instead of being repeated as a literal.
        actions = np.random.randint(4, size=env.num_agents)
        obs, reward, done, _ = env.step(actions)
        total_reward += reward
        steps += 1
    print("Episode {}: Total Reward = {}".format(episode, total_reward))
代码中,我们定义了 `MultiAgentCoverageEnv` 类,这是一个OpenAI Gym环境,用于多智能体覆盖问题。在 `__init__` 函数中,我们初始化了环境的参数,包括智能体数量和世界大小。
在 `reset` 函数中,我们随机初始化智能体的位置,并返回该状态观测值。
在 `step` 函数中,我们接收每个智能体的动作,并根据动作更新智能体的位置,同时将位置限制在世界边界之内。然后,我们统计八边形的8个顶点中被至少一个智能体覆盖(与某个智能体的距离小于0.5)的顶点个数,除以智能体数量后作为奖励返回。最后,`step` 返回新的观测值、奖励、回合结束标志 `done` 以及一个空的信息字典。
在主函数中,我们运行了10个episode:每一步为各个智能体随机采样动作并调用 `env.step()`,累加得到的奖励,最后打印出每个episode的总奖励。注意,上面的代码并没有实现或调用 `env.render()`,因此不包含对智能体位置和覆盖区域的可视化;如需可视化,需要另外实现 `render()` 方法。