强化学习 Python 示例
时间: 2023-12-06 21:38:38 浏览: 29
以下是一个使用 Python 实现的强化学习示例：使用 Q-learning 算法训练一个智能体，让它在迷宫中找到宝藏。迷宫数组中 1 表示可以通行的格子，0 表示墙壁：
```python
import numpy as np
# Maze layout, one string per row: "1" is a walkable cell, "0" is a wall.
maze = np.array([
    [int(cell) for cell in row]
    for row in (
        "0000000000",
        "0110111010",
        "0110101010",
        "0110101010",
        "0110101010",
        "0111111010",
        "0100001010",
        "0111111110",
        "0110000110",
        "0000000000",
    )
])
# Define the agent
class QLearningAgent:
    """Tabular Q-learning agent that learns to reach a goal cell in a grid maze.

    Cells equal to 1 are walkable; every other value is treated as a wall.
    A state is the row-major index of a cell, where ``x`` is the row and ``y``
    the column.  The four actions are 0 = up (x - 1), 1 = down (x + 1),
    2 = left (y - 1) and 3 = right (y + 1).
    """

    # (dx, dy) offset applied by each action index.
    MOVES = ((-1, 0), (1, 0), (0, -1), (0, 1))

    def __init__(self, maze, alpha=0.5, gamma=0.9, epsilon=0.1, goal=None):
        """Create an agent with an all-zero Q-table.

        Args:
            maze: 2-D array-like grid; 1 marks a walkable cell.
            alpha: Learning rate of the Q-value update.
            gamma: Discount factor applied to the next state's best Q-value.
            epsilon: Probability of picking a uniformly random action.
            goal: ``(x, y)`` cell that ends an episode with reward 1.
                Defaults to the bottom-right corner of the grid.
        """
        self.maze = np.asarray(maze)
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        if goal is None:
            goal = (self.maze.shape[0] - 1, self.maze.shape[1] - 1)
        self.goal = (int(goal[0]), int(goal[1]))
        self.q_table = np.zeros((self.maze.size, len(self.MOVES)))

    def get_state(self, x, y):
        """Return the row-major state index of cell ``(x, y)``."""
        return self.maze.shape[1] * x + y

    def get_action(self, state):
        """Pick an action for ``state`` with an epsilon-greedy policy.

        Ties between equally valued greedy actions are broken at random.
        ``np.argmax`` alone always returns the first maximum, which makes an
        untrained (all-zero) agent push "up" almost every step and leaves it
        pinned against the upper walls.
        """
        if np.random.rand() < self.epsilon:
            return int(np.random.randint(len(self.MOVES)))
        values = self.q_table[state]
        best = np.flatnonzero(values == values.max())
        return int(best[np.random.randint(best.size)])

    def step(self, x, y, action):
        """Return the cell reached by taking ``action`` from ``(x, y)``.

        The position is unchanged when the target cell lies outside the grid
        or is not walkable.
        """
        dx, dy = self.MOVES[action]
        nx, ny = x + dx, y + dy
        rows, cols = self.maze.shape
        if 0 <= nx < rows and 0 <= ny < cols and self.maze[nx, ny] == 1:
            return nx, ny
        return x, y

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the observed transition."""
        target = reward + self.gamma * np.max(self.q_table[next_state])
        self.q_table[state, action] += self.alpha * (target - self.q_table[state, action])

    def train(self, num_episodes=1000, max_steps=None):
        """Run ``num_episodes`` training episodes from random walkable cells.

        Args:
            num_episodes: Number of episodes to run.
            max_steps: Step limit per episode, so that an unreachable goal
                cannot stall training forever.  Defaults to ten times the
                number of cells in the maze.
        """
        if max_steps is None:
            max_steps = 10 * self.maze.size

        # Episodes start on walkable cells only; the goal itself is excluded
        # because there is nothing left to learn from it.
        starts = [
            cell
            for cell in map(tuple, np.argwhere(self.maze == 1).tolist())
            if cell != self.goal
        ]
        if not starts:
            return

        for _ in range(num_episodes):
            x, y = starts[np.random.randint(len(starts))]
            state = self.get_state(x, y)
            for _ in range(max_steps):
                action = self.get_action(state)
                x, y = self.step(x, y, action)
                next_state = self.get_state(x, y)
                reached_goal = (x, y) == self.goal
                self.update_q_table(state, action, 1 if reached_goal else 0, next_state)
                if reached_goal:
                    break
                state = next_state


if __name__ == "__main__":
    # The outer border of the maze consists of walls only, so the walk starts
    # and the treasure sits on the walkable cells just inside the top-left and
    # bottom-right corners.
    start = (1, 1)
    treasure = (maze.shape[0] - 2, maze.shape[1] - 2)

    # Train the agent
    agent = QLearningAgent(maze, goal=treasure)
    agent.train()

    # Test the agent: follow the greedy policy.  A deterministic policy that
    # has not arrived after visiting as many steps as there are cells is
    # looping, so the walk is bounded by the maze size.
    x, y = start
    for _ in range(maze.size):
        state = agent.get_state(x, y)
        action = np.argmax(agent.q_table[state])
        x, y = agent.step(x, y, action)
        if (x, y) == agent.goal:
            print("Found the treasure!")
            break
    else:
        print("The greedy policy did not reach the treasure; try more training episodes.")
```