用python写一个DQN
时间: 2024-05-08 18:15:04 浏览: 11
作为AI助手,我可以给您提供一个DQN的Python代码示例,如下所示:
```python
import random
import copy
import numpy as np
import tensorflow as tf
class DQN:
def __init__(self, state_dim, action_dim, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995):
self.state_dim = state_dim
self.action_dim = action_dim
self.gamma = gamma
self.epsilon = epsilon
self.epsilon_min = epsilon_min
self.epsilon_decay = epsilon_decay
self.memory = []
self.batch_size = 32
self.model = self.build_model()
self.target_model = self.build_model()
self.update_target_model()
def build_model(self):
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, input_dim=self.state_dim, activation='relu'))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(self.action_dim, activation='linear'))
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))
return model
def update_target_model(self):
self.target_model.set_weights(self.model.get_weights())
def choose_action(self, state):
if np.random.rand() <= self.epsilon:
return random.randrange(self.action_dim)
else:
q_values = self.model.predict(state)
return np.argmax(q_values[0])
def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))
if len(self.memory) > 2000:
self.memory.pop(0)
def replay(self):
if len(self.memory) < self.batch_size:
return
samples = random.sample(self.memory, self.batch_size)
for state, action, reward, next_state, done in samples:
target = self.model.predict(state)
if done:
target[0][action] = reward
else:
Q_future = max(self.target_model.predict(next_state)[0])
target[0][action] = reward + self.gamma * Q_future
self.model.fit(state, target, epochs=1, verbose=0)
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
self.update_target_model()
```
这是一个简单的DQN实现,包括神经网络模型的构建、动作选择、经验回放等基本功能。您可以在这个基础上进行修改和扩展,以适应您的具体应用场景。