Writing stock reinforcement learning code with akshare and gym (procedural approach)
Here is a simple example of stock reinforcement-learning code using the akshare and gym libraries. It implements the Q-Learning algorithm and is intended for reference only.
First, install the akshare and gym libraries:
```bash
pip install akshare gym
```
The code is as follows:
```python
import akshare as ak
import gym
from gym import spaces
import numpy as np
from collections import defaultdict

# Fetch forward-adjusted (qfq) daily data for one A-share stock
stock_zh_a_daily = ak.stock_zh_a_daily(symbol="sh600000", adjust="qfq")
# Define the stock trading environment
class StockEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, window_size=10):
        super(StockEnv, self).__init__()
        # Use a clean integer index so .loc slicing by step number works
        self.data = data.reset_index(drop=True)
        self.window_size = window_size
        # 0 = hold, 1 = buy, 2 = sell (step() handles three actions)
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=0, high=1, shape=(6, window_size + 1), dtype=np.float32)
    def _next_observation(self):
        # .loc slicing is end-inclusive, so each row covers window_size + 1 days;
        # prices and volumes are crudely rescaled into a small range
        obs = np.array([
            self.data.loc[self.current_step: self.current_step + self.window_size, 'open'].values / 1000,
            self.data.loc[self.current_step: self.current_step + self.window_size, 'high'].values / 1000,
            self.data.loc[self.current_step: self.current_step + self.window_size, 'low'].values / 1000,
            self.data.loc[self.current_step: self.current_step + self.window_size, 'close'].values / 1000,
            self.data.loc[self.current_step: self.current_step + self.window_size, 'volume'].values / 1000000,
            self.data.loc[self.current_step: self.current_step + self.window_size, 'amount'].values / 100000000,
        ])
        return obs
    def reset(self):
        self.current_step = 0
        self.profit = 0
        self.buy_price = 0
        self.sell_price = 0
        return self._next_observation()
    def step(self, action):
        reward = 0
        if action == 1 and self.buy_price == 0:
            # Buy at the current day's open
            self.buy_price = self.data.loc[self.current_step, 'open']
            self.sell_price = 0
        elif action == 2 and self.buy_price > 0:
            # Sell at the current day's open; the realized return is the reward
            self.sell_price = self.data.loc[self.current_step, 'open']
            self.profit = (self.sell_price - self.buy_price) / self.buy_price
            reward = self.profit
            self.buy_price = 0
        # action == 0 (or an invalid buy/sell) simply holds
        self.current_step += 1
        # Stop before the observation window would run past the end of the data
        done = self.current_step >= len(self.data) - self.window_size - 1
        obs = self._next_observation()
        return obs, reward, done, {}
    def render(self, mode='human', close=False):
        # Report the most recently realized trade profit (avoids dividing
        # by a zero buy price when no position is open)
        print(f'step: {self.current_step} profit: {self.profit:0.2%}')
# Define the tabular Q-Learning agent
class QLearningAgent:
    def __init__(self, env, learning_rate=0.1, discount_rate=0.99, exploration_rate=1.0,
                 max_exploration_rate=1.0, min_exploration_rate=0.01, exploration_decay_rate=0.001):
        self.env = env
        self.learning_rate = learning_rate
        self.discount_rate = discount_rate
        self.exploration_rate = exploration_rate
        self.max_exploration_rate = max_exploration_rate
        self.min_exploration_rate = min_exploration_rate
        self.exploration_decay_rate = exploration_decay_rate
        # The observation is a continuous array, so the Q-table is keyed by a
        # discretized, hashable version of it rather than a fixed-size array
        self.q_table = defaultdict(lambda: np.zeros(env.action_space.n))

    def _state(self, observation):
        # Round and flatten the observation into a hashable Q-table key
        return tuple(np.round(observation, 2).flatten())

    def choose_action(self, observation):
        if np.random.uniform(0, 1) < self.exploration_rate:
            # Explore: sample a random action
            return self.env.action_space.sample()
        # Exploit: pick the greedy action from the Q-table
        return int(np.argmax(self.q_table[self._state(observation)]))

    def learn(self, observation, action, reward, next_observation):
        state = self._state(observation)
        next_state = self._state(next_observation)
        old_value = self.q_table[state][action]
        next_max = np.max(self.q_table[next_state])
        # Standard Q-Learning update rule
        new_value = (1 - self.learning_rate) * old_value + self.learning_rate * (reward + self.discount_rate * next_max)
        self.q_table[state][action] = new_value

    def decay_exploration_rate(self, episode):
        self.exploration_rate = self.min_exploration_rate + \
            (self.max_exploration_rate - self.min_exploration_rate) * np.exp(-self.exploration_decay_rate * episode)
# Train the agent on the stock data
env = StockEnv(stock_zh_a_daily)
agent = QLearningAgent(env)
for episode in range(1, 10001):
    observation = env.reset()
    done = False
    while not done:
        action = agent.choose_action(observation)
        next_observation, reward, done, info = env.step(action)
        agent.learn(observation, action, reward, next_observation)
        observation = next_observation
    agent.decay_exploration_rate(episode)
    if episode % 100 == 0:
        print(f'episode: {episode}')
        env.render()
```
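After training, a quick greedy rollout (with exploration turned off) shows how the learned policy trades on the same data. This evaluation loop is a minimal sketch that is not part of the original example; it simply reuses the `env` and `agent` objects defined above:
```python
# Greedy evaluation sketch (not in the original example):
# disable exploration and replay the learned policy once
agent.exploration_rate = 0.0
observation = env.reset()
done = False
total_reward = 0.0
while not done:
    action = agent.choose_action(observation)
    observation, reward, done, info = env.step(action)
    total_reward += reward  # sum of realized per-trade returns
print(f'greedy run, cumulative realized return: {total_reward:0.2%}')
```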
Note: this is only a simple example. Real applications need further optimization and improvement, such as changing the reinforcement-learning algorithm or tuning the parameters.
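For instance, one common refinement in that direction is to train on the earlier part of the price history and evaluate on the remainder. The split below is a hypothetical illustration of that idea, reusing the `StockEnv` class from above:
```python
# Hypothetical train/test split along the time axis (an illustration of
# the suggested improvements, not part of the original example)
split = int(len(stock_zh_a_daily) * 0.8)
train_env = StockEnv(stock_zh_a_daily.iloc[:split])
test_env = StockEnv(stock_zh_a_daily.iloc[split:])
# Train on train_env as above, then run the greedy evaluation loop on test_env.
```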