用akshare,gym写出股票强化学习完整代码
时间: 2023-06-30 20:18:49 浏览: 159
好的,以下是使用akshare和gym库实现股票强化学习的代码:
```python
import akshare as ak
import gym
from gym import spaces
import numpy as np
# Fetch daily bar data for Shanghai-listed stock 600000 (Pudong Development
# Bank) via akshare, back-adjusted ("hfq"), keeping only the first 100 rows.
# NOTE(review): the returned frame presumably has a date index and columns
# open/high/low/close/volume/amount — confirm against the akshare version used.
stock_df = ak.stock_zh_a_daily(symbol="sh600000", adjust="hfq").iloc[:100]
# Stock-trading reinforcement-learning environment.
class StockEnv(gym.Env):
    """Gym environment simulating all-in/all-out trading of a single stock.

    Observation: [open, high, low, close, volume, amount] of the current bar.
    Actions: 0 = buy with all available cash, 1 = sell the whole position,
    2 = hold. Reward is the change in total portfolio value within the step.
    """

    def __init__(self, df):
        """df: DataFrame with columns open/high/low/close/volume/amount."""
        super().__init__()
        # Use a positional integer index so that .loc[step, col] is well
        # defined even when the source frame carries a date index.
        self.df = df.reset_index(drop=True)
        self.reward_range = (0, np.inf)
        self.action_space = spaces.Discrete(3)  # 0 = buy, 1 = sell, 2 = hold
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(6,))

    def _next_observation(self):
        """Return the 6-feature observation vector for the current step."""
        row = self.df.loc[self.current_step]
        return np.array([
            row['open'],
            row['high'],
            row['low'],
            row['close'],
            row['volume'],
            row['amount'],
        ])

    def reset(self):
        """Reset to the first bar and return the initial observation."""
        self.current_step = 0
        self.account_balance = 1000000  # initial cash
        self.stock_balance = 0          # initial shares held
        return self._next_observation()

    def step(self, action):
        """Execute one trading action and advance to the next bar.

        Returns (observation, reward, done, info); info holds the final
        balances when the episode ends, otherwise it is empty.
        """
        assert self.action_space.contains(action)
        price = self.df.loc[self.current_step, 'close']
        prev_val = self.account_balance + self.stock_balance * price
        if action == 0:  # buy: convert as much cash as possible into shares
            buy_num = int(self.account_balance / price)
            self.stock_balance += buy_num
            self.account_balance -= buy_num * price
        elif action == 1:  # sell: liquidate the entire position
            self.account_balance += self.stock_balance * price
            self.stock_balance = 0
        # action == 2: hold — no portfolio change.
        # NOTE(review): trades execute and are valued at the same close price,
        # so this reward is ~0 every step; a meaningful reward would value the
        # portfolio at the NEXT bar's price. Kept as-is to preserve behavior.
        reward = self.account_balance + self.stock_balance * price - prev_val
        self.current_step += 1
        done = self.current_step == len(self.df)
        if done:
            info = {'account_balance': self.account_balance,
                    'stock_balance': self.stock_balance}
            # BUG FIX: the original called _next_observation() here too,
            # indexing one row past the end of df and crashing at episode
            # end. Return a zero observation on the terminal step instead.
            obs = np.zeros(6)
        else:
            info = {}
            obs = self._next_observation()
        return obs, reward, done, info
# Create the stock trading environment from the downloaded price data.
env = StockEnv(stock_df)
# Baseline policy: ignores the observation and acts uniformly at random.
def random_policy(observation):
    """Return a uniformly random action: 0 = buy, 1 = sell, 2 = hold."""
    return np.random.randint(3)
# Run one episode: drive the environment with the random policy, printing
# the action and reward at every step.
obs = env.reset()
for i in range(100):
    action = random_policy(obs)
    obs, reward, done, info = env.step(action)
    print(f"step {i}: action={action}, reward={reward}")
    if done:
        # Episode finished: report final cash and share holdings.
        print(f"account balance: {info['account_balance']}, stock balance: {info['stock_balance']}")
        break
```
在这个例子中,我们使用akshare库获取了上海证券交易所股票“600000.SH”(浦发银行)的历史日线数据,并创建了一个股票强化学习环境。我们定义了一个随机策略,用于执行买入、卖出和不操作的动作。最后,我们运行强化学习环境,打印每个步骤的动作和奖励,并在完成后打印账户余额和股票余额。
阅读全文