基于tushare数据用Python写一个商品期货横截面多因子的CTA策略
时间: 2024-04-29 12:23:35 浏览: 311
首先,需要导入tushare库和其它必要的库,如下所示:
```python
import tushare as ts
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
```
然后,可以使用tushare获取商品期货行情数据,如下所示:
```python
# Download the price history for symbol 'RB' (2010-01-01 through 2022-01-01)
# and order the rows by index.
# NOTE(review): `ts.futures.get_hist_data` could not be matched to a documented
# tushare entry point — confirm it exists in the installed version before
# relying on this call.
df = ts.futures.get_hist_data(
    'RB',
    start='2010-01-01',
    end='2022-01-01',
).sort_index()
```
在获取数据后,需要对数据进行一些必要的处理,例如将行情数据转换为日频率,计算收益率等。具体代码如下所示:
```python
# resample() only works on a datetime-like index. The fetch call may hand back
# date strings as the index, so coerce it first; this is a no-op when the index
# is already a DatetimeIndex.
df.index = pd.to_datetime(df.index)
# Keep one row per calendar day (the last observation of that day) and drop
# days that have missing values, e.g. days without any quotes.
df = df.resample('D').last().dropna()
# Simple close-to-close return. The first row has no previous close, so its
# return is NaN and the row is removed by the dropna() below.
df['ret'] = df['close'].pct_change()
df = df.dropna()
```
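然后，可以根据数据构建多因子模型。需要注意，本例只使用单一品种（代码 'RB'）的时间序列；真正的横截面策略需要在同一时点对多个品种的因子值进行比较和排序。在本例中，模型使用以下变量：
1. 收益率（`ret`）：当日收益率，作为模型的预测目标，而不是输入因子
2. 历史波动率因子：收益率的20日滚动标准差
3. 历史收益率因子：收益率的20日滚动均值
4. 历史成交量因子：成交量的20日滚动均值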
具体代码如下所示:
```python
# Multi-factor return model: standardise -> PCA -> linear regression
class CrossSectionalCTA:
    """Predict a return from rolling return/volume features.

    The estimator is a scikit-learn ``Pipeline`` stored in ``self.lin_reg``
    with three steps: ``StandardScaler``, ``PCA`` and ``LinearRegression``.
    """

    def __init__(self, n_components=2):
        """Create the (unfitted) pipeline.

        Args:
            n_components: Number of principal components kept by the PCA
                step. Defaults to 2.
        """
        self.lin_reg = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=n_components)),
            ('lr', LinearRegression()),
        ])

    def fit(self, X, y):
        """Fit the pipeline on feature matrix ``X`` and target ``y``."""
        self.lin_reg.fit(X, y)

    def predict(self, X):
        """Return the pipeline's predictions for feature matrix ``X``."""
        return self.lin_reg.predict(X)

    def get_factors(self, df, window=20):
        """Build the target/feature table from a price frame.

        Args:
            df: DataFrame with a ``'ret'`` column and a ``'volume'`` column.
            window: Length of the rolling window, in rows. Defaults to 20.

        Returns:
            DataFrame indexed like ``df`` with the columns ``'ret'`` (the
            same-row return, i.e. the target), ``'volatility'`` (rolling std
            of ``ret``), ``'past_returns'`` (rolling mean of ``ret``) and
            ``'volume'`` (rolling mean of ``volume``). Rows containing NaN
            are dropped.

        Raises:
            KeyError: If ``df`` lacks the ``'ret'`` or ``'volume'`` column.
        """
        ret = df['ret']
        features = pd.DataFrame({
            'volatility': ret.rolling(window=window).std(),
            'past_returns': ret.rolling(window=window).mean(),
            'volume': df['volume'].rolling(window=window).mean(),
        })
        # Lag every feature by one row so the features on a given row are
        # computed only from earlier rows. Without the lag the rolling window
        # contains the very return the model is asked to predict.
        features = features.shift(1)
        factors = pd.concat([ret.rename('ret'), features], axis=1)
        return factors.dropna()

    def get_X_y(self, factors):
        """Split a factor table into features and target.

        Args:
            factors: DataFrame as returned by ``get_factors``.

        Returns:
            Tuple ``(X, y)``: ``X`` is ``factors`` without the ``'ret'``
            column, ``y`` is the ``'ret'`` column as a NumPy array.
        """
        X = factors.drop('ret', axis=1)
        y = factors['ret'].values
        return X, y
```
最后，可以使用构建好的多因子模型在测试集上做一个简单回测：按预测收益率的符号决定做多（+1）或做空（-1），再乘以实际收益率并累乘得到累计收益，如下所示:
```python
# Fit the model on one part of the sample and evaluate it on the held-out part
ccta = CrossSectionalCTA()
factors = ccta.get_factors(df)
X, y = ccta.get_X_y(factors)
# shuffle=False keeps the rows in their original (chronological) order: the
# first 70% are used for fitting and the last 30% for evaluation. The default
# shuffling would mix later rows into the training set and would scramble the
# order in which returns are compounded below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)
ccta.fit(X_train, y_train)
# Position is the sign of the predicted return: +1 long, -1 short.
y_pred = np.sign(ccta.predict(X_test))
# A prediction of exactly zero is treated as long.
y_pred[y_pred == 0] = 1
# Per-row strategy return, then compounded cumulative return
ret = y_pred * y_test
cum_ret = np.cumprod(1 + ret) - 1
```
完整代码如下所示:
```python
import tushare as ts
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
# Multi-factor return model: standardise -> PCA -> linear regression
class CrossSectionalCTA:
    """Predict a return from rolling return/volume features.

    The estimator is a scikit-learn ``Pipeline`` stored in ``self.lin_reg``
    with three steps: ``StandardScaler``, ``PCA`` and ``LinearRegression``.
    """

    def __init__(self, n_components=2):
        """Create the (unfitted) pipeline.

        Args:
            n_components: Number of principal components kept by the PCA
                step. Defaults to 2.
        """
        self.lin_reg = Pipeline([
            ('scaler', StandardScaler()),
            ('pca', PCA(n_components=n_components)),
            ('lr', LinearRegression()),
        ])

    def fit(self, X, y):
        """Fit the pipeline on feature matrix ``X`` and target ``y``."""
        self.lin_reg.fit(X, y)

    def predict(self, X):
        """Return the pipeline's predictions for feature matrix ``X``."""
        return self.lin_reg.predict(X)

    def get_factors(self, df, window=20):
        """Build the target/feature table from a price frame.

        Args:
            df: DataFrame with a ``'ret'`` column and a ``'volume'`` column.
            window: Length of the rolling window, in rows. Defaults to 20.

        Returns:
            DataFrame indexed like ``df`` with the columns ``'ret'`` (the
            same-row return, i.e. the target), ``'volatility'`` (rolling std
            of ``ret``), ``'past_returns'`` (rolling mean of ``ret``) and
            ``'volume'`` (rolling mean of ``volume``). Rows containing NaN
            are dropped.

        Raises:
            KeyError: If ``df`` lacks the ``'ret'`` or ``'volume'`` column.
        """
        ret = df['ret']
        features = pd.DataFrame({
            'volatility': ret.rolling(window=window).std(),
            'past_returns': ret.rolling(window=window).mean(),
            'volume': df['volume'].rolling(window=window).mean(),
        })
        # Lag every feature by one row so the features on a given row are
        # computed only from earlier rows. Without the lag the rolling window
        # contains the very return the model is asked to predict.
        features = features.shift(1)
        factors = pd.concat([ret.rename('ret'), features], axis=1)
        return factors.dropna()

    def get_X_y(self, factors):
        """Split a factor table into features and target.

        Args:
            factors: DataFrame as returned by ``get_factors``.

        Returns:
            Tuple ``(X, y)``: ``X`` is ``factors`` without the ``'ret'``
            column, ``y`` is the ``'ret'`` column as a NumPy array.
        """
        X = factors.drop('ret', axis=1)
        y = factors['ret'].values
        return X, y
# Download the price history for symbol 'RB' (2010-01-01 through 2022-01-01)
# and order the rows by index.
# NOTE(review): `ts.futures.get_hist_data` could not be matched to a documented
# tushare entry point — confirm it exists in the installed version before
# relying on this call.
df = ts.futures.get_hist_data(
    'RB',
    start='2010-01-01',
    end='2022-01-01',
).sort_index()
# resample() only works on a datetime-like index. The fetch call may hand back
# date strings as the index, so coerce it first; this is a no-op when the index
# is already a DatetimeIndex.
df.index = pd.to_datetime(df.index)
# Keep one row per calendar day (the last observation of that day) and drop
# days that have missing values, e.g. days without any quotes.
df = df.resample('D').last().dropna()
# Simple close-to-close return. The first row has no previous close, so its
# return is NaN and the row is removed by the dropna() below.
df['ret'] = df['close'].pct_change()
df = df.dropna()
# Fit the model on one part of the sample and evaluate it on the held-out part
ccta = CrossSectionalCTA()
factors = ccta.get_factors(df)
X, y = ccta.get_X_y(factors)
# shuffle=False keeps the rows in their original (chronological) order: the
# first 70% are used for fitting and the last 30% for evaluation. The default
# shuffling would mix later rows into the training set and would scramble the
# order in which returns are compounded below.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=False)
ccta.fit(X_train, y_train)
# Position is the sign of the predicted return: +1 long, -1 short.
y_pred = np.sign(ccta.predict(X_test))
# A prediction of exactly zero is treated as long.
y_pred[y_pred == 0] = 1
# Per-row strategy return, then compounded cumulative return
ret = y_pred * y_test
cum_ret = np.cumprod(1 + ret) - 1
```
阅读全文