import itertools import warnings import pandas as pd import numpy as np import statsmodels.api as sm from datetime import datetime from statsmodels.tsa.arima.model import ARIMA from statsmodels.graphics.tsaplots import plot_acf, plot_pacf from statsmodels.stats.diagnostic import acorr_ljungbox from sklearn.model_selection import train_test_split data = pd.read_csv('data.csv', parse_dates=['x'], index_col='x') train_data1, test_data = train_test_split(data1, test_size=0.3, shuffle=False) data['lag1'] = data['y'].shift(1) data['lag2'] = data['y'].shift(2) data['lag3'] = data['y'].shift(3) data['lag4'] = data['y'].shift(4) data['lag5'] = data['y'].shift(5) data['lag6'] = data['y'].shift(6) data['lag7'] = data['y'].shift(7) data.dropna(inplace=True) train_data, test_data1 = train_test_split(data, test_size=0.3, shuffle=False) g=int(input("输入P的峰值: ")) h=int(input("输入D的峰值: ")) i=int(input("输入Q的峰值: ")) p = range(0, g) d = range(0, h) q = range(0, i) pdq = list(itertools.product(p, d, q)) best_pdq = None best_aic = np.inf for param in pdq: model = sm.tsa.ARIMA(data['y'], exog=data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']], order=param) results = model.fit() aic = results.aic if aic < best_aic: best_pdq = param best_aic = aic a=best_pdq[0] b=best_pdq[1] c=best_pdq[2] model = ARIMA(data['y'], exog=data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']], order=(a,b,c)) results = model.fit() max_lag = model.k_ar model_fit = model.fit() resid = model_fit.resid lb_test = acorr_ljungbox(resid) p_value=round(lb_test['lb_pvalue'][max_lag],4) if p_value>0.05: forecast = results.forecast(steps=1, exog=data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']].iloc[-1:]) # 输出预测值 forecast.index[0].strftime('%Y-%m') print("下个月的预测结果是",round(forecast[0])) else: print('输入的数据不适合使用arima模型进行预测分析,请尝试其他模型'),如何添加检测预测准确率的python代码
时间: 2023-08-17 07:05:57 浏览: 192
要添加检测预测准确率的代码,可以使用均方根误差(RMSE)或平均绝对误差(MAE)等指标。以RMSE为例,可以按以下步骤进行计算和输出:
1. 在导入所需的库后,将测试数据集中的实际值和预测值提取出来:
```
test_actual = test_data['y']
test_pred = results.predict(start=test_data.index[0], end=test_data.index[-1], exog=test_data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']])
```
2. 计算RMSE并输出结果:
```
rmse = np.sqrt(((test_pred - test_actual) ** 2).mean())
print('测试集的RMSE为', rmse)
```
完整的代码如下:
```
import itertools
import warnings
import pandas as pd
import numpy as np
import statsmodels.api as sm
from datetime import datetime
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.stats.diagnostic import acorr_ljungbox
from sklearn.model_selection import train_test_split
# 导入数据
data = pd.read_csv('data.csv', parse_dates=['x'], index_col='x')
# 划分训练集和测试集
train_data1, test_data = train_test_split(data1, test_size=0.3, shuffle=False)
# 添加滞后特征
data['lag1'] = data['y'].shift(1)
data['lag2'] = data['y'].shift(2)
data['lag3'] = data['y'].shift(3)
data['lag4'] = data['y'].shift(4)
data['lag5'] = data['y'].shift(5)
data['lag6'] = data['y'].shift(6)
data['lag7'] = data['y'].shift(7)
data.dropna(inplace=True)
# 再次划分训练集和测试集
train_data, test_data1 = train_test_split(data, test_size=0.3, shuffle=False)
# 寻找最优的ARIMA模型
g=int(input("输入P的峰值: "))
h=int(input("输入D的峰值: "))
i=int(input("输入Q的峰值: "))
p = range(0, g)
d = range(0, h)
q = range(0, i)
pdq = list(itertools.product(p, d, q))
best_pdq = None
best_aic = np.inf
for param in pdq:
model = sm.tsa.ARIMA(data['y'], exog=data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']], order=param)
results = model.fit()
aic = results.aic
if aic < best_aic:
best_pdq = param
best_aic = aic
a=best_pdq[0]
b=best_pdq[1]
c=best_pdq[2]
model = ARIMA(data['y'], exog=data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']], order=(a,b,c))
results = model.fit()
# 进行预测并计算测试集的RMSE
max_lag = model.k_ar
model_fit = model.fit()
resid = model_fit.resid
lb_test = acorr_ljungbox(resid)
p_value=round(lb_test['lb_pvalue'][max_lag],4)
if p_value>0.05:
# 预测结果
forecast = results.forecast(steps=1, exog=data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']].iloc[-1:])
print("下个月的预测结果是",round(forecast[0]))
# 计算RMSE并输出结果
test_actual = test_data['y']
test_pred = results.predict(start=test_data.index[0], end=test_data.index[-1], exog=test_data[['lag1', 'lag2', 'lag3', 'lag4', 'lag5', 'lag6', 'lag7']])
rmse = np.sqrt(((test_pred - test_actual) ** 2).mean())
print('测试集的RMSE为', rmse)
else:
print('输入的数据不适合使用ARIMA模型进行预测分析,请尝试其他模型')
```
阅读全文