import os
import re
import time
import urllib
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pymysql
import requests
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima_model import ARIMA

# --------- Connection --------------
# No default database is selected here; the query below addresses the table
# with its schema-qualified name (sheji.sale).
# NOTE(review): credentials are hard-coded; consider moving them to
# configuration or environment variables.
connect = pymysql.connect(host='localhost',  # local database
                          user='root',
                          password='123456',
                          port=3306,
                          charset='utf8')
cur = connect.cursor()
print(cur)

# Read the monthly sales rows into a two-column DataFrame.
try:
    select_sqli = "SELECT time,xiaoliang FROM sheji.sale where chexing='海豚';"
    cur.execute(select_sqli)
    data = pd.DataFrame(cur.fetchall(), columns=['time', 'xiaoliang'])
except Exception as e:
    print("读取数据失败:", e)
    # Without data nothing below can run; re-raise so the real cause is
    # reported instead of a later NameError on `data`.
    raise
else:
    print("读取数据成功")
finally:
    # All rows are already copied into the DataFrame (or the read failed),
    # so the cursor and connection are no longer needed.
    cur.close()
    connect.close()

# Convert the 'time' column from 'YYYY-MM' text to datetime values.
data['time'] = pd.to_datetime(data['time'], format='%Y-%m')
print(data)
# Question from the original post: how should this code be continued?
时间: 2024-01-27 20:06:33 浏览: 116
接下来可以对数据进行预处理和可视化分析,例如:
1. 将时间列设置为数据的索引
```
# Use the timestamp column as the index and put the rows in chronological
# order: the SQL query has no ORDER BY, so the row order it returns is not
# guaranteed, while differencing and time-series splits depend on it.
data = data.set_index('time').sort_index()
```
2. 对销量数据进行一阶差分,以消除趋势、使序列尽量平稳(差分后第一行为 NaN,需要用 dropna 去掉;是否已经平稳可再用 ADF 检验确认)
```
# First-order difference of every column; the first row has no predecessor
# and becomes NaN, so rows containing NaN are dropped.
diff_data = data.diff(periods=1).dropna(axis=0, how='any')
```
3. 对平稳时间序列进行自相关和偏自相关分析,以确定 ARIMA 模型的参数
```
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Draw the autocorrelation plot first, then the partial autocorrelation
# plot, both for the differenced series.
for draw_correlogram in (plot_acf, plot_pacf):
    draw_correlogram(diff_data)
```
4. 通过时间序列交叉验证(TimeSeriesSplit)在 (p, d, q) 参数网格上搜索,选择平均均方误差(MSE)最小的 ARIMA 模型
```
from sklearn.model_selection import TimeSeriesSplit
from itertools import product

# Grid search over ARIMA orders: every (p, d, q) combination with each term
# in 0..2 is scored by its mean MSE across the time-series CV folds.
# NOTE(review): this uses the legacy statsmodels API (`fit(disp=...)`,
# `forecast(...)[0]`) from `statsmodels.tsa.arima_model`, which was removed
# in statsmodels 0.13 - confirm the installed version before running.
p = range(0, 3)
d = range(0, 3)
q = range(0, 3)
params = product(p, d, q)
params_list = list(params)

tss = TimeSeriesSplit(n_splits=3)
best_model = None   # best (p, d, q) found so far; stays None if nothing fits
best_mse = np.inf

for params in params_list:
    mse_list = []
    for train_index, test_index in tss.split(diff_data):
        train_data = diff_data.iloc[train_index]
        test_data = diff_data.iloc[test_index]
        try:
            model = ARIMA(train_data, order=params)
            model_fit = model.fit(disp=False)
            pred = model_fit.forecast(len(test_data))[0]
            mse = mean_squared_error(test_data, pred)
            mse_list.append(mse)
        except Exception:
            # Some orders cannot be fitted on some folds; skip that fold.
            # `Exception` (not a bare except) keeps Ctrl-C and SystemExit
            # able to stop the search.
            continue
    # Only candidates that produced at least one fold score are compared.
    if mse_list:
        mean_mse = np.mean(mse_list)
        if mean_mse < best_mse:
            best_mse = mean_mse
            best_model = params

print(best_model)
```
5. 使用最佳的 ARIMA 模型预测未来 12 期的差分值,将其累加并加上最后一个实际销量以还原为销量水平,最后将结果可视化
```
# Fit the selected order on the whole differenced series and forecast the
# next 12 steps (legacy statsmodels API: element 0 of the forecast result
# holds the point forecasts).
model = ARIMA(diff_data, order=best_model)
model_fit = model.fit(disp=False)
pred = model_fit.forecast(12)[0]

# The model predicts period-to-period changes; accumulating them and adding
# the last observed value turns them back into sales levels.
pred_cumsum = pred.cumsum()

# Forecast dates start one month AFTER the last observation and sit on month
# starts, matching timestamps parsed from 'YYYY-MM'. Starting at the last
# observation with month-end frequency would stamp the first forecast inside
# the last observed month.
forecast_index = pd.date_range(start=data.index[-1] + pd.offsets.MonthBegin(1),
                               periods=12, freq='MS')
forecast_data = pd.Series(data.iloc[-1, 0], index=forecast_index)
forecast_data += pred_cumsum

plt.plot(data)
plt.plot(forecast_data)
# Needed for the figure to appear when this runs as a plain script.
plt.show()
```
阅读全文