用 Python 实现(#11):根据2015-2016年的用水量数据,预测编号1-200的用户在2017年每个月的用水量,并对预测值进行评价。
时间: 2024-03-17 09:45:18 浏览: 69
好的,具体实现如下:
首先,读取数据,进行预处理:
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
# Load the usage table, indexed by user id.
# NOTE(review): assumes one column per month-end date ('2015-01-31' ...
# '2016-12-31'), as implied by the column slices used further down - confirm
# against the actual CSV.
data = pd.read_csv('water_usage.csv', index_col='id')
# Mask implausible readings (negative or above 500) as NaN *before* filling,
# so the gaps created by the mask get filled too. DataFrame.where(mask) is
# equivalent to the boolean-frame indexing data[mask].
data = data.where((data >= 0) & (data <= 500))
# Fill gaps along the time axis (columns): a missing month takes the same
# user's previous month, and bfill covers gaps at the start of a series.
# ffill()/bfill() replace the deprecated fillna(method='ffill'), which also
# defaulted to axis=0 and therefore copied values from a different user.
data = data.ffill(axis=1).bfill(axis=1)
# 平稳性检验
from statsmodels.tsa.stattools import adfuller
def test_stationarity(timeseries, window=12):
    """Plot rolling statistics and print an augmented Dickey-Fuller test.

    Args:
        timeseries: One-dimensional, time-ordered observations, e.g. a
            pandas Series holding monthly usage values.
        window: Number of observations in the rolling mean / rolling
            standard-deviation window. Defaults to 12, the value that was
            previously hard-coded.

    Returns:
        None. The rolling-statistics figure is shown and the test summary
        is printed.
    """
    # The ADF regression cannot be run on missing values, so drop them first.
    series = pd.Series(timeseries, dtype=float).dropna()

    # Determine rolling statistics.
    rolling = series.rolling(window)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    # Plot rolling statistics.
    plt.plot(series, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show()

    # Perform the Dickey-Fuller test; the first four entries of the result
    # are the statistic, p-value, lag count and number of observations, and
    # entry 4 maps significance levels to critical values.
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(series, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used',
               'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput[f'Critical Value ({key})'] = value
    print(dfoutput)
# Check stationarity on an actual time series: the monthly mean usage of
# users 1-200 across 2015-2016. A single column such as '2016-12-31' is a
# cross-section of users for one month, not observations ordered in time.
test_stationarity(data.loc[1:200, '2015-01-31':'2016-12-31'].mean(axis=0))
```
然后,使用ARIMA模型进行预测:
```python
# Build the ARIMA models.
# ARIMA is a univariate model, so fit one model per user on that user's
# monthly history instead of passing the whole users-by-months table at once.
# The legacy statsmodels.tsa.arima_model.ARIMA (with fit(disp=...) and a
# tuple-returning forecast()) has been removed from statsmodels; import the
# current implementation here so this snippet works on its own.
from statsmodels.tsa.arima.model import ARIMA

history = data.loc[1:200, '2015-01-31':'2016-12-31']
# Month-end labels for 2017, in the same 'YYYY-MM-DD' style as the history.
months_2017 = (
    pd.date_range('2017-01-01', periods=12, freq='MS') + pd.offsets.MonthEnd(0)
).strftime('%Y-%m-%d')

# Forecast every month of 2017 for each user.
per_user = {}
for user_id, usage in history.iterrows():
    usage = usage.astype(float).ffill().bfill()
    if usage.isna().all():
        # No usable history for this user: leave the forecast empty.
        per_user[user_id] = np.full(12, np.nan)
        continue
    model = ARIMA(usage.to_numpy(), order=(2, 1, 2))
    model_fit = model.fit()
    # Water usage cannot be negative, so clip the extrapolation at zero.
    per_user[user_id] = np.clip(model_fit.forecast(steps=12), 0, None)

forecast = pd.DataFrame.from_dict(per_user, orient='index', columns=months_2017)
forecast.index.name = 'id'
# Print the forecast (rows: users, columns: months of 2017).
print(forecast)
```
最后,使用评价指标对预测结果进行评价:
```python
# Evaluate the forecasting approach with a hold-out back-test.
# The data end at 2016-12-31, so there are no 2017 actuals to score a 2017
# forecast against. Instead, fit each user's model on all but the last 6
# months, forecast those 6 months, and compare with what was observed.
from statsmodels.tsa.arima.model import ARIMA

holdout = 6
history = data.loc[1:200, '2015-01-31':'2016-12-31'].astype(float)
# Fill gaps along time and drop users that have no observations at all.
history = history.ffill(axis=1).bfill(axis=1).dropna(how='any')
train, actual = history.iloc[:, :-holdout], history.iloc[:, -holdout:]
predicted = np.vstack([
    ARIMA(row, order=(2, 1, 2)).fit().forecast(steps=holdout)
    for row in train.to_numpy()
])
# Root-mean-square error over all users and held-out months.
mse = mean_squared_error(actual.to_numpy(), predicted)
rmse = np.sqrt(mse)
print('RMSE: %.2f' % rmse)
# Visualise the back-test: mean usage across users per held-out month.
plt.plot(actual.columns, actual.mean(axis=0).to_numpy(), label='Actual')
plt.plot(actual.columns, predicted.mean(axis=0), label='Predicted')
plt.legend()
plt.show()
```
完整代码如下:
```python
"""Forecast 2017 monthly water usage for users 1-200 from 2015-2016 data.

Pipeline: load and clean the usage table, check stationarity of the mean
monthly usage, fit one ARIMA model per user to forecast the 12 months of
2017, and score the approach with a hold-out back-test on the last months
of 2016 (no 2017 actuals exist to compare against).
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
# The legacy statsmodels.tsa.arima_model.ARIMA has been removed from
# statsmodels; this is its replacement.
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

ARIMA_ORDER = (2, 1, 2)
HISTORY_START, HISTORY_END = '2015-01-31', '2016-12-31'
FORECAST_STEPS = 12   # months of 2017 to predict
HOLDOUT_STEPS = 6     # trailing months of 2016 reserved for the back-test


def test_stationarity(timeseries, window=12):
    """Plot rolling statistics and print an augmented Dickey-Fuller test.

    Args:
        timeseries: One-dimensional, time-ordered observations.
        window: Size of the rolling mean / standard-deviation window.
    """
    # The ADF regression cannot be run on missing values, so drop them first.
    series = pd.Series(timeseries, dtype=float).dropna()

    # Determine rolling statistics.
    rolling = series.rolling(window)
    rolmean = rolling.mean()
    rolstd = rolling.std()

    # Plot rolling statistics.
    plt.plot(series, color='blue', label='Original')
    plt.plot(rolmean, color='red', label='Rolling Mean')
    plt.plot(rolstd, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean & Standard Deviation')
    plt.show()

    # Perform the Dickey-Fuller test.
    print('Results of Dickey-Fuller Test:')
    dftest = adfuller(series, autolag='AIC')
    dfoutput = pd.Series(
        dftest[0:4],
        index=['Test Statistic', 'p-value', '#Lags Used',
               'Number of Observations Used'],
    )
    for key, value in dftest[4].items():
        dfoutput[f'Critical Value ({key})'] = value
    print(dfoutput)


def forecast_user(values, steps, order=ARIMA_ORDER):
    """Fit a univariate ARIMA model to one user's history and forecast.

    Args:
        values: That user's usage values in time order.
        steps: Number of future periods to predict.
        order: ARIMA (p, d, q) order.

    Returns:
        A 1-D array of `steps` predictions, clipped at zero because usage
        cannot be negative.
    """
    fitted = ARIMA(np.asarray(values, dtype=float), order=order).fit()
    return np.clip(fitted.forecast(steps=steps), 0, None)


# Load the usage table, indexed by user id.
# NOTE(review): assumes one column per month-end date - confirm against the CSV.
data = pd.read_csv('water_usage.csv', index_col='id')
# Mask implausible readings first, then fill along the time axis so that the
# gaps created by the mask are filled as well (previous month, else next).
data = data.where((data >= 0) & (data <= 500))
data = data.ffill(axis=1).bfill(axis=1)

# Users 1-200 over 2015-2016; rows that are still NaN have no data at all.
history = data.loc[1:200, HISTORY_START:HISTORY_END].astype(float)
history = history.dropna(how='any')
if history.empty:
    raise ValueError('no usable usage history for users 1-200')

# Stationarity check on the mean monthly usage (a real time series, unlike a
# single month's column, which is a cross-section of users).
test_stationarity(history.mean(axis=0))

# Forecast every month of 2017 for each user.
months_2017 = (
    pd.date_range('2017-01-01', periods=FORECAST_STEPS, freq='MS')
    + pd.offsets.MonthEnd(0)
).strftime('%Y-%m-%d')
forecast = pd.DataFrame(
    [forecast_user(row, FORECAST_STEPS) for row in history.to_numpy()],
    index=history.index,
    columns=months_2017,
)
# Print the forecast (rows: users, columns: months of 2017).
print(forecast)

# Back-test: fit on all but the last HOLDOUT_STEPS months and compare the
# forecast for those months with the observed values.
train = history.iloc[:, :-HOLDOUT_STEPS]
actual = history.iloc[:, -HOLDOUT_STEPS:]
predicted = np.vstack(
    [forecast_user(row, HOLDOUT_STEPS) for row in train.to_numpy()]
)
# Root-mean-square error over all users and held-out months.
mse = mean_squared_error(actual.to_numpy(), predicted)
rmse = np.sqrt(mse)
print('RMSE: %.2f' % rmse)

# Visualise the back-test: mean usage across users per held-out month.
plt.plot(actual.columns, actual.mean(axis=0).to_numpy(), label='Actual')
plt.plot(actual.columns, predicted.mean(axis=0), label='Predicted')
plt.legend()
plt.show()
```
阅读全文