Write a piece of Python code that reads a CSV file whose first column is a time index given as natural numbers (1, 2, 3, ...) and whose second column is the charging load. Take the first 70% of the dataset as the training set and the rest as the test set. Apply EMD decomposition to the time series formed by the data; train an SVR model on the low-frequency sub-series and an LSTM model on the high-frequency sub-series, then use stacking-based ensemble learning to train on the whole and predict the overall series. Print RMSE as the evaluation metric, and plot the true and predicted values of the training set and of the test set as two separate figures, with the x-axis in time/h and the y-axis in kWh.
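A sketch along the following lines should work. It assumes data.csv has a header row, with the time index in the first column and the charging load (kWh) in the second, and that PyEMD, scikit-learn, Keras and matplotlib are installed. The hyperparameters (SVR settings, LSTM size, window length n_steps = 3, training epochs) are illustrative and would need tuning on real data.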
# Import the required libraries
import pandas as pd
import numpy as np
from PyEMD import EMD
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Read the csv file (a header row is assumed; column 1 = time index, column 2 = charging load)
df = pd.read_csv('data.csv')
# Split the first 70% into the training set and the rest into the test set
train_size = int(len(df) * 0.7)
train, test = df.values[:train_size, :], df.values[train_size:, :]
# Decompose the time series with EMD
emd = EMD()
imfs = emd(df.iloc[:, 1].values)
# Separate the low- and high-frequency components. PyEMD returns the IMFs ordered
# from highest to lowest frequency, so the last component (the slowest oscillation,
# i.e. the residual trend) is taken as the low-frequency part.
low_freq = imfs[-1]
high_freq = imfs[:-1]
# Split each component at the same point as the original series
train_low, test_low = low_freq[:train_size], low_freq[train_size:]
train_high, test_high = high_freq[:, :train_size], high_freq[:, train_size:]
# Train an SVR model on the low-frequency part
scaler = StandardScaler()
train_low_scaled = scaler.fit_transform(train_low.reshape(-1, 1))
test_low_scaled = scaler.transform(test_low.reshape(-1, 1))
svr = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
svr.fit(train_low_scaled, train[:, 1])
# Train an LSTM model on the high-frequency parts
train_high = train_high.T  # shape: (samples, number of high-frequency IMFs)
test_high = test_high.T

def create_lstm_model(train_X, train_y, n_steps, n_features):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(train_X, train_y, epochs=200, verbose=0)
    return model
n_steps = 3
n_features = len(high_freq)
# Build sliding windows of the high-frequency IMFs as LSTM inputs;
# the target is the original series value at the end of each window
train_X, train_y = [], []
test_X, test_y = [], []
for i in range(n_steps, len(train_high)):
    train_X.append(train_high[i-n_steps:i, :])
    train_y.append(train[i, 1])
for i in range(n_steps, len(test_high)):
    test_X.append(test_high[i-n_steps:i, :])
    test_y.append(test[i, 1])
train_X, train_y = np.array(train_X), np.array(train_y)
test_X, test_y = np.array(test_X), np.array(test_y)
lstm = create_lstm_model(train_X, train_y, n_steps, n_features)
# Ensemble learning via stacking: the base models' predictions become the meta-features
# of a linear-SVR meta-learner. Predictions are aligned on the LSTM window offset,
# so the first n_steps samples of each split are dropped.
def stacked_dataset(low_scaled, high_X):
    svr_pred = svr.predict(low_scaled[n_steps:])
    lstm_pred = lstm.predict(high_X, verbose=0).flatten()
    return np.column_stack((svr_pred, lstm_pred))

stacked_train_X = stacked_dataset(train_low_scaled, train_X)
stacked_test_X = stacked_dataset(test_low_scaled, test_X)
meta_model = SVR(kernel='linear')
meta_model.fit(stacked_train_X, train_y)
train_pred = meta_model.predict(stacked_train_X)
test_pred = meta_model.predict(stacked_test_X)
# Report RMSE on the test set
print('RMSE:', np.sqrt(mean_squared_error(test_y, test_pred)))
# Plot true vs. predicted values for the training set
plt.figure()
plt.plot(train[n_steps:, 0], train_y, label='True Train')
plt.plot(train[n_steps:, 0], train_pred, label='Predicted Train - Stacking')
plt.xlabel('time/h')
plt.ylabel('kWh')
plt.legend()
plt.show()
# Plot true vs. predicted values for the test set
plt.figure()
plt.plot(test[n_steps:, 0], test_y, label='True Test')
plt.plot(test[n_steps:, 0], test_pred, label='Predicted Test - Stacking')
plt.xlabel('time/h')
plt.ylabel('kWh')
plt.legend()
plt.show()
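For a quick end-to-end check without real measurements, a synthetic data.csv can be written first; the daily sinusoid plus noise below is purely illustrative, and the column names time and charge_kwh are arbitrary (only the column order matters to the script above).

# Purely illustrative: create a synthetic data.csv so the script above can run end to end
import numpy as np
import pandas as pd

t = np.arange(1, 501)  # time index 1, 2, 3, ...
load = 50 + 10 * np.sin(2 * np.pi * t / 24) + np.random.normal(0, 2, t.size)  # daily cycle plus noise
pd.DataFrame({'time': t, 'charge_kwh': load}).to_csv('data.csv', index=False)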