写一段python代码,读取一个csv文件,该文件的第一列为时间编号,编号以1,2,3……这样自然数的形式出现,第二列为充电量数值,将该数据集的前70%划分为训练集,其余部分设置为测试集,将该数据集所构成的时间序列进行EMD分解,分解出的子序列中频率最高的3个子序列叠加后用lstm模型进行训练,其余的低频子序列叠加后用svr模型进行训练,然后用stacking方法的集成学习对总体利用lstm模型进行训练,并对总序列进行预测,并打印rmse作为评价指标,并分别绘制训练集的真实值和预测值图片以及测试集的真实值和预测值图片,两张图片的横坐标单位为time/h,纵坐标单位为kwh
时间: 2023-12-03 15:47:44 浏览: 114
python读取当前目录下的CSV文件数据
5星 · 资源好评率100%
以下是参考代码,其中使用了pandas、pyhht、sklearn和keras等常用库:
```python
import pandas as pd
import numpy as np
from pyhht import EMD
from sklearn.svm import SVR
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Load the dataset from data.csv in the working directory.
data = pd.read_csv('data.csv')
# Chronological split: the leading 70% of the rows form the training set,
# everything after them forms the test set.
train_size = int(0.7 * len(data))
train_data = data.iloc[:train_size]
test_data = data.iloc[train_size:]
# Decompose the charge series into intrinsic mode functions (IMFs).
# pyhht's EMD receives the signal in its constructor and is run with
# decompose(); the instance itself is not callable.
# NOTE: the whole series (training and test rows) is decomposed here.
charge_values = data['charge'].values.astype(float)
emd = EMD(charge_values)
imfs = emd.decompose()
# decompose() returns one component per row; transpose so that each column
# of imfs_df is one component and each row is one time step.
imfs_df = pd.DataFrame(imfs).T
# Rank the components by frequency content and keep the three highest.
# The spectrum is taken along the time axis (axis=0), so every column of
# fft_imfs describes exactly one component of imfs_df.
freqs = np.fft.rfftfreq(len(imfs_df))
fft_imfs = np.abs(np.fft.rfft(imfs_df.values, axis=0))
power = fft_imfs ** 2
total_power = power.sum(axis=0)
# Power-weighted mean frequency of each component; a component with no
# power at all gets 0 so it can never be ranked as high-frequency.
mean_freqs = np.divide(
    (freqs[:, np.newaxis] * power).sum(axis=0),
    total_power,
    out=np.zeros_like(total_power),
    where=total_power > 0,
)
# Column positions of the (up to) three components with the largest mean
# frequency.
max_freqs = np.argsort(mean_freqs)[-3:]
high_freq_imfs = imfs_df.iloc[:, max_freqs]
# Train an LSTM on the high-frequency components.
look_back = 20  # number of past time steps fed to every model
if train_size <= look_back:
    raise ValueError('training set must contain more than look_back rows')
high_values = high_freq_imfs.values
n_high = high_values.shape[1]
# Sliding windows over the training rows only, so the test rows are never
# seen while fitting: each sample holds look_back consecutive rows and its
# target is the row that immediately follows them.
lstm_x = np.array(
    [high_values[i - look_back:i] for i in range(look_back, train_size)]
)
lstm_y = high_values[look_back:train_size]
# lstm_x is (samples, look_back, n_high), the layout the LSTM layer is
# declared with below.
lstm_model = Sequential()
lstm_model.add(LSTM(50, input_shape=(look_back, n_high)))
lstm_model.add(Dense(n_high))
lstm_model.compile(loss='mse', optimizer='adam')
lstm_model.fit(lstm_x, lstm_y, epochs=50, batch_size=72, verbose=2)
# Train one SVR per low-frequency component (every component that was not
# selected as high-frequency).
low_freq_imfs = imfs_df.drop(columns=high_freq_imfs.columns)
svr_models = []
for series in low_freq_imfs.values.T:
    # Sliding windows over the training rows only, so the test rows are
    # never seen while fitting: look_back past values predict the next one.
    svr_x = np.array(
        [series[j - look_back:j] for j in range(look_back, train_size)]
    )
    svr_y = series[look_back:train_size]
    svr_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
    svr_model.fit(svr_x, svr_y)
    svr_models.append(svr_model)
# Stacking: fit a meta-learner (an SVR) on the base models' one-step
# predictions for the training rows.
train_rows = range(look_back, len(train_data))
low_values = low_freq_imfs.values
high_values = high_freq_imfs.values
# Windows of the look_back rows preceding every predicted row:
# (samples, look_back, n_components).
low_windows = np.array([low_values[i - look_back:i] for i in train_rows])
high_windows = np.array([high_values[i - look_back:i] for i in train_rows])
# One feature column per SVR plus one per LSTM output. The LSTM returns a
# vector per sample, so its outputs are spread over separate columns; this
# keeps every feature row flat and of equal length.
svr_preds = [
    model.predict(low_windows[:, :, j]) for j, model in enumerate(svr_models)
]
lstm_preds = lstm_model.predict(high_windows)
train_x = np.column_stack(svr_preds + [lstm_preds])
# Targets are the second column of the training rows that have a full
# look_back history.
train_y = train_data.iloc[look_back:, 1].values
stack_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
stack_model.fit(train_x, train_y)
# Predict the test set with the stacked model.
# Positions (in the full series) of the test rows that have look_back test
# rows before them; the first look_back test rows get no prediction.
test_rows = range(train_size + look_back, train_size + len(test_data))
low_values = low_freq_imfs.values
high_values = high_freq_imfs.values
# Windows of the look_back rows preceding every predicted row:
# (samples, look_back, n_components).
low_windows = np.array([low_values[r - look_back:r] for r in test_rows])
high_windows = np.array([high_values[r - look_back:r] for r in test_rows])
# Same feature layout as used to fit the meta-learner: one column per SVR
# followed by one column per LSTM output, so every row is flat.
svr_preds = [
    model.predict(low_windows[:, :, j]) for j, model in enumerate(svr_models)
]
lstm_preds = lstm_model.predict(high_windows)
test_x = np.column_stack(svr_preds + [lstm_preds])
test_y = test_data.iloc[look_back:, 1].values
test_pred = stack_model.predict(test_x)
# Root-mean-square error of the stacked model on both splits.
# The first look_back training rows have no prediction, so they are left
# out of the training comparison.
train_true = train_data.iloc[look_back:, 1]
train_mse = mean_squared_error(train_true, stack_model.predict(train_x))
test_mse = mean_squared_error(test_y, test_pred)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
print('Train RMSE: %.3f' % train_rmse)
print('Test RMSE: %.3f' % test_rmse)
# One figure per split: the true series against the stacked prediction.
# The first look_back rows of a split have no prediction, so the predicted
# curve is drawn against the first column from row look_back onwards.
figures = (
    ('Training Set', train_data, stack_model.predict(train_x)),
    ('Testing Set', test_data, test_pred),
)
for title, subset, predicted in figures:
    plt.plot(subset['time'], subset['charge'], label='True')
    plt.plot(subset.iloc[look_back:, 0], predicted, label='Predicted')
    plt.title(title)
    plt.xlabel('Time/h')
    plt.ylabel('kwh')
    plt.legend()
    plt.show()
```
需要注意的是,上述代码假设CSV文件带有表头,且两列的列名分别为time和charge;stacking的元学习器使用的是SVR而非LSTM。此外,代码的具体参数和模型结构可能需要根据实际数据集进行调整。
阅读全文