写一个使用Jupyter实现FEDformer预测的代码,用于对一个包含5维特征的1400x5的数据集进行预测。代码中包括了数据标准化、绘制数据趋势图、早停、正则化、计算训练损失和验证损失并绘图,以及绘制各列数据的预测图和总体数据的预测图,并计算MAE/MAPE/MSE/RMSE/SMAPE指标。
时间: 2024-04-09 17:27:37 浏览: 212
以下是一个使用Jupyter实现FEDformer预测的代码示例(注意:示例中的 `FEDformer` 类仅是一个单层线性模型的占位实现,并不包含完整的 FEDformer 网络结构,实际使用时需替换为真正的模型实现):
```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
# Data preprocessing
data = pd.read_csv('dataset.csv')  # load the raw dataset; assumes every column is numeric -- TODO confirm

# Split chronologically first (shuffle=False keeps the row order), so the
# scaler below never sees the validation rows.
train_raw, val_raw = train_test_split(data.to_numpy(), test_size=0.2, shuffle=False)

# Min-max scaling. The scaler is fitted on the training rows only and then
# applied to the validation rows; fitting on the full dataset would leak
# validation statistics into training.
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_raw)
val_data = scaler.transform(val_raw)

# Full scaled dataset, kept so that code referring to `data_scaled` still works.
data_scaled = np.concatenate([train_data, val_data], axis=0)
# Dataset definition
class CustomDataset(Dataset):
    """Row-wise dataset over a 2-D array of samples.

    The input is converted once to a ``float32`` tensor. Without this, a
    float64 numpy array is collated by ``DataLoader`` into double tensors,
    which do not match the default float32 parameters of ``nn.Linear``.

    Args:
        data: array-like of numeric samples (anything ``np.asarray`` accepts);
            indexing along the first axis yields one sample.
    """

    def __init__(self, data):
        self.data = torch.as_tensor(np.asarray(data), dtype=torch.float32)

    def __len__(self):
        """Return the number of samples (size of the first axis)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as a float32 tensor."""
        return self.data[idx]
# Model definition
class FEDformer(nn.Module):
    """Single linear layer mapping ``input_dim`` features to ``output_dim``.

    NOTE: despite its name, this class contains only one ``nn.Linear``; it is
    a placeholder and not an implementation of the FEDformer architecture.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output features per sample. Defaults to
            ``input_dim`` so the output has one column per feature and can be
            compared with a target of the same width.
    """

    def __init__(self, input_dim, output_dim=None):
        super().__init__()
        if output_dim is None:
            output_dim = input_dim
        self.fc = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Apply the linear layer to ``x`` (last dimension ``input_dim``)."""
        return self.fc(x)
# Training step for one epoch
def train(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Each batch is used as both the model input and the loss target.

    Args:
        model: module to optimise; switched to training mode.
        dataloader: sized iterable yielding batch tensors.
        criterion: loss callable taking ``(outputs, targets)``.
        optimizer: optimizer bound to ``model``'s parameters.
        device: device the batches are moved to before the forward pass.

    Returns:
        The mean of the per-batch loss values (a Python float).

    Raises:
        ZeroDivisionError: if ``dataloader`` has length 0.
    """
    model.train()
    running_loss = 0.0
    for batch in dataloader:
        inputs = batch.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, inputs)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(dataloader)
# Validation pass
def validate(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating parameters.

    Each batch is used as both the model input and the loss target, and
    gradient tracking is disabled for the whole pass.

    Args:
        model: module to evaluate; switched to evaluation mode.
        dataloader: sized iterable yielding batch tensors.
        criterion: loss callable taking ``(outputs, targets)``.
        device: device the batches are moved to before the forward pass.

    Returns:
        The mean of the per-batch loss values (a Python float).

    Raises:
        ZeroDivisionError: if ``dataloader`` has length 0.
    """
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for batch in dataloader:
            inputs = batch.to(device)
            outputs = model(inputs)
            running_loss += criterion(outputs, inputs).item()
    return running_loss / len(dataloader)
# Full training loop with early stopping
def train_model(model, train_dataloader, val_dataloader, criterion, optimizer, device, num_epochs=100, patience=5):
    """Train ``model`` with early stopping and restore its best weights.

    Every epoch runs ``train`` then ``validate``, records both losses and
    prints them. Training stops early once the validation loss has failed to
    improve for ``patience`` consecutive epochs.

    Args:
        model: module to train (modified in place).
        train_dataloader: loader passed to ``train``.
        val_dataloader: loader passed to ``validate``.
        criterion: loss callable forwarded to ``train`` and ``validate``.
        optimizer: optimizer forwarded to ``train``.
        device: device forwarded to ``train`` and ``validate``.
        num_epochs: maximum number of epochs.
        patience: number of consecutive non-improving epochs tolerated.

    Returns:
        ``(model, train_losses, val_losses)`` where the two lists hold one
        loss value per completed epoch.
    """
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_state = None
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        train_loss = train(model, train_dataloader, criterion, optimizer, device)
        val_loss = validate(model, val_dataloader, criterion, device)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # state_dict() hands back references to the live parameter
            # tensors, so later optimizer steps would overwrite the "best"
            # snapshot. Clone every tensor to freeze it.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print('Early stopping.')
                break

    # No snapshot exists when no epoch ran or the validation loss never
    # improved on infinity (e.g. it was NaN); keep the current weights then.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model, train_losses, val_losses
# Hyperparameters and device
input_dim = 5        # number of feature columns fed to the model
batch_size = 32
lr = 0.001
weight_decay = 1e-4  # L2 penalty applied by Adam (regularization)
num_epochs = 100
patience = 5         # non-improving epochs tolerated before early stopping
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Datasets and data loaders (training batches are shuffled, validation is not)
train_dataset = CustomDataset(train_data)
val_dataset = CustomDataset(val_data)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Model
model = FEDformer(input_dim).to(device)

# Loss function and optimizer; weight_decay adds the regularization term
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Train the model
trained_model, train_losses, val_losses = train_model(
    model, train_dataloader, val_dataloader,
    criterion, optimizer, device, num_epochs, patience,
)

# Plot the training and validation loss curves
plt.plot(train_losses, label='Train Loss')
plt.plot(val_losses, label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
# Prediction
def predict(model, dataloader, scaler, device):
    """Run ``model`` over ``dataloader`` and undo the scaling of its outputs.

    Args:
        model: module to evaluate; switched to evaluation mode.
        dataloader: iterable yielding batch tensors.
        scaler: object exposing ``inverse_transform(array)``; it receives the
            stacked model outputs.
        device: device the batches are moved to before the forward pass.

    Returns:
        Whatever ``scaler.inverse_transform`` returns for the stacked
        outputs, with rows in the order the loader produced them.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    model.eval()
    batch_outputs = []
    with torch.no_grad():
        for batch in dataloader:
            outputs = model(batch.to(device))
            batch_outputs.append(outputs.cpu().numpy())
    if not batch_outputs:
        raise ValueError('dataloader yielded no batches; nothing to predict')
    predictions = np.concatenate(batch_outputs, axis=0)
    return scaler.inverse_transform(predictions)  # back to the original scale
# Predict on the training and validation sets.
# train_dataloader shuffles its batches, so predictions made through it would
# not line up row-for-row with train_data. Use an in-order loader instead.
train_eval_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
train_predictions = predict(trained_model, train_eval_dataloader, scaler, device)
val_predictions = predict(trained_model, val_dataloader, scaler, device)
# Error metrics
def calculate_metrics(actual, predicted):
    """Compute MAE, MAPE, MSE, RMSE and SMAPE over all elements.

    Args:
        actual: array-like of ground-truth values.
        predicted: array-like of predictions with the same shape as
            ``actual``.

    Returns:
        ``(mae, mape, mse, rmse, smape)`` as floats; ``mape`` and ``smape``
        are percentages. ``mape`` is averaged over the entries whose actual
        value is non-zero and is NaN if there are none. An entry where both
        the actual and predicted values are zero contributes 0 to ``smape``.

    Raises:
        ValueError: if the inputs are empty or their shapes differ.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    if actual.shape != predicted.shape:
        # Without this check numpy would silently broadcast the two arrays.
        raise ValueError(
            f'shape mismatch: actual {actual.shape} vs predicted {predicted.shape}'
        )
    if actual.size == 0:
        raise ValueError('cannot compute metrics on empty input')

    error = actual - predicted
    abs_error = np.abs(error)

    mae = float(abs_error.mean())
    mse = float(np.mean(error ** 2))
    rmse = float(np.sqrt(mse))

    # MAPE is undefined where the actual value is 0; skip those entries
    # instead of producing inf/NaN for the whole metric.
    nonzero = actual != 0
    if nonzero.any():
        mape = float(np.mean(abs_error[nonzero] / np.abs(actual[nonzero])) * 100)
    else:
        mape = float('nan')

    # SMAPE: a 0/0 term means a perfect prediction of zero, so it counts as 0.
    denominator = np.abs(actual) + np.abs(predicted)
    ratio = np.zeros_like(abs_error)
    np.divide(2 * abs_error, denominator, out=ratio, where=denominator != 0)
    smape = float(ratio.mean() * 100)

    return mae, mape, mse, rmse, smape
# Metrics for the training and validation sets.
# The predictions were inverse-transformed by predict(), so the scaled ground
# truth is brought back to the original units before the two are compared.
train_actual = scaler.inverse_transform(train_data)
val_actual = scaler.inverse_transform(val_data)
train_mae, train_mape, train_mse, train_rmse, train_smape = calculate_metrics(train_actual, train_predictions)
val_mae, val_mape, val_mse, val_rmse, val_smape = calculate_metrics(val_actual, val_predictions)

# Per-feature prediction plots (three panels per row)
n_cols = 3
n_rows = -(-input_dim // n_cols)  # ceiling division
plt.figure(figsize=(12, 3 * n_rows))
for i in range(input_dim):
    plt.subplot(n_rows, n_cols, i + 1)
    plt.plot(train_actual[:, i], label='Actual')
    plt.plot(train_predictions[:, i], label='Predicted')
    plt.xlabel('Time')
    plt.ylabel(f'Feature {i+1}')
    plt.title(f'Feature {i+1} - Train')
    plt.legend()
plt.tight_layout()
plt.show()

# Overall prediction plot (all features flattened into one series)
plt.plot(train_actual.flatten(), label='Actual')
plt.plot(train_predictions.flatten(), label='Predicted')
plt.xlabel('Time')
plt.ylabel('Value')
plt.title('Overall Prediction - Train')
plt.legend()
plt.show()

# Print the metric results
for header, (mae, mape, mse, rmse, smape) in (
    ('Train Metrics:', (train_mae, train_mape, train_mse, train_rmse, train_smape)),
    ('\nVal Metrics:', (val_mae, val_mape, val_mse, val_rmse, val_smape)),
):
    print(header)
    print(f'MAE: {mae:.4f}')
    print(f'MAPE: {mape:.4f}%')
    print(f'MSE: {mse:.4f}')
    print(f'RMSE: {rmse:.4f}')
    print(f'SMAPE: {smape:.4f}%')
```
请确保将数据集文件命名为`dataset.csv`并与代码文件放在同一目录下。替换代码中的输入数据集、超参数等部分以适应你的实际需求。在Jupyter中运行此代码将输出训练损失和验证损失的曲线图,以及各列数据的预测图和总体数据的预测图,并打印出训练集和验证集的指标结果。
阅读全文