Transformer Time Series Forecasting with PyTorch
### Building a Transformer Model for Time Series Forecasting with PyTorch
#### Loading and Preprocessing the Data
For effective time series forecasting, the first step is to prepare a suitable dataset and perform the necessary feature engineering. This may involve standardization, differencing, or other transformations that make the data better suited to model training.
```python
import pandas as pd
from sklearn.preprocessing import StandardScaler

# Assume df is a DataFrame containing the time series data
scaler = StandardScaler()
df_scaled = scaler.fit_transform(df)

# Split the data into training and test sets (80/20)
train_size = int(len(df_scaled) * 0.8)
train, test = df_scaled[:train_size], df_scaled[train_size:]
```
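The training loop later in this guide assumes a `train_loader` that yields `(src, tgt)` window pairs in the `(seq_len, batch, n_features)` layout. A minimal sketch of how such a loader could be built from the scaled training split is shown below; the class name `WindowDataset`, the window sizes, and the batch size are illustrative assumptions rather than part of the original code.
```python
import torch
from torch.utils.data import Dataset, DataLoader

class WindowDataset(Dataset):
    """Slices a 2-D array (time, features) into (src, tgt) windows."""
    def __init__(self, series, input_window=50, output_window=10):
        self.series = torch.tensor(series, dtype=torch.float32)
        self.input_window = input_window
        self.output_window = output_window

    def __len__(self):
        return len(self.series) - self.input_window - self.output_window + 1

    def __getitem__(self, idx):
        src = self.series[idx: idx + self.input_window]
        tgt = self.series[idx + self.input_window: idx + self.input_window + self.output_window]
        return src, tgt

# Collate to the (seq_len, batch, n_features) layout expected by the model below.
def collate_seq_first(batch):
    srcs, tgts = zip(*batch)
    return torch.stack(srcs, dim=1), torch.stack(tgts, dim=1)

train_loader = DataLoader(WindowDataset(train), batch_size=32,
                          shuffle=True, collate_fn=collate_seq_first)
```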
#### Defining the Transformer Model Architecture
Design a task-specific variant of the standard Transformer architecture for time series forecasting. The architecture consists of an encoder and a decoder, each built from multiple self-attention layers followed by position-wise feed-forward networks[^3].
```python
import math

import torch
import torch.nn as nn


class PositionalEncoding(nn.Module):
    """Adds sinusoidal positional encodings to inputs shaped (seq_len, batch, d_model)."""

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)  # (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return x + self.pe[:x.size(0), :]


class TransformerModel(nn.Module):
    """Encoder-decoder Transformer for multivariate time series forecasting."""

    def __init__(self, input_dim, output_dim, nhead=8, num_encoder_layers=6,
                 num_decoder_layers=6, dim_feedforward=2048, dropout=0.1):
        super().__init__()
        self.model_type = 'Transformer'
        # Note: d_model equals input_dim here, so input_dim must be divisible by nhead.
        self.pos_encoder = PositionalEncoding(input_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=input_dim, nhead=nhead,
                                                   dim_feedforward=dim_feedforward,
                                                   dropout=dropout)
        decoder_layer = nn.TransformerDecoderLayer(d_model=input_dim, nhead=nhead,
                                                   dim_feedforward=dim_feedforward,
                                                   dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_encoder_layers)
        self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_decoder_layers)
        self.fc_out = nn.Linear(input_dim, output_dim)

    def _generate_square_subsequent_mask(self, sz):
        # Causal mask: -inf above the diagonal, 0 on and below it.
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def forward(self, src, tgt, src_mask=None, tgt_mask=None):
        # Inputs are expected in (seq_len, batch, n_features) layout.
        if src_mask is None or tgt_mask is None:
            device = src.device
            src_mask = self._generate_square_subsequent_mask(src.shape[0]).to(device)
            tgt_mask = self._generate_square_subsequent_mask(tgt.shape[0]).to(device)
        src = self.pos_encoder(src)
        tgt = self.pos_encoder(tgt)  # the decoder input also needs positional information
        memory = self.transformer_encoder(src, src_mask)
        output = self.transformer_decoder(tgt, memory, tgt_mask)
        return self.fc_out(output)
```
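As a quick sanity check (not part of the original snippet), the model can be instantiated and run on random tensors in the `(seq_len, batch, n_features)` layout to confirm the output shape; the dimensions below are arbitrary, and `nhead=2` is chosen so that it divides the 4-feature `d_model`.
```python
import torch

# Dummy data: 4 features, batch of 2, source length 30, decoder input length 10.
model = TransformerModel(input_dim=4, output_dim=4, nhead=2)
src = torch.randn(30, 2, 4)
tgt = torch.randn(10, 2, 4)
out = model(src, tgt)
print(out.shape)  # expected: torch.Size([10, 2, 4])
```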
#### Setting Up Training
Define the loss function and the optimizer, then write a loop that performs mini-batch gradient updates until the model converges or a preset maximum number of epochs is reached. In the loop below, the decoder is fed the target sequence shifted right by one step (teacher forcing).
```python
num_epochs = 100  # maximum number of training epochs (assumed value)

model = TransformerModel(input_dim=df.shape[1], output_dim=df.shape[1])
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(num_epochs):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        # Teacher forcing: the decoder input is the target shifted right by one step,
        # i.e. the last source value followed by all but the last target value.
        decoder_input = torch.cat((data[-1:], target[:-1]), dim=0)
        prediction = model(data, decoder_input)
        loss = loss_fn(prediction, target)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch}, Loss: {loss.item()}')
```
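A corresponding evaluation pass over a held-out loader might look like the sketch below. The `test_loader` is hypothetical, built from the test split in the same way as `train_loader`; the loop simply reports the mean MSE without updating parameters.
```python
model.eval()
test_losses = []
with torch.no_grad():
    for data, target in test_loader:
        # Same teacher-forcing layout as in training.
        decoder_input = torch.cat((data[-1:], target[:-1]), dim=0)
        prediction = model(data, decoder_input)
        test_losses.append(loss_fn(prediction, target).item())
print(f'Test MSE: {sum(test_losses) / len(test_losses):.4f}')
```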
#### Running Predictions
Once the model has been sufficiently trained, it can be used to estimate values for future time steps. The loop below rolls the forecast forward autoregressively, feeding each new prediction back into the input window.
```python
input_window_length = 50  # length of the input window (assumed value)
forecast_horizon = 10     # number of future steps to forecast (assumed value)

model.eval()
with torch.no_grad():
    future_predictions = []
    # Start from the last observed window of the (scaled) test split.
    last_sequence = torch.tensor(test[-input_window_length:], dtype=torch.float32)
    while len(future_predictions) < forecast_horizon:
        # Add a batch dimension: (seq_len, batch=1, n_features).
        pred = model(last_sequence.unsqueeze(dim=1),
                     last_sequence[:-1].unsqueeze(dim=1)).squeeze(dim=1)
        next_value = pred[-1]  # prediction for the next time step
        future_predictions.append(next_value.cpu().numpy())
        # Slide the window forward by appending the new prediction.
        last_sequence = torch.cat((last_sequence[1:], next_value.unsqueeze(dim=0)), dim=0)
```