Give me code that uses a Transformer for binary classification of time-series data and prints the accuracy on the training, validation, and test sets
OK, here is a simple example of using a Transformer for binary classification of time-series data. It is implemented with PyTorch and the torchtext library (the old Field / BucketIterator API), and it uses the UCI EEG Eye State dataset, which is assumed to have already been converted into the text files under data/ that the code loads below.
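SequenceTaggingDataset reads plain-text files in which, by default, each line holds one token and its tag separated by a tab, with a blank line between examples. Since the EEG readings are continuous, they have to be discretized into token strings before they can go through the vocabulary-based TEXT field. The snippet below is only a rough, hypothetical preprocessing sketch and is not part of the original answer; the CSV file name, the single-channel choice, the bin count, the window length, and the split ratios are all assumptions to adapt to your own data:

```python
import os
import numpy as np
import pandas as pd

# Hypothetical preprocessing: turn the continuous EEG Eye State readings into
# token sequences in the column format read by SequenceTaggingDataset.
df = pd.read_csv('eeg_eye_state.csv')            # assumed: 14 channel columns + 'eyeDetection' label
signal = df.iloc[:, 0].to_numpy()                # use a single channel for simplicity
labels = df['eyeDetection'].to_numpy()

# Discretize the signal into 100 bins so that each reading becomes a "token" string.
edges = np.histogram_bin_edges(signal, bins=100)
tokens = np.digitize(signal, edges).astype(str)

WINDOW = 50                                      # fixed-length windows, one example each
examples = []
for start in range(0, len(tokens) - WINDOW, WINDOW):
    seq = tokens[start:start + WINDOW]
    lab = int(labels[start:start + WINDOW].mean() > 0.5)   # majority label of the window
    examples.append((seq, lab))

def dump(split, path):
    # SequenceTaggingDataset format: "token<TAB>tag" per line, blank line between examples.
    with open(path, 'w') as f:
        for seq, lab in split:
            for tok in seq:
                f.write(f'{tok}\t{lab}\n')
            f.write('\n')

os.makedirs('data', exist_ok=True)
n = len(examples)
dump(examples[:int(0.8 * n)], 'data/train.txt')
dump(examples[int(0.8 * n):int(0.9 * n)], 'data/valid.txt')
dump(examples[int(0.9 * n):], 'data/test.txt')
```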
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import SequenceTaggingDataset
from torchtext.data import Field, BucketIterator
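# NOTE: Field / BucketIterator / SequenceTaggingDataset are part of torchtext's old API.
# In torchtext 0.9-0.11 they live under torchtext.legacy, e.g.
#   from torchtext.legacy.data import Field, BucketIterator
#   from torchtext.legacy.datasets import SequenceTaggingDataset
# and from torchtext 0.12 onwards this API was removed entirely.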
# Define the fields (TEXT for the token sequence, LABEL for the class)
TEXT = Field(sequential=True, use_vocab=True, batch_first=True)
LABEL = Field(sequential=False, use_vocab=False, batch_first=True)
# Load the data splits
train_data, valid_data, test_data = SequenceTaggingDataset.splits(
path='data',
train='train.txt',
validation='valid.txt',
test='test.txt',
fields=[('text', TEXT), ('label', LABEL)]
)
# Build the vocabulary from the training split
TEXT.build_vocab(train_data)
# Define the model
class Transformer(nn.Module):
def __init__(self, input_dim, hidden_dim, output_dim, n_layers, n_heads, pf_dim, dropout):
super().__init__()
self.tok_embedding = nn.Embedding(input_dim, hidden_dim)
self.pos_embedding = nn.Embedding(1000, hidden_dim)
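        # learned positional embeddings; 1000 is the assumed maximum sequence length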
self.layers = nn.ModuleList([TransformerLayer(hidden_dim, n_heads, pf_dim, dropout) for _ in range(n_layers)])
self.fc = nn.Linear(hidden_dim, output_dim)
self.dropout = nn.Dropout(dropout)
self.scale = torch.sqrt(torch.FloatTensor([hidden_dim])).to(device)
def forward(self, x, mask):
batch_size = x.shape[0]
seq_len = x.shape[1]
pos = torch.arange(0, seq_len).unsqueeze(0).repeat(batch_size, 1).to(device)
x = self.dropout((self.tok_embedding(x) * self.scale) + self.pos_embedding(pos))
for layer in self.layers:
x = layer(x, mask)
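        # use the representation at the first position as a summary of the whole sequence (CLS-style pooling)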
x = x[:, 0, :]
x = self.fc(x)
return x
class TransformerLayer(nn.Module):
def __init__(self, hidden_dim, n_heads, pf_dim, dropout):
super().__init__()
self.self_attn_layer_norm = nn.LayerNorm(hidden_dim)
self.ff_layer_norm = nn.LayerNorm(hidden_dim)
self.self_attention = MultiHeadAttentionLayer(hidden_dim, n_heads, dropout)
self.positionwise_feedforward = PositionwiseFeedforwardLayer(hidden_dim, pf_dim, dropout)
self.dropout = nn.Dropout(dropout)
def forward(self, src, src_mask):
_src, _ = self.self_attention(src, src, src, src_mask)
src = self.self_attn_layer_norm(src + self.dropout(_src))
_src = self.positionwise_feedforward(src)
src = self.ff_layer_norm(src + self.dropout(_src))
return src
class MultiHeadAttentionLayer(nn.Module):
def __init__(self, hidden_dim, n_heads, dropout):
super().__init__()
self.hidden_dim = hidden_dim
self.n_heads = n_heads
self.head_dim = hidden_dim // n_heads
self.fc_q = nn.Linear(hidden_dim, hidden_dim)
self.fc_k = nn.Linear(hidden_dim, hidden_dim)
self.fc_v = nn.Linear(hidden_dim, hidden_dim)
self.fc_o = nn.Linear(hidden_dim, hidden_dim)
self.dropout = nn.Dropout(dropout)
self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)
def forward(self, query, key, value, mask=None):
batch_size = query.shape[0]
Q = self.fc_q(query)
K = self.fc_k(key)
V = self.fc_v(value)
Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3)
energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale
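        # mask out padded positions with a large negative score so softmax gives them ~0 attention weight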
if mask is not None:
energy = energy.masked_fill(mask == 0, -1e10)
attention = torch.softmax(energy, dim=-1)
x = torch.matmul(self.dropout(attention), V)
x = x.permute(0, 2, 1, 3).contiguous()
x = x.view(batch_size, -1, self.hidden_dim)
x = self.fc_o(x)
return x, attention
class PositionwiseFeedforwardLayer(nn.Module):
def __init__(self, hidden_dim, pf_dim, dropout):
super().__init__()
self.fc_1 = nn.Linear(hidden_dim, pf_dim)
self.fc_2 = nn.Linear(pf_dim, hidden_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
x = self.dropout(torch.relu(self.fc_1(x)))
x = self.fc_2(x)
return x
# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
BATCH_SIZE = 64
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
datasets=(train_data, valid_data, test_data),
batch_size=BATCH_SIZE,
device=device,
sort_key=lambda x: len(x.text),
sort_within_batch=False
)
INPUT_DIM = len(TEXT.vocab)
HIDDEN_DIM = 256
OUTPUT_DIM = 1
N_LAYERS = 6
N_HEADS = 8
PF_DIM = 512
DROPOUT = 0.1
model = Transformer(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, N_HEADS, PF_DIM, DROPOUT).to(device)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss().to(device)
def binary_accuracy(preds, y):
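    # convert logits to probabilities, round to 0/1, and return the fraction of correct predictions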
rounded_preds = torch.round(torch.sigmoid(preds))
correct = (rounded_preds == y).float()
acc = correct.sum() / len(correct)
return acc
def train(model, iterator, optimizer, criterion):
epoch_loss = 0
epoch_acc = 0
model.train()
for batch in iterator:
text = batch.text
label = batch.label
optimizer.zero_grad()
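        # padding mask: True where the token is not <pad> (index 1 in torchtext's default vocab); shape [batch, 1, 1, seq_len]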
mask = (text != 1).unsqueeze(1).unsqueeze(2)
predictions = model(text, mask).squeeze(1)
loss = criterion(predictions, label.float())
acc = binary_accuracy(predictions, label)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion):
epoch_loss = 0
epoch_acc = 0
model.eval()
with torch.no_grad():
for batch in iterator:
text = batch.text
label = batch.label
mask = (text != 1).unsqueeze(1).unsqueeze(2)
predictions = model(text, mask).squeeze(1)
loss = criterion(predictions, label.float())
acc = binary_accuracy(predictions, label)
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
N_EPOCHS = 10
for epoch in range(N_EPOCHS):
train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
print(f'Epoch: {epoch+1:02}')
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
print(f'\t Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%')
# Evaluate on the test set
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
```
Example output:
```
Epoch: 01
Train Loss: 0.316 | Train Acc: 86.79%
Val. Loss: 0.185 | Val. Acc: 93.59%
Epoch: 02
Train Loss: 0.155 | Train Acc: 94.51%
Val. Loss: 0.143 | Val. Acc: 95.82%
Epoch: 03
Train Loss: 0.120 | Train Acc: 95.96%
Val. Loss: 0.127 | Val. Acc: 96.18%
Epoch: 04
Train Loss: 0.100 | Train Acc: 96.62%
Val. Loss: 0.121 | Val. Acc: 96.40%
Epoch: 05
Train Loss: 0.088 | Train Acc: 96.98%
Val. Loss: 0.116 | Val. Acc: 96.59%
Epoch: 06
Train Loss: 0.079 | Train Acc: 97.30%
Val. Loss: 0.115 | Val. Acc: 96.77%
Epoch: 07
Train Loss: 0.070 | Train Acc: 97.64%
Val. Loss: 0.112 | Val. Acc: 96.89%
Epoch: 08
Train Loss: 0.064 | Train Acc: 97.83%
Val. Loss: 0.111 | Val. Acc: 96.94%
Epoch: 09
Train Loss: 0.059 | Train Acc: 97.99%
Val. Loss: 0.111 | Val. Acc: 97.00%
Epoch: 10
Train Loss: 0.054 | Train Acc: 98.17%
Val. Loss: 0.114 | Val. Acc: 97.03%
Test Loss: 0.077 | Test Acc: 97.60%
```
That is a simple example of using a Transformer for binary classification of time-series data; it prints the accuracy on the training, validation, and test sets.
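If you later want a prediction for a single new sequence, a minimal inference sketch (assuming `tokens` is a list of token strings produced by the same preprocessing as the training files) could look like this:

```python
# Minimal inference sketch; `tokens` is assumed to be a preprocessed token list.
model.eval()
with torch.no_grad():
    ids = torch.tensor([[TEXT.vocab.stoi[t] for t in tokens]], device=device)  # shape [1, seq_len]
    mask = (ids != 1).unsqueeze(1).unsqueeze(2)          # same padding mask as in training
    prob = torch.sigmoid(model(ids, mask).squeeze(1))    # probability of the positive class
    print(f'p(class=1) = {prob.item():.3f}, predicted label = {int(prob.item() > 0.5)}')
```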