Transformer实现机器翻译
时间: 2024-02-16 19:58:02 浏览: 109
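Transformer是一种用于序列到序列(seq2seq)任务的深度学习模型,常用于机器翻译。下面是一个使用PyTorch实现Transformer进行机器翻译的例子。注意:示例代码使用的 Field、BucketIterator 属于 torchtext 的旧版(legacy)接口,在 torchtext 0.9 中被移至 torchtext.legacy,并自 0.12 起移除,运行前请确认所安装的版本: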
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import Multi30k
from torchtext.data import Field, BucketIterator
# Select the compute device once; the model and every batch produced by the
# iterators below are placed on it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define the Fields for the source (German) and target (English) languages.
# Both lower-case the text and wrap each sentence in <sos> ... <eos>.
SRC = Field(tokenize='spacy', tokenizer_language='de', init_token='<sos>', eos_token='<eos>', lower=True)
TRG = Field(tokenize='spacy', tokenizer_language='en', init_token='<sos>', eos_token='<eos>', lower=True)
# Load the dataset
train_data, valid_data, test_data = Multi30k.splits(exts=('.de', '.en'), fields=(SRC, TRG))
# Build the vocabularies from the training split only; tokens seen fewer than
# two times are dropped from the vocabulary.
SRC.build_vocab(train_data, min_freq=2)
TRG.build_vocab(train_data, min_freq=2)
# Build the batch iterators consumed by the training / evaluation code below.
# BucketIterator groups sentences of similar length to minimise padding.
BATCH_SIZE = 128
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    device=device,
)
# Define the model
class Transformer(nn.Module):
    """Sequence-to-sequence translation model built on ``nn.Transformer``.

    Token embeddings are scaled by ``sqrt(hid_dim)``, summed with learned
    positional embeddings, passed through dropout and fed to an
    encoder-decoder ``nn.Transformer``; a final linear layer projects the
    decoder output onto the target vocabulary.

    Args:
        input_dim: Size of the source vocabulary.
        output_dim: Size of the target vocabulary.
        hid_dim: Model (embedding) dimension.
        n_layers: Number of encoder layers and of decoder layers.
        n_heads: Number of attention heads.
        pf_dim: Hidden size of the position-wise feed-forward sub-layers.
        dropout: Dropout probability.
        max_len: Number of learned positions, i.e. the longest sequence the
            positional embeddings can index.
    """

    def __init__(self, input_dim, output_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, max_len=1000):
        super().__init__()
        self.hid_dim = hid_dim
        # Embedding scale factor, sqrt(hid_dim).
        self.scale = hid_dim ** 0.5
        self.encoder = nn.Embedding(input_dim, hid_dim)
        self.pos_encoder = nn.Embedding(max_len, hid_dim)
        self.decoder = nn.Embedding(output_dim, hid_dim)
        self.pos_decoder = nn.Embedding(max_len, hid_dim)
        # Keyword arguments are required here: passed positionally, pf_dim
        # would land in num_decoder_layers and dropout in dim_feedforward.
        self.transformer = nn.Transformer(
            d_model=hid_dim,
            nhead=n_heads,
            num_encoder_layers=n_layers,
            num_decoder_layers=n_layers,
            dim_feedforward=pf_dim,
            dropout=dropout,
        )
        self.fc = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, trg):
        """Return target-vocabulary logits for every target position.

        Args:
            src: Source token indices, indexed as (src_len, batch).
            trg: Target token indices, indexed as (trg_len, batch).

        Returns:
            Tensor of shape (trg_len, batch, output_dim).
        """
        src_len = src.shape[0]
        trg_len = trg.shape[0]
        # Work on whatever device the inputs live on instead of a global.
        dev = src.device
        # Shape (len, 1): the positional embedding broadcasts over the batch.
        src_pos = torch.arange(src_len, device=dev).unsqueeze(1)
        trg_pos = torch.arange(trg_len, device=dev).unsqueeze(1)
        src_emb = self.dropout(self.encoder(src) * self.scale + self.pos_encoder(src_pos))
        trg_emb = self.dropout(self.decoder(trg) * self.scale + self.pos_decoder(trg_pos))
        # Causal mask: position i may only attend to positions <= i, so the
        # decoder cannot look at the tokens it is being trained to predict.
        trg_mask = torch.triu(
            torch.full((trg_len, trg_len), float('-inf'), device=dev),
            diagonal=1,
        )
        # NOTE(review): padding positions are not masked; consider passing
        # src_key_padding_mask / tgt_key_padding_mask as well.
        output = self.transformer(src_emb, trg_emb, tgt_mask=trg_mask)
        return self.fc(output)
# Hyperparameters: architecture sizes first, then the vocabulary sizes that
# fix the embedding table and output projection dimensions.
hid_dim, n_layers, n_heads = 256, 6, 8
pf_dim = 512
dropout = 0.1
input_dim = len(SRC.vocab)
output_dim = len(TRG.vocab)
# Build the model, move it to the target device, and create an Adam optimizer
# with its default settings.
model = Transformer(input_dim, output_dim, hid_dim, n_layers, n_heads, pf_dim, dropout)
model = model.to(device)
optimizer = optim.Adam(model.parameters())
# Cross-entropy loss that skips target positions holding the padding token.
criterion = nn.CrossEntropyLoss(
    ignore_index=TRG.vocab.stoi[TRG.pad_token],
)
# Training and evaluation routines
def train(model, iterator, optimizer, criterion, clip):
    """Run one training pass over ``iterator`` and return the mean batch loss.

    For each batch the model receives ``batch.src`` and ``batch.trg`` without
    its last step, and its output is scored against ``batch.trg`` without its
    first step (teacher forcing with a one-step shift).

    Args:
        model: Callable as ``model(src, trg)``; put into training mode here.
        iterator: Sized iterable of batches exposing ``.src`` and ``.trg``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.
        clip: Maximum gradient norm passed to ``clip_grad_norm_``.

    Returns:
        Sum of the per-batch losses divided by ``len(iterator)``.
    """
    model.train()
    running_loss = 0
    for batch in iterator:
        source, target = batch.src, batch.trg
        optimizer.zero_grad()
        logits = model(source, target[:-1])
        vocab_size = logits.shape[-1]
        # Flatten all leading dimensions so each row scores one target token.
        flat_logits = logits.reshape(-1, vocab_size)
        flat_target = target[1:].reshape(-1)
        loss = criterion(flat_logits, flat_target)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(iterator)
def evaluate(model, iterator, criterion):
    """Return the mean batch loss of ``model`` over ``iterator`` without training.

    The model is switched to evaluation mode and gradients are disabled. Each
    batch is scored as in training: the model sees ``batch.trg`` without its
    last step and is compared against ``batch.trg`` without its first step.

    Args:
        model: Callable as ``model(src, trg)``.
        iterator: Sized iterable of batches exposing ``.src`` and ``.trg``.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.

    Returns:
        Sum of the per-batch losses divided by ``len(iterator)``.
    """
    model.eval()
    running_loss = 0
    with torch.no_grad():
        for batch in iterator:
            source, target = batch.src, batch.trg
            logits = model(source, target[:-1])
            vocab_size = logits.shape[-1]
            # Flatten all leading dimensions so each row scores one target token.
            flat_logits = logits.reshape(-1, vocab_size)
            flat_target = target[1:].reshape(-1)
            running_loss += criterion(flat_logits, flat_target).item()
    return running_loss / len(iterator)
# Train the model, saving a checkpoint whenever the validation loss improves.
N_EPOCHS = 10
CLIP = 1
CHECKPOINT_PATH = 'transformer_model.pt'
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    if valid_loss < best_valid_loss:
        # New best validation loss: remember it and persist the weights.
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), CHECKPOINT_PATH)
    print(f'Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')
# Reload the saved weights and report the loss on the test split.
saved_state = torch.load(CHECKPOINT_PATH)
model.load_state_dict(saved_state)
test_loss = evaluate(model, test_iterator, criterion)
print(f'Test Loss: {test_loss:.3f}')
```
阅读全文