Please give me a short encoder-decoder model that a beginner can run without errors, using only PyTorch and PyCharm.
Sure, here is a simple encoder-decoder model that uses only PyTorch and runs in PyCharm:
```python
import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Special token indices for this toy vocabulary of the digits 1-10
SOS_token = 0   # start-of-sequence, fed to the decoder as its first input
EOS_token = 11  # end-of-sequence, used as the stopping check in evaluate()

# Encoder: embed each input token and feed it through a GRU
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        # input is a single token index of shape (1,)
        embedded = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(embedded, hidden)
        return output, hidden

    def init_hidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

# Decoder: predict the next token from the previous token and the hidden state
class Decoder(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

# One training step on a single (input, target) pair
def train(input_tensor, target_tensor, encoder, decoder,
          encoder_optimizer, decoder_optimizer, criterion, max_length=10):
    encoder_hidden = encoder.init_hidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)
    # Kept for a possible attention extension; unused by this simple decoder
    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
    loss = 0
    # Encode the input sequence one token at a time
    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0, 0]
    # Decode with teacher forcing: feed the ground-truth token at every step
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    for di in range(target_length):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])
        decoder_input = target_tensor[di]  # teacher forcing
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_length

# Greedy decoding for a plain list of token indices
def evaluate(encoder, decoder, sentence, max_length=10):
    with torch.no_grad():
        input_tensor = torch.tensor(sentence, dtype=torch.long, device=device).view(-1, 1)
        input_length = input_tensor.size(0)
        encoder_hidden = encoder.init_hidden()
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]
        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden
        decoded_words = []
        for di in range(max_length):
            decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
            topv, topi = decoder_output.data.topk(1)
            if topi.item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(topi.item())
            decoder_input = topi.squeeze().detach()  # feed the prediction back in
        return decoded_words

# Hyperparameters
input_size = 12   # vocabulary size: tokens 0..11, including SOS and EOS
output_size = 12
hidden_size = 256
learning_rate = 0.01
n_iters = 10000
print_every = 1000

# Toy task: learn to output the input sequence in reverse
input_tensor = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=torch.long, device=device).view(-1, 1)
target_tensor = torch.tensor([10, 9, 8, 7, 6, 5, 4, 3, 2, 1], dtype=torch.long, device=device).view(-1, 1)

# Build the models, optimizers, and loss
encoder = Encoder(input_size, hidden_size).to(device)
decoder = Decoder(hidden_size, output_size).to(device)
encoder_optimizer = torch.optim.SGD(encoder.parameters(), lr=learning_rate)
decoder_optimizer = torch.optim.SGD(decoder.parameters(), lr=learning_rate)
criterion = nn.NLLLoss()

# Training loop
for it in range(1, n_iters + 1):
    loss = train(input_tensor, target_tensor, encoder, decoder,
                 encoder_optimizer, decoder_optimizer, criterion)
    if it % print_every == 0:
        print('Iteration: {}, Loss: {:.4f}'.format(it, loss))

# Test the model
test_sentence = [1, 2, 3, 4, 5]
output_words = evaluate(encoder, decoder, test_sentence)
print('Input Sentence:', test_sentence)
print('Output Sentence:', output_words)
```
This code implements a simple encoder-decoder model that learns to output a sequence of ten numbers in reverse. Each token is embedded as a 256-dimensional vector. The encoder runs a GRU over the input one token at a time; the decoder runs its own GRU and maps each hidden state through a linear layer and a log-softmax to a distribution over the output vocabulary. Training uses SGD with NLLLoss (which pairs with LogSoftmax), feeds the ground-truth token to the decoder at every step (teacher forcing), and prints the loss every 1000 iterations. Finally, the model is tested on the input sequence [1, 2, 3, 4, 5].
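One detail worth knowing: `train()` above always uses teacher forcing. A common variant only does so with some probability, otherwise feeding the decoder its own greedy prediction, which can make the model more robust at inference time. Below is a minimal sketch of that variant; the function name `train_with_tf` and the `teacher_forcing_ratio` value are illustrative choices, not part of the original answer, and the sketch reuses `SOS_token`, `device`, and the models defined in the script above.

```python
import random

def train_with_tf(input_tensor, target_tensor, encoder, decoder,
                  encoder_optimizer, decoder_optimizer, criterion,
                  teacher_forcing_ratio=0.5):
    # Same as train() above, except the decoder is sometimes fed
    # its own prediction instead of the ground-truth token.
    encoder_hidden = encoder.init_hidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()
    loss = 0
    for ei in range(input_tensor.size(0)):
        _, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    # Flip a coin once per sequence, as in the PyTorch seq2seq tutorial
    use_teacher_forcing = random.random() < teacher_forcing_ratio
    for di in range(target_tensor.size(0)):
        decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
        loss += criterion(decoder_output, target_tensor[di])
        if use_teacher_forcing:
            decoder_input = target_tensor[di]       # feed the ground truth
        else:
            _, topi = decoder_output.topk(1)        # feed the model's own guess
            decoder_input = topi.squeeze().detach()
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.item() / target_tensor.size(0)
```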
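If you want the trained weights to survive between PyCharm runs, a minimal save-and-reload sketch follows; the file names `encoder.pth` and `decoder.pth` are arbitrary choices for this example.

```python
# Save the trained weights to disk
torch.save(encoder.state_dict(), 'encoder.pth')
torch.save(decoder.state_dict(), 'decoder.pth')

# Later: rebuild the models and load the saved weights before evaluating
encoder2 = Encoder(input_size, hidden_size).to(device)
decoder2 = Decoder(hidden_size, output_size).to(device)
encoder2.load_state_dict(torch.load('encoder.pth', map_location=device))
decoder2.load_state_dict(torch.load('decoder.pth', map_location=device))
encoder2.eval()
decoder2.eval()
print(evaluate(encoder2, decoder2, test_sentence))
```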