PyTorch LSTM Example
The following is a simple example of implementing an LSTM in PyTorch. In this example, we train an LSTM model on a corpus of Shakespeare text and then use it to generate new text.
First, we import the necessary libraries:
```python
import torch
import torch.nn as nn
import numpy as np
```
Next, we prepare the data. We read in the Shakespeare text and convert it into a sequence of integers, using a dictionary that maps each character to an integer (and another to map back). We also define a generator function that splits the integer sequence into training batches:
```python
# read the raw text (assumes a local shakespeare.txt file)
with open('shakespeare.txt', 'r') as f:
    text = f.read()

# create dictionaries to map characters to integers and back
chars = list(set(text))
char_to_int = {ch: i for i, ch in enumerate(chars)}
int_to_char = {i: ch for i, ch in enumerate(chars)}

# convert the text to a sequence of integers
seq = [char_to_int[ch] for ch in text]

# define a generator that yields (input, target) batches from the sequence
def get_batches(seq, batch_size, seq_length):
    # calculate the number of full batches
    num_batches = len(seq) // (batch_size * seq_length)
    # trim the sequence so it is evenly divisible by batch_size * seq_length
    seq = seq[:num_batches * batch_size * seq_length]
    # reshape into batch_size rows of num_batches * seq_length columns
    seq = np.reshape(seq, (batch_size, -1))
    # slide over the columns, extracting windows of seq_length characters
    for i in range(0, seq.shape[1], seq_length):
        x = seq[:, i:i+seq_length]
        # the target is the input shifted one character to the left
        y = np.zeros_like(x)
        y[:, :-1] = x[:, 1:]
        y[:, -1] = seq[:, i+seq_length] if i+seq_length < seq.shape[1] else seq[:, 0]
        yield x, y
```
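As a quick sanity check (an illustrative snippet, not part of the original example), we can confirm that each batch has shape `(batch_size, seq_length)` and that the targets are simply the inputs shifted left by one character:
```python
# illustrative sanity check on a toy sequence of fake character ids
toy_seq = list(range(50))                      # 50 fake character ids
x, y = next(get_batches(toy_seq, batch_size=2, seq_length=10))
print(x.shape, y.shape)                        # (2, 10) (2, 10)
print((y[:, :-1] == x[:, 1:]).all())           # True: targets are the inputs shifted by one
```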
Now we can define our LSTM model:
```python
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout=0.5):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.dropout = dropout
        # embed each character id into a hidden_size-dimensional vector
        self.embedding = nn.Embedding(input_size, hidden_size)
        # batch_first=True so inputs/outputs have shape (batch, seq, features),
        # matching the (batch_size, seq_length) batches produced above
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers,
                            dropout=dropout, batch_first=True)
        # project the LSTM output back to vocabulary-sized logits
        self.fc = nn.Linear(hidden_size, input_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        output, hidden = self.lstm(x, hidden)
        output = self.fc(output)
        return output, hidden

    def init_hidden(self, batch_size):
        # zero-filled hidden and cell states on the same device/dtype as the weights
        weight = next(self.parameters()).data
        return (weight.new(self.num_layers, batch_size, self.hidden_size).zero_(),
                weight.new(self.num_layers, batch_size, self.hidden_size).zero_())
```
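A minimal shape check (illustrative only; the vocabulary size of 65 is a placeholder, the real value is `len(chars)`) confirms that the model maps a `(batch, seq)` batch of character ids to `(batch, seq, vocab)` logits:
```python
# illustrative shape check with placeholder sizes
vocab_size = 65                                                # placeholder for len(chars)
m = LSTM(vocab_size, hidden_size=128, num_layers=2)
h = m.init_hidden(batch_size=4)
dummy = torch.randint(vocab_size, (4, 10), dtype=torch.long)   # (batch, seq)
out, h = m(dummy, h)
print(out.shape)                                               # torch.Size([4, 10, 65])
```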
Next, we define some hyperparameters and create an instance of the model:
```python
# define hyperparameters
input_size = len(chars)
hidden_size = 256
num_layers = 2
dropout = 0.5
learning_rate = 0.001
batch_size = 64
seq_length = 100
# create model instance
model = LSTM(input_size, hidden_size, num_layers, dropout=dropout)
```
Now we can define our loss function and optimizer:
```python
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
```
Finally, we can start training the model:
```python
# select a device and move the model to it
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# training loop
model.train()
for epoch in range(100):
    hidden = model.init_hidden(batch_size)
    for i, (x, y) in enumerate(get_batches(seq, batch_size, seq_length)):
        # convert inputs and targets to PyTorch tensors
        x = torch.from_numpy(x).to(device)
        y = torch.from_numpy(y).to(device)
        # detach the hidden state so gradients do not flow back across batches
        hidden = tuple(h.detach() for h in hidden)
        # zero the gradients
        optimizer.zero_grad()
        # forward pass
        output, hidden = model(x, hidden)
        loss = criterion(output.view(-1, input_size), y.view(-1))
        # backward pass with gradient clipping
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5)
        optimizer.step()
        # print progress
        if i % 100 == 0:
            num_steps = len(seq) // (batch_size * seq_length)
            print(f'Epoch [{epoch+1}/100], Step [{i+1}/{num_steps}], Loss: {loss.item():.4f}')
```
After training is complete, we can use the model to generate new text:
```python
# generate new text
model.eval()
with torch.no_grad():
    hidden = model.init_hidden(1)
    # start from a random character
    x = torch.randint(input_size, (1, 1), dtype=torch.long).to(device)
    result = []
    for i in range(1000):
        output, hidden = model(x, hidden)
        prob = nn.functional.softmax(output.view(-1), dim=0)
        # greedily pick the most likely next character
        char = int_to_char[torch.argmax(prob).item()]
        result.append(char)
        # feed the predicted character back in as the next input
        x = torch.tensor([[char_to_int[char]]]).to(device)
print(''.join(result))
```
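Greedy argmax decoding tends to fall into repetitive loops. A common alternative (shown here as an optional sketch, not part of the original example) is to sample the next character from the softmax distribution, optionally sharpened or flattened by a temperature:
```python
# optional: sample the next character instead of taking the argmax
temperature = 0.8                                  # <1 sharpens, >1 flattens the distribution
prob = nn.functional.softmax(output.view(-1) / temperature, dim=0)
next_id = torch.multinomial(prob, num_samples=1).item()
char = int_to_char[next_id]
```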
That is a simple PyTorch LSTM example to help you get started writing LSTM models.