pytorch实现gru分类
时间: 2023-07-11 12:49:30 浏览: 195
pytorch实现用CNN和LSTM对文本进行分类方式
5星 · 资源好评率100%
好的,我可以为您提供一个简单的PyTorch实现GRU分类的代码示例。假设我们要对一个文本数据集进行情感分类,数据集包含两个类别:正面和负面。以下是一个基本的代码框架,您可以根据自己的数据集和需求进行修改和扩展。
首先,导入所需的PyTorch库和其他必要的库:
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import IMDB
from torchtext.data import Field, LabelField, BucketIterator
```
接下来,定义数据集的字段和超参数:
```python
TEXT = Field(tokenize='spacy', tokenizer_language='en_core_web_sm', include_lengths=True)
LABEL = LabelField(dtype=torch.float)
train_data, test_data = IMDB.splits(TEXT, LABEL)
MAX_VOCAB_SIZE = 25000
TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE, vectors='glove.6B.100d', unk_init=torch.Tensor.normal_)
LABEL.build_vocab(train_data)
BATCH_SIZE = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
```
然后,定义模型:
```python
class GRU(nn.Module):
def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
super().__init__()
self.embedding = nn.Embedding(input_dim, embedding_dim)
self.gru = nn.GRU(embedding_dim, hidden_dim, num_layers=2, bidirectional=True, dropout=0.5)
self.fc = nn.Linear(hidden_dim * 2, output_dim)
self.dropout = nn.Dropout(0.5)
def forward(self, text, text_lengths):
embedded = self.dropout(self.embedding(text))
packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths.to('cpu'), enforce_sorted=False)
packed_output, hidden = self.gru(packed_embedded)
output, output_lengths = nn.utils.rnn.pad_packed_sequence(packed_output)
hidden = self.dropout(torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1))
return self.fc(hidden)
```
在这个模型中,我们使用了一个双向GRU来处理输入文本数据,并将最后一个时刻的隐藏状态输入到一个全连接层进行分类。
最后,定义训练和测试函数,并开始训练:
```python
def train(model, iterator, optimizer, criterion):
epoch_loss = 0
epoch_acc = 0
model.train()
for batch in iterator:
text, text_lengths = batch.text
optimizer.zero_grad()
predictions = model(text, text_lengths).squeeze(1)
loss = criterion(predictions, batch.label)
acc = binary_accuracy(predictions, batch.label)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
def evaluate(model, iterator, criterion):
epoch_loss = 0
epoch_acc = 0
model.eval()
with torch.no_grad():
for batch in iterator:
text, text_lengths = batch.text
predictions = model(text, text_lengths).squeeze(1)
loss = criterion(predictions, batch.label)
acc = binary_accuracy(predictions, batch.label)
epoch_loss += loss.item()
epoch_acc += acc.item()
return epoch_loss / len(iterator), epoch_acc / len(iterator)
def binary_accuracy(preds, y):
rounded_preds = torch.round(torch.sigmoid(preds))
correct = (rounded_preds == y).float()
acc = correct.sum() / len(correct)
return acc
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
model = GRU(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
model.embedding.weight.data.copy_(TEXT.vocab.vectors)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
model = model.to(device)
criterion = criterion.to(device)
train_iterator, test_iterator = BucketIterator.splits((train_data, test_data), batch_size=BATCH_SIZE, device=device, sort_within_batch=True, sort_key=lambda x: len(x.text))
N_EPOCHS = 10
for epoch in range(N_EPOCHS):
train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
test_loss, test_acc = evaluate(model, test_iterator, criterion)
print(f'Epoch: {epoch+1:02}')
print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
print(f'\t Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')
```
希望这个示例代码能够帮助您实现一个基本的PyTorch GRU分类器。如果您有任何问题或需要进一步的帮助,请告诉我。
阅读全文