Using the LSTM recurrent neural network module from Keras/PyTorch, train a neural network model for sentiment classification on the IMDB dataset, choosing the hyperparameters yourself. Plot figures showing training accuracy vs. validation accuracy and training loss vs. validation loss, and show the complete Python code.
Sure, here is code for training a PyTorch LSTM model on the IMDB dataset for sentiment classification:
```python
import torch
from torch import nn, optim
# NOTE: Field/LabelField/BucketIterator are the legacy torchtext API
# (moved to torchtext.legacy in 0.9 and removed in 0.12), so an older
# torchtext release is required to run this code.
from torchtext.datasets import IMDB
from torchtext.data import Field, LabelField, BucketIterator
import numpy as np
import matplotlib.pyplot as plt

# Use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define fields for preprocessing data
tokenizer = lambda x: x.split() # split text into tokens
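# include_lengths=True makes each batch.text a (padded token ids, lengths) tuple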
text_field = Field(tokenize=tokenizer, lower=True, include_lengths=True, batch_first=True)
label_field = LabelField(dtype=torch.float)
# Load and preprocess IMDB dataset
train_data, test_data = IMDB.splits(text_field, label_field)
text_field.build_vocab(train_data, max_size=10000)
label_field.build_vocab(train_data)
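# The text vocab keeps the 10,000 most frequent training tokens plus the <unk>/<pad> specials; labels get a two-entry vocab (pos/neg)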
# Create data loaders
train_loader, test_loader = BucketIterator.splits(
    (train_data, test_data),
    batch_size=32,
    device=device,
    shuffle=True
)
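# BucketIterator groups reviews of similar length into the same batch to minimize padding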
# Define LSTM model: embedding -> single-layer LSTM -> linear classifier
class LSTM(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, output_size):
        super(LSTM, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    def forward(self, input):
        text, lengths = input  # unpack (padded token ids, sequence lengths)
        embedded = self.embedding(text)  # [batch, seq_len, embed_size]
        # Pack the padded batch so the LSTM skips the padding positions
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths.cpu(), batch_first=True, enforce_sorted=False)
        # Initial hidden/cell states default to zeros, so no init_hidden is needed
        _, (hidden, _) = self.lstm(packed)
        # hidden[-1] is the last layer's final hidden state: [batch, hidden_size]
        return self.fc(hidden[-1])
# Define hyperparameters
vocab_size = len(text_field.vocab)  # 10,000 most frequent tokens + specials
embed_size = 100
hidden_size = 128
output_size = 1
lr = 0.001
epochs = 10
# Create model, loss function, and optimizer
model = LSTM(vocab_size, embed_size, hidden_size, output_size).to(device)
criterion = nn.BCEWithLogitsLoss()
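# BCEWithLogitsLoss applies the sigmoid internally, so the model outputs raw logits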
optimizer = optim.Adam(model.parameters(), lr=lr)
# Train model
train_loss = []
train_acc = []
val_loss = []
val_acc = []
for epoch in range(epochs):
epoch_train_loss = 0.0
epoch_train_acc = 0.0
epoch_val_loss = 0.0
epoch_val_acc = 0.0
model.train()
for i, batch in enumerate(train_loader):
inputs, lengths = batch.text
labels = batch.label
optimizer.zero_grad()
outputs = model((inputs, lengths))
loss = criterion(outputs.squeeze(), labels)
loss.backward()
optimizer.step()
epoch_train_loss += loss.item()
        epoch_train_acc += ((outputs.squeeze() > 0).float() == labels).sum().item()  # logit > 0 <=> prob > 0.5
model.eval()
with torch.no_grad():
for i, batch in enumerate(test_loader):
inputs, lengths = batch.text
labels = batch.label
outputs = model((inputs, lengths))
loss = criterion(outputs.squeeze(), labels)
epoch_val_loss += loss.item()
            epoch_val_acc += ((outputs.squeeze() > 0).float() == labels).sum().item()
train_loss.append(epoch_train_loss / len(train_loader))
train_acc.append(epoch_train_acc / len(train_data))
val_loss.append(epoch_val_loss / len(test_loader))
val_acc.append(epoch_val_acc / len(test_data))
print(f"Epoch {epoch+1}/{epochs}: Train Loss: {train_loss[-1]:.5f}, Train Acc: {train_acc[-1]:.5f}, Val Loss: {val_loss[-1]:.5f}, Val Acc: {val_acc[-1]:.5f}")
# Plot the training and validation accuracy and loss
plt.plot(np.arange(epochs), train_acc, label='Train Acc')
plt.plot(np.arange(epochs), val_acc, label='Val Acc')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
plt.plot(np.arange(epochs), train_loss, label='Train Loss')
plt.plot(np.arange(epochs), val_loss, label='Val Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
```
In this example, we use a PyTorch LSTM model for the IMDB sentiment-classification task. We first load and preprocess the IMDB dataset with the (legacy) torchtext API, then define a model consisting of an embedding layer, an LSTM layer, and a fully connected layer. The model is trained with the Adam optimizer and the BCEWithLogitsLoss loss function. At the end of each epoch we compute the loss and accuracy on the training and test (here used as validation) sets and append them to lists. Finally, we use Matplotlib to plot the accuracy and loss curves for both sets.
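Since the question allows Keras as an alternative, here is a minimal Keras sketch of the same experiment, assuming TensorFlow 2.x. `keras.datasets.imdb` ships the reviews already tokenized as integer word indices, so no torchtext-style preprocessing is needed; `vocab_size`, `maxlen`, and the layer sizes below are illustrative choices mirroring the PyTorch version.
```python
import matplotlib.pyplot as plt
from tensorflow import keras
from tensorflow.keras import layers

vocab_size = 10000  # keep only the 10,000 most frequent words
maxlen = 200        # truncate/pad every review to 200 tokens

# IMDB arrives pre-tokenized as lists of integer word indices
(x_train, y_train), (x_test, y_test) = keras.datasets.imdb.load_data(num_words=vocab_size)
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = keras.preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)

# Embedding -> LSTM -> sigmoid classifier, mirroring the PyTorch model above
model = keras.Sequential([
    layers.Embedding(vocab_size, 100),
    layers.LSTM(128),
    layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# fit() records per-epoch metrics in history.history
history = model.fit(x_train, y_train, batch_size=32, epochs=10,
                    validation_data=(x_test, y_test))

# Plot accuracy and loss curves for the training and validation sets
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric], label=f'Train {metric}')
    plt.plot(history.history['val_' + metric], label=f'Val {metric}')
    plt.xlabel('Epochs')
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.show()
```
Keras collects the per-epoch metrics automatically in the `History` object returned by `fit()`, so no manual bookkeeping lists are needed.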