Give me a runnable piece of sentiment-analysis code, implemented in PyTorch
Below is an example of sentiment analysis implemented in PyTorch. It trains and evaluates on the IMDB movie-review dataset to predict whether a review is positive or negative. Note that it relies on the legacy torchtext `Field`/`LabelField`/`BucketIterator` API, so it needs an older torchtext release (before 0.9, or the `torchtext.legacy` namespace in 0.9–0.11) plus spaCy with its English tokenizer installed.
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import IMDB
from torchtext.data import Field, LabelField, BucketIterator
# Define the text and label fields (legacy torchtext API)
TEXT = Field(tokenize='spacy', lower=True)
LABEL = LabelField(dtype=torch.float)

# Load the IMDB dataset
train_data, test_data = IMDB.splits(TEXT, LABEL)

# Build the vocabularies (pretrained GloVe vectors for the text field)
TEXT.build_vocab(train_data, max_size=10000, vectors='glove.6B.100d')
LABEL.build_vocab(train_data)
# Define the model
class SentimentAnalysisModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, num_layers, dropout):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers, dropout=dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text: [seq_len, batch_size]
        embedded = self.dropout(self.embedding(text))
        output, (hidden, cell) = self.rnn(embedded)
        # Use the final hidden state of the top LSTM layer
        hidden = self.dropout(hidden[-1])
        return self.fc(hidden)
# Hyperparameters
VOCAB_SIZE = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
OUTPUT_DIM = 1
NUM_LAYERS = 2
DROPOUT = 0.5

# Initialize the model, load the pretrained embeddings, and set up the optimizer and loss
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = SentimentAnalysisModel(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, NUM_LAYERS, DROPOUT).to(device)
model.embedding.weight.data.copy_(TEXT.vocab.vectors)
optimizer = optim.Adam(model.parameters())
criterion = nn.BCEWithLogitsLoss()
# Split the datasets into batches (similar-length reviews are bucketed together)
BATCH_SIZE = 64
train_iterator, test_iterator = BucketIterator.splits(
    (train_data, test_data),
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    device=device)
# Train the model
NUM_EPOCHS = 5
for epoch in range(NUM_EPOCHS):
    model.train()
    for batch in train_iterator:
        optimizer.zero_grad()
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        loss.backward()
        optimizer.step()

    # Evaluate on the test set
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for batch in test_iterator:
            predictions = model(batch.text).squeeze(1)
            rounded_predictions = torch.round(torch.sigmoid(predictions))
            total += batch.label.size(0)
            correct += (rounded_predictions == batch.label).sum().item()
        accuracy = 100 * correct / total
    print(f'Epoch: {epoch+1}, Test Accuracy: {accuracy:.2f}%')
```
The model is a simple LSTM classifier: an embedding layer (initialized with GloVe vectors) feeds a two-layer LSTM, and the final hidden state passes through a fully connected layer to produce a single logit. Training uses the Adam optimizer with a binary cross-entropy loss on logits (`BCEWithLogitsLoss`), and at the end of each epoch the code reports the model's accuracy on the test set.
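If you want to score a single raw review with the trained model, a minimal inference sketch along these lines should work with the pipeline above. The `predict_sentiment` helper and the `en_core_web_sm` spaCy model name are assumptions added here, not part of the original code, and which class maps to 1 depends on how `LABEL.build_vocab` ordered the labels:
```python
import spacy
import torch

# Assumed helper, not part of the original answer: score one raw review string.
nlp = spacy.load('en_core_web_sm')  # adjust to whichever English spaCy model you have installed

def predict_sentiment(model, sentence):
    model.eval()
    tokens = [tok.text.lower() for tok in nlp.tokenizer(sentence)]
    indexed = [TEXT.vocab.stoi[t] for t in tokens]               # map tokens to vocab indices (unknown -> <unk>)
    tensor = torch.LongTensor(indexed).unsqueeze(1).to(device)   # shape: [seq_len, 1], batch of one
    with torch.no_grad():
        prob = torch.sigmoid(model(tensor)).item()
    return prob  # probability of the label that LABEL.vocab mapped to 1 (typically 'pos')

print(predict_sentiment(model, "This film was absolutely wonderful!"))
```
The helper reuses `TEXT`, `model`, and `device` from the training script, so it must run in the same session (or after you reload the saved weights and vocabulary).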