使用RNN实现自然语言词性标注代码
时间: 2024-02-11 18:07:06 浏览: 92
以下是基于 PyTorch 框架、使用 RNN 实现自然语言词性标注的示例代码：
```python
import torch
import torch.nn as nn
import torch.optim as optim
# Model definition
class RNN_POS(nn.Module):
    """Per-token tagger: embedding -> single-layer RNN -> linear classifier.

    Args:
        vocab_size: Number of entries in the embedding table.
        embedding_dim: Size of each token embedding.
        hidden_dim: Size of the RNN hidden state.
        output_size: Number of tag classes scored for every token.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # batch_first=True: tensors are laid out as (batch, seq, feature).
        self.rnn = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_size)

    def forward(self, x):
        """Score every token of every sequence.

        Args:
            x: Integer token ids of shape (batch, seq).

        Returns:
            Unnormalised tag scores of shape (batch, seq, output_size).
        """
        x = self.embedding(x)
        # Keep the per-step outputs; the final hidden state is not needed
        # because a tag is predicted at every position.
        x, _ = self.rnn(x)
        x = self.fc(x)
        return x
# Training routine
def train(model, train_loader, optimizer, criterion):
    """Run one optimisation pass over every batch in ``train_loader``.

    Args:
        model: Module mapping a batch of inputs to per-token class scores
            whose last dimension is the number of classes.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss taking ``(N, classes)`` scores and ``(N,)`` targets.

    Returns:
        The mean of the per-batch loss values as a float, or ``0.0`` when
        the loader yields no batches.
    """
    model.train()
    running_loss = 0.0
    num_batches = 0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        # Collapse every leading dimension so each token is one row.
        # reshape (not view) also accepts non-contiguous tensors.
        loss = criterion(output.reshape(-1, output.shape[-1]), target.reshape(-1))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        num_batches += 1
    return running_loss / num_batches if num_batches else 0.0
# Evaluation routine
def test(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` and print token-level metrics.

    Loss and accuracy are both averaged over tokens. Targets equal to the
    criterion's ``ignore_index`` attribute (if it has one) are treated as
    padding and excluded from both figures.

    Args:
        model: Module mapping a batch of inputs to per-token class scores
            whose last dimension is the number of classes.
        test_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss taking ``(N, classes)`` scores and ``(N,)`` targets.
            NOTE(review): the weighting below assumes it returns the mean
            over the non-ignored tokens of the batch (the default for
            ``nn.CrossEntropyLoss``) - confirm if another loss is used.

    Returns:
        ``(average_loss, accuracy_percent)``; both are ``0.0`` when no
        token was evaluated.
    """
    model.eval()
    ignore_index = getattr(criterion, 'ignore_index', None)
    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            flat_target = target.reshape(-1)
            if ignore_index is None:
                keep = torch.ones_like(flat_target, dtype=torch.bool)
            else:
                keep = flat_target != ignore_index
            n_tokens = int(keep.sum().item())
            if n_tokens == 0:
                # Nothing to score; a mean-reduced loss would be NaN here.
                continue
            batch_loss = criterion(
                output.reshape(-1, output.shape[-1]), flat_target).item()
            # Undo the per-batch mean so unequal batches are weighted fairly.
            loss_sum += batch_loss * n_tokens
            pred = output.argmax(dim=-1).reshape(-1)
            correct += int((pred.eq(flat_target) & keep).sum().item())
            total += n_tokens
    test_loss = loss_sum / total if total else 0.0
    accuracy = 100. * correct / total if total else 0.0
    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, total, accuracy))
    return test_loss, accuracy


# The name starts with "test", so pytest would try to collect this helper as
# a test case whenever it is imported into a test module; opt out explicitly.
test.__test__ = False
# Hyperparameters
embedding_dim = 128
hidden_dim = 64
lr = 0.01
epochs = 10
batch_size = 64
# Value used to pad tag sequences. It equals nn.CrossEntropyLoss's default
# ignore_index, so padded positions never contribute to the loss.
pad_tag_index = -100

# Load the data set.
# torchtext's PennTreebank is a plain-text language-modelling corpus: it
# yields raw lines without any part-of-speech labels, so it cannot supervise
# a tagger. UDPOS ships pre-tokenised sentences together with their tags.
# NOTE: PennTreebank, get_tokenizer and ngrams_iterator are kept from the
# original import list but are not needed by the tagging pipeline below.
from torchtext.datasets import PennTreebank
from torchtext.datasets import UDPOS
from torchtext.data.utils import get_tokenizer
from torchtext.data.utils import ngrams_iterator
from torchtext.vocab import build_vocab_from_iterator


def load_split(samples):
    """Materialise a UDPOS split as a list of (lower-cased words, tags)."""
    # Each sample is [words, universal_tags, ptb_tags]; use the first two.
    return [([word.lower() for word in sample[0]], list(sample[1]))
            for sample in samples if sample[0]]


train_iter, test_iter = UDPOS(split=('train', 'test'))
train_data = load_split(train_iter)
test_data = load_split(test_iter)

# Word vocabulary: words seen only once fall back to <unk>, which gives the
# <unk> embedding something to learn from during training.
vocab = build_vocab_from_iterator(
    (words for words, _ in train_data), min_freq=2, specials=['<pad>', '<unk>'])
vocab.set_default_index(vocab['<unk>'])
# Tag inventory, taken from the training split only.
tag_to_index = {
    tag: index
    for index, tag in enumerate(sorted({t for _, tags in train_data for t in tags}))
}

# The model dimensions follow the data instead of hard-coded guesses.
vocab_size = len(vocab)
output_size = len(tag_to_index)


def collate_batch(batch):
    """Turn a list of (words, tags) pairs into padded id tensors."""
    token_ids = [torch.tensor(vocab.lookup_indices(words), dtype=torch.long)
                 for words, _ in batch]
    # A tag never seen in training maps to the ignore index.
    tag_ids = [torch.tensor([tag_to_index.get(tag, pad_tag_index) for tag in tags],
                            dtype=torch.long)
               for _, tags in batch]
    data = nn.utils.rnn.pad_sequence(
        token_ids, batch_first=True, padding_value=vocab['<pad>'])
    target = nn.utils.rnn.pad_sequence(
        tag_ids, batch_first=True, padding_value=pad_tag_index)
    return data, target


train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, collate_fn=collate_batch)
# Evaluation does not depend on order, so the test split is not shuffled.
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=batch_size, shuffle=False, collate_fn=collate_batch)

# Model, optimizer and loss
model = RNN_POS(vocab_size, embedding_dim, hidden_dim, output_size)
optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss(ignore_index=pad_tag_index)

# Train, evaluating on the test split after every epoch
for epoch in range(1, epochs + 1):
    train(model, train_loader, optimizer, criterion)
    test(model, test_loader, criterion)
```
这个示例代码使用 Penn Treebank 数据集进行训练和测试，并在每个 epoch 结束后输出测试集的平均损失和准确率。你可以将其修改为适用于其他数据集的代码。
阅读全文