Help me write a PyTorch implementation of TextCNN with a self-attention mechanism
Here is a simple PyTorch implementation that combines TextCNN with a self-attention mechanism:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class TextCNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_filters, filter_sizes, output_dim, dropout):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.convs = nn.ModuleList([
            nn.Conv2d(in_channels=1, out_channels=num_filters, kernel_size=(fs, embedding_dim))
            for fs in filter_sizes
        ])
        self.fc = nn.Linear(len(filter_sizes) * num_filters, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text):
        # text = [batch size, sent len]
        embedded = self.embedding(text)
        # embedded = [batch size, sent len, emb dim]
        embedded = embedded.unsqueeze(1)
        # embedded = [batch size, 1, sent len, emb dim]
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        # conved_n = [batch size, num_filters, sent len - filter_sizes[n] + 1]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        # pooled_n = [batch size, num_filters]
        cat = self.dropout(torch.cat(pooled, dim=1))
        # cat = [batch size, num_filters * len(filter_sizes)]
        return self.fc(cat)


class SelfAttention(nn.Module):
    def __init__(self, hidden_dim, num_heads, dropout):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads
        self.q_linear = nn.Linear(hidden_dim, hidden_dim)
        self.k_linear = nn.Linear(hidden_dim, hidden_dim)
        self.v_linear = nn.Linear(hidden_dim, hidden_dim)
        self.dropout = nn.Dropout(dropout)
        self.out_linear = nn.Linear(hidden_dim, hidden_dim)

    def forward(self, query, key, value, mask=None):
        batch_size = query.shape[0]
        # Project and split into heads: [batch size, num_heads, seq len, head_dim]
        Q = self.q_linear(query).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        K = self.k_linear(key).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        V = self.v_linear(value).view(batch_size, -1, self.num_heads, self.head_dim).transpose(1, 2)
        # Scaled dot-product attention
        scores = torch.matmul(Q, K.transpose(-2, -1)) / torch.sqrt(torch.tensor(self.head_dim, dtype=torch.float32))
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        attention = self.dropout(F.softmax(scores, dim=-1))
        # Merge heads back: [batch size, seq len, hidden_dim]
        x = torch.matmul(attention, V).transpose(1, 2).contiguous().view(batch_size, -1, self.hidden_dim)
        x = self.out_linear(x)
        return x
```
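As a quick sanity check, the two modules can be run independently on dummy inputs to verify the output shapes. The hyperparameter values below are arbitrary and only for illustration:
```python
# Continues from the definitions above; hyperparameters are illustrative only.
cnn = TextCNN(vocab_size=5000, embedding_dim=128, num_filters=100,
              filter_sizes=[3, 4, 5], output_dim=256, dropout=0.5)
attn = SelfAttention(hidden_dim=256, num_heads=8, dropout=0.1)

tokens = torch.randint(0, 5000, (32, 50))   # [batch size, sent len] of token indices
cnn_out = cnn(tokens)                        # [32, 256]
# Add a length-1 sequence dimension so the attention layer sees [batch, seq, hidden]
attn_in = cnn_out.unsqueeze(1)               # [32, 1, 256]
attn_out = attn(attn_in, attn_in, attn_in)   # [32, 1, 256]
print(cnn_out.shape, attn_out.shape)
```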
To use these modules, instantiate them and compose them into a PyTorch model. For example, to combine the TextCNN with a self-attention layer:
```python
class MyModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, num_filters, filter_sizes, output_dim, dropout, hidden_dim, num_heads):
        super().__init__()
        # The TextCNN output size is set to hidden_dim so that it matches
        # the dimension the self-attention layer expects.
        self.text_cnn = TextCNN(vocab_size, embedding_dim, num_filters, filter_sizes, hidden_dim, dropout)
        self.self_attention = SelfAttention(hidden_dim, num_heads, dropout)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, text):
        # text = [batch size, sent len]
        cnn_out = self.text_cnn(text)
        # cnn_out = [batch size, hidden dim]
        attn_out = self.self_attention(cnn_out.unsqueeze(1), cnn_out.unsqueeze(1), cnn_out.unsqueeze(1))
        # attn_out = [batch size, 1, hidden dim]
        return self.fc(attn_out.squeeze(1))
```
In this example, the output of the TextCNN is passed through the self-attention layer and then through a final linear layer to produce the output. Note that the TextCNN output has to be reshaped (unsqueezed into a length-1 sequence) before it can be fed into the self-attention layer, and its dimension must match the attention layer's hidden_dim, which is why MyModel builds the TextCNN with hidden_dim as its output size.
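For completeness, here is a minimal forward-pass sketch with assumed hyperparameters (the binary classification setup with output_dim=2 is only an example; adjust it to your task):
```python
# Continues from the definitions above; all hyperparameters are assumptions.
model = MyModel(vocab_size=5000, embedding_dim=128, num_filters=100,
                filter_sizes=[3, 4, 5], output_dim=2, dropout=0.5,
                hidden_dim=256, num_heads=8)
batch = torch.randint(0, 5000, (32, 50))  # [batch size, sent len] of token indices
logits = model(batch)                     # [32, 2]
print(logits.shape)
```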