Help me write code that uses a two-layer Transformer
In deep learning, the Transformer is a powerful model for processing sequence data, especially in natural language processing tasks. Here is a simple example showing how to build a structure with two Transformer encoder layers using the PyTorch library:
```python
import math

import torch
from torch import nn

class PositionalEncoding(nn.Module):
    """Standard sinusoidal positional encoding (referenced but not defined in the original snippet)."""
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe.unsqueeze(0))  # (1, max_len, d_model)

    def forward(self, x):  # x: (batch, seq_len, d_model)
        x = x + self.pe[:, : x.size(1)]
        return self.dropout(x)

class TransformerBlock(nn.Module):
    """One encoder block: multi-head self-attention and a feed-forward network, each with a residual connection and LayerNorm."""
    def __init__(self, d_model, num_heads, dropout=0.1):
        super().__init__()
        # The original omitted its custom MultiHeadAttention implementation,
        # so PyTorch's built-in nn.MultiheadAttention is used here instead.
        self.self_attn = nn.MultiheadAttention(d_model, num_heads, dropout=dropout, batch_first=True)
        self.fc1 = nn.Linear(d_model, d_model)
        self.fc2 = nn.Linear(d_model, d_model)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        attn_output, _ = self.self_attn(x, x, x)               # self-attention
        x = self.norm1(x + self.dropout(attn_output))          # residual connection + normalization
        ff = self.fc2(self.dropout(torch.relu(self.fc1(x))))   # position-wise feed-forward
        return self.norm2(x + self.dropout(ff))                # residual connection + normalization

class TwoLayerTransformer(nn.Module):
    """Token embedding + positional encoding followed by a stack of encoder blocks (two by default)."""
    def __init__(self, input_dim, hidden_dim, num_heads, num_layers=2, dropout=0.1):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(input_dim, hidden_dim)
        self.positional_encoding = PositionalEncoding(hidden_dim, dropout)
        self.layers = nn.ModuleList(
            [TransformerBlock(hidden_dim, num_heads, dropout) for _ in range(num_layers)]
        )

    def forward(self, src):  # src: (batch, seq_len) of token indices
        x = self.embedding(src) * math.sqrt(self.hidden_dim)  # embed tokens and scale
        x = self.positional_encoding(x)                       # add positional information
        for layer in self.layers:
            x = layer(x)
        return x

# Example usage: batch of 10 sequences, sequence length 64, vocabulary size 100
input_seq = torch.randint(0, 100, (10, 64))
model = TwoLayerTransformer(input_dim=100, hidden_dim=512, num_heads=8)
output = model(input_seq)  # (10, 64, 512): sequence representation after two Transformer layers
```
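If a hand-rolled encoder block is not required, the same two-layer stack can be built more compactly with PyTorch's built-in `nn.TransformerEncoderLayer` and `nn.TransformerEncoder`. The sketch below is illustrative only: the hyperparameters (`dim_feedforward=2048`, dropout, etc.) are assumptions, not values from the answer above, and positional encoding is omitted for brevity.

```python
import torch
from torch import nn

# Compact alternative: PyTorch's built-in encoder layer, stacked twice.
encoder_layer = nn.TransformerEncoderLayer(
    d_model=512, nhead=8, dim_feedforward=2048, dropout=0.1, batch_first=True
)
encoder = nn.TransformerEncoder(encoder_layer, num_layers=2)

embedding = nn.Embedding(100, 512)        # vocabulary size 100, model dimension 512
tokens = torch.randint(0, 100, (10, 64))  # batch of 10 sequences of length 64
features = encoder(embedding(tokens))     # (10, 64, 512)
# In practice you would still add positional encoding before the encoder,
# as in the full example above.
print(features.shape)
```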