7. Build a small student management system: define a Student class, read in the names and scores of 3-5 students, and compute their average score. All functionality should live in the Student class, and a Demo class should define a main function that demonstrates it.
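A minimal sketch of this exercise, written here in Python to match the code later on this page (the exercise names the Student and Demo classes; the helper method names `input_students` and `average_score` are illustrative assumptions, not prescribed by the exercise):

```python
class Student:
    """Holds one student's name and score; static helpers cover input and averaging."""

    def __init__(self, name, score):
        self.name = name
        self.score = score

    @staticmethod
    def input_students(count):
        # Read `count` students' names and scores from standard input.
        students = []
        for i in range(count):
            name = input(f"Enter name of student {i + 1}: ")
            score = float(input(f"Enter score of student {i + 1}: "))
            students.append(Student(name, score))
        return students

    @staticmethod
    def average_score(students):
        # Average score over a list of Student objects.
        return sum(s.score for s in students) / len(students)


class Demo:
    @staticmethod
    def main():
        students = Student.input_students(3)  # the exercise allows 3-5 students; 3 used here
        print(f"Average score: {Student.average_score(students):.2f}")


if __name__ == "__main__":
    Demo.main()
```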
Below is an example of a Transformer with self-attention implemented in PyTorch:
```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class SelfAttention(nn.Module):
    """Multi-head scaled dot-product self-attention."""

    def __init__(self, hidden_size, num_heads):
        super(SelfAttention, self).__init__()
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.head_size = hidden_size // num_heads
        self.query = nn.Linear(hidden_size, hidden_size)
        self.key = nn.Linear(hidden_size, hidden_size)
        self.value = nn.Linear(hidden_size, hidden_size)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        # x: (batch_size, seq_len, hidden_size)
        batch_size = x.size(0)
        # Project and split into heads: (batch_size, num_heads, seq_len, head_size)
        query = self.query(x).view(batch_size, -1, self.num_heads, self.head_size).transpose(1, 2)
        key = self.key(x).view(batch_size, -1, self.num_heads, self.head_size).transpose(1, 2)
        value = self.value(x).view(batch_size, -1, self.num_heads, self.head_size).transpose(1, 2)
        # Scaled dot-product attention over the sequence dimension
        attention_scores = torch.matmul(query, key.transpose(-2, -1)) / (self.head_size ** 0.5)
        attention_probs = F.softmax(attention_scores, dim=-1)
        attention_probs = self.dropout(attention_probs)
        # Weighted sum of values, then merge the heads back: (batch_size, seq_len, hidden_size)
        context = torch.matmul(attention_probs, value).transpose(1, 2).contiguous().view(batch_size, -1, self.hidden_size)
        return context


class TransformerBlock(nn.Module):
    """Self-attention and a position-wise feed-forward network, each with a residual connection and LayerNorm."""

    def __init__(self, hidden_size, num_heads, feed_forward_dim):
        super(TransformerBlock, self).__init__()
        self.self_attention = SelfAttention(hidden_size, num_heads)
        self.norm1 = nn.LayerNorm(hidden_size)
        self.feed_forward = nn.Sequential(
            nn.Linear(hidden_size, feed_forward_dim),
            nn.ReLU(),
            nn.Linear(feed_forward_dim, hidden_size)
        )
        self.norm2 = nn.LayerNorm(hidden_size)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        # Attention sub-layer with residual connection
        attention_output = self.self_attention(x)
        attention_output = self.dropout(attention_output)
        x = self.norm1(x + attention_output)
        # Feed-forward sub-layer with residual connection
        feed_forward_output = self.feed_forward(x)
        feed_forward_output = self.dropout(feed_forward_output)
        x = self.norm2(x + feed_forward_output)
        return x


class Transformer(nn.Module):
    """Embedding layer, a stack of Transformer blocks, and a classification head."""

    def __init__(self, num_layers, hidden_size, num_heads, feed_forward_dim, num_classes):
        super(Transformer, self).__init__()
        self.embedding = nn.Embedding(10000, hidden_size)  # vocabulary size fixed at 10000
        self.transformer_blocks = nn.ModuleList(
            [TransformerBlock(hidden_size, num_heads, feed_forward_dim) for _ in range(num_layers)]
        )
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        # x: (batch_size, seq_len) of token ids
        x = self.embedding(x)  # (batch_size, seq_len, hidden_size)
        for transformer_block in self.transformer_blocks:
            x = transformer_block(x)
        # Pool over the sequence dimension; AdaptiveAvgPool1d expects (batch, channels, length)
        x = x.transpose(1, 2)  # (batch_size, hidden_size, seq_len)
        x = self.avg_pool(x).squeeze(-1)  # (batch_size, hidden_size)
        x = self.fc(x)  # (batch_size, num_classes)
        return x


model = Transformer(num_layers=6, hidden_size=512, num_heads=8, feed_forward_dim=2048, num_classes=10)
```
This code implements a Transformer model made up of a stack of Transformer blocks. Each block contains a multi-head self-attention layer and a position-wise feed-forward network, each wrapped in a residual connection with layer normalization. At the output, global average pooling over the sequence followed by a fully connected layer produces the class predictions.
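As a quick sanity check, a forward pass on a batch of random token ids might look like the sketch below (the batch size of 4 and sequence length of 32 are arbitrary choices; token ids must stay below the embedding's vocabulary size of 10000):

```python
import torch

# Dummy batch of token ids: (batch_size=4, seq_len=32), values in [0, 10000)
tokens = torch.randint(0, 10000, (4, 32))

model.eval()              # disable dropout for a deterministic check
with torch.no_grad():
    logits = model(tokens)  # (batch_size, num_classes)

print(logits.shape)       # torch.Size([4, 10])
```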