What does the code `src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)` mean?
This line builds a source-sequence mask used by the attention computation in a Transformer model. `src != self.src_pad_idx` compares every position of the source sequence `src` against the padding index (often 0) and produces a boolean tensor of the same shape as `src`: positions holding real tokens become `True` (1) and padded positions become `False` (0). The two `unsqueeze` calls then insert singleton dimensions at positions 1 and 2, so the mask has shape `(batch_size, 1, 1, src_len)`, where `batch_size` is the batch size and `src_len` is the source-sequence length. In that shape the mask broadcasts against the attention-score tensor, so padded positions can be screened out (for example with `masked_fill`) and only real tokens take part in the attention computation.
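As a minimal, self-contained sketch (the pad index 0, the toy token ids, and the score shape are assumptions for illustration, not taken from the original model), this is how such a mask is typically built and applied:
```python
import torch

src_pad_idx = 0                              # assumed padding index
src = torch.tensor([[5, 7, 9, 0, 0],         # toy batch of 2 sequences, padded to length 5
                    [4, 2, 0, 0, 0]])

# True where the token is real, False where it is padding
src_mask = (src != src_pad_idx).unsqueeze(1).unsqueeze(2)
print(src_mask.shape)                        # torch.Size([2, 1, 1, 5])

# hypothetical attention scores: (batch, n_heads, query_len, src_len)
scores = torch.randn(2, 8, 5, 5)
# padded positions get a large negative score, so softmax assigns them ~0 weight
scores = scores.masked_fill(src_mask == 0, -1e10)
attn = torch.softmax(scores, dim=-1)
```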
Related questions
Can `seg.train` be converted into plain, unencapsulated code?
Certainly. Here are the steps to rewrite `seg.train` as unencapsulated code:
1. Import the required libraries
```python
from collections import Counter
from typing import Dict, List, Tuple

import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
```
2. Prepare the data
```python
def load_data(file_path: str) -> List[Tuple[List[str], List[str]]]:
    """Load tab-separated text/label pairs, one example per line."""
    with open(file_path, "r", encoding="utf-8") as f:
        data = f.read().splitlines()
    data = [line.split("\t") for line in data]
    return [(text.split(), label.split()) for text, label in data]

train_data = load_data("train.txt")
valid_data = load_data("dev.txt")
test_data = load_data("test.txt")
```
3. Build the vocabulary
```python
def build_vocab(data: List[Tuple[List[str], List[str]]], min_freq: int = 2) -> Tuple[Dict[str, int], Dict[int, str]]:
    """Build the vocabulary, keeping words with frequency >= min_freq."""
    counter = Counter([word for text, _ in data for word in text])
    counter = {word: freq for word, freq in counter.items() if freq >= min_freq}
    word_list = sorted(list(counter.keys()))
    word2id = {"[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3}
    for word in word_list:
        word2id[word] = len(word2id)
    id2word = {id: word for word, id in word2id.items()}
    return word2id, id2word

word2id, id2word = build_vocab(train_data)
```
4. Define the data processing function
```python
def process_data(data: List[Tuple[List[str], List[str]]], word2id: Dict[str, int]) -> List[Tuple[torch.Tensor, torch.Tensor]]:
    """Map words to ids and labels to integers, returning tensor pairs."""
    return [(torch.tensor([word2id.get(word, word2id["[UNK]"]) for word in text]),
             torch.tensor([int(label) for label in labels]))
            for text, labels in data]

train_data = process_data(train_data, word2id)
valid_data = process_data(valid_data, word2id)
test_data = process_data(test_data, word2id)
```
5. Define the dataset and data loaders
```python
class SegDataset(Dataset):
    """Word segmentation dataset."""
    def __init__(self, data: List[Tuple[torch.Tensor, torch.Tensor]]):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        return self.data[index]

def collate_fn(batch: List[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Pad a batch to the same length and build the padding mask."""
    texts = [item[0] for item in batch]
    labels = [item[1] for item in batch]
    lengths = [len(text) for text in texts]        # record lengths before padding
    max_len = max(lengths)
    texts = [torch.cat([text, torch.zeros(max_len - len(text), dtype=torch.long)]) for text in texts]
    # pad labels with 0; padded positions are excluded from the loss via the mask
    labels = [torch.cat([label, torch.zeros(max_len - len(label), dtype=torch.long)]) for label in labels]
    mask = torch.tensor([[1] * length + [0] * (max_len - length) for length in lengths])
    return torch.stack(texts), torch.stack(labels), mask

train_dataset = SegDataset(train_data)
valid_dataset = SegDataset(valid_data)
test_dataset = SegDataset(test_data)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)
```
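As a quick sanity check (the toy ids and labels below are made up for illustration), `collate_fn` can be exercised on a hand-built batch to confirm the padding and mask shapes:
```python
# two toy examples of different lengths
toy_batch = [
    (torch.tensor([4, 7, 9]), torch.tensor([1, 0, 1])),
    (torch.tensor([5, 6]),    torch.tensor([0, 1])),
]
texts, labels, mask = collate_fn(toy_batch)
print(texts.shape, labels.shape, mask.shape)   # all torch.Size([2, 3])
print(mask)                                    # tensor([[1, 1, 1], [1, 1, 0]])
```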
6. Define the model
```python
class SegModel(torch.nn.Module):
    """BiLSTM word segmentation model with a per-token sigmoid output."""
    def __init__(self, vocab_size: int, embedding_size: int, hidden_size: int):
        super().__init__()
        self.embedding = torch.nn.Embedding(vocab_size, embedding_size, padding_idx=0)
        self.lstm = torch.nn.LSTM(embedding_size, hidden_size, batch_first=True, bidirectional=True)
        self.linear = torch.nn.Linear(2 * hidden_size, 1)
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, inputs: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        embeddings = self.embedding(inputs)
        outputs, _ = self.lstm(embeddings)
        logits = self.linear(outputs)
        logits = logits.squeeze(-1)
        logits = self.sigmoid(logits)
        logits = logits * mask        # zero out predictions at padded positions
        return logits

model = SegModel(len(word2id), 128, 256)
```
7. Define the training and evaluation functions
```python
def train(model: torch.nn.Module, optimizer: torch.optim.Optimizer, data_loader: DataLoader, device: str):
    """Run one training epoch and return the average masked loss."""
    model.train()
    loss_func = torch.nn.BCELoss(reduction="none")
    total_loss = 0
    total_num = 0
    for texts, labels, mask in tqdm(data_loader, desc="Train"):
        texts = texts.to(device)
        labels = labels.float().to(device)
        mask = mask.float().to(device)
        logits = model(texts, mask)
        loss = loss_func(logits, labels)
        loss = (loss * mask).sum() / mask.sum()   # average only over real (unmasked) tokens
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * mask.sum().item()
        total_num += mask.sum().item()
    return total_loss / total_num

def evaluate(model: torch.nn.Module, data_loader: DataLoader, device: str):
    """Compute the average masked loss over a data loader."""
    model.eval()
    loss_func = torch.nn.BCELoss(reduction="none")
    total_loss = 0
    total_num = 0
    with torch.no_grad():
        for texts, labels, mask in tqdm(data_loader, desc="Evaluate"):
            texts = texts.to(device)
            labels = labels.float().to(device)
            mask = mask.float().to(device)
            logits = model(texts, mask)
            loss = loss_func(logits, labels)
            loss = (loss * mask).sum() / mask.sum()
            total_loss += loss.item() * mask.sum().item()
            total_num += mask.sum().item()
    return total_loss / total_num

def train_and_evaluate(model: torch.nn.Module, optimizer: torch.optim.Optimizer, train_loader: DataLoader,
                       valid_loader: DataLoader, num_epochs: int, device: str):
    """Train for num_epochs, keep the best checkpoint, then report the test loss."""
    best_valid_loss = float("inf")
    for epoch in range(num_epochs):
        train_loss = train(model, optimizer, train_loader, device)
        valid_loss = evaluate(model, valid_loader, device)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}")
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), "best_model.pt")
    model.load_state_dict(torch.load("best_model.pt"))
    # test_loader is the module-level loader defined in step 5
    test_loss = evaluate(model, test_loader, device)
    print(f"Test Loss: {test_loss:.4f}")
```
8. Train the model
```python
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters())
train_and_evaluate(model, optimizer, train_loader, valid_loader, 10, device)
```
Those are the steps for rewriting `seg.train` as unencapsulated code; modify and adjust them as needed.
flatten transformer code
flatten transformer is a model architecture for natural language processing tasks; it builds on the Transformer model and refines it. Below is a walk-through of the flatten transformer code:
First, import the required libraries and modules:
```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
```
Next, define the main modules of the flatten transformer: the Encoder, the Decoder, and the Transformer wrapper.
1. Encoder module:
```python
class Encoder(nn.Module):
    def __init__(self, input_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, max_len=100):
        super().__init__()
        self.hid_dim = hid_dim
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_len, hid_dim)   # max_len: longest supported sequence
        self.layers = nn.ModuleList([EncoderLayer(hid_dim, n_heads, pf_dim, dropout) for _ in range(n_layers)])
        self.dropout = nn.Dropout(dropout)

    def forward(self, src, src_mask):
        batch_size = src.shape[0]
        src_len = src.shape[1]
        # position ids 0..src_len-1, repeated for every sequence in the batch
        pos = torch.arange(0, src_len, device=src.device).unsqueeze(0).repeat(batch_size, 1)
        src = self.dropout((self.tok_embedding(src) * math.sqrt(self.hid_dim)) + self.pos_embedding(pos))
        for layer in self.layers:
            src = layer(src, src_mask)
        return src
```
2. Decoder module:
```python
class Decoder(nn.Module):
    def __init__(self, output_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, max_len=100):
        super().__init__()
        self.hid_dim = hid_dim
        self.tok_embedding = nn.Embedding(output_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_len, hid_dim)
        self.layers = nn.ModuleList([DecoderLayer(hid_dim, n_heads, pf_dim, dropout) for _ in range(n_layers)])
        self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, trg, enc_src, trg_mask, src_mask):
        batch_size = trg.shape[0]
        trg_len = trg.shape[1]
        pos = torch.arange(0, trg_len, device=trg.device).unsqueeze(0).repeat(batch_size, 1)
        trg = self.dropout((self.tok_embedding(trg) * math.sqrt(self.hid_dim)) + self.pos_embedding(pos))
        for layer in self.layers:
            trg, attention = layer(trg, enc_src, trg_mask, src_mask)
        output = self.fc_out(trg)
        return output, attention
```
3. Transformer module:
```python
class Transformer(nn.Module):
    def __init__(self, encoder, decoder, src_pad_idx, trg_pad_idx):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx

    def make_src_mask(self, src):
        # (batch_size, 1, 1, src_len): True where the source token is not padding
        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
        return src_mask

    def make_trg_mask(self, trg):
        # padding mask: (batch_size, 1, trg_len, 1)
        trg_pad_mask = (trg != self.trg_pad_idx).unsqueeze(1).unsqueeze(3)
        trg_len = trg.shape[1]
        # lower-triangular mask so position i only attends to positions <= i
        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device=trg.device)).bool()
        trg_mask = trg_pad_mask & trg_sub_mask
        return trg_mask

    def forward(self, src, trg):
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)
        enc_src = self.encoder(src, src_mask)
        output, attention = self.decoder(trg, enc_src, trg_mask, src_mask)
        return output, attention
```
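For intuition, here is a minimal, self-contained sketch of how the target mask combines the padding mask with the lower-triangular (subsequent-position) mask; the pad index 0 and the toy token ids are assumptions for illustration only:
```python
import torch

trg_pad_idx = 0                                    # assumed padding index
trg = torch.tensor([[3, 5, 8, 0]])                 # toy target batch: one sequence, last position padded

trg_pad_mask = (trg != trg_pad_idx).unsqueeze(1).unsqueeze(3)             # (1, 1, 4, 1)
trg_sub_mask = torch.tril(torch.ones((4, 4), device=trg.device)).bool()   # (4, 4) lower-triangular
trg_mask = trg_pad_mask & trg_sub_mask                                    # broadcasts to (1, 1, 4, 4)

print(trg_mask[0, 0].int())
# tensor([[1, 0, 0, 0],
#         [1, 1, 0, 0],
#         [1, 1, 1, 0],
#         [0, 0, 0, 0]])
```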
That covers the main flatten transformer code: the definitions and forward passes of the Encoder, Decoder, and Transformer modules. Nested calls between these modules implement the flatten transformer's functionality.