预训练模型 pred_right = torch.where(mask == 1, text, torch.tensor(ignore_index).to(device))
时间: 2023-05-31 17:02:40 浏览: 43
这段代码用于处理预训练模型的文本预测结果。其中,text是模型预测的文本结果,mask是标记哪些位置需要预测的掩码,ignore_index是用来标记需忽略位置的填充值。torch.where函数根据掩码逐元素进行选择:当mask等于1时,保留text中对应位置的结果;当mask等于0时,用ignore_index填充对应位置。这样处理后,得到的pred_right只在需要预测的位置保留模型的预测结果,其余位置都被替换为ignore_index,以便后续处理时忽略这些不需要预测的位置。
相关问题
python操作resultym.csv数据表(有Date(YYYY/MM)、TotalPrice两列数据),数据表第一行为表头信息,数据表中前27行都有数据,以此为基础,python调用resultym.csv表进行操作:循环调用以resultym.csv为数据集构建的pytorch lstm预测模型(模型实现过程:先读取shuju.csv(共有24条数据,包含Year、Month和TotalPrice三个属性),然后用scaler将TotalPrice进行归一化处理,之后定义一个函数def split_data(data, lookback):将数据集划分为测试集(0.2)和训练集(0.8),data_raw = data.to_numpy(),lookback = 4,然后再将划分完成后的测试集和训练集转换为PyTorch张量,然后定义超参数,定义算法模型model=LSTM()、损失函数和优化器(Adam)然后训练模型),该模型能够根据Date值来预测TotalPrice值,然后将第一次预测出的y_test_pred赋值给B26、将第二次预测出的值赋给B27、将第三次预测出的值赋给B28,一直循环直到求出B50的数值。每预测出一个值就在表的最后一行插入一组数据,插入的数据为:Date插入的值按照前面的年月往下延(即按照2023/03、2023/04、2023/05········2025/01的顺序),TotalPrice插入的值定义为2222222.5。直到求出第50行的数值,脚本停止运行。
首先,我们需要导入相关的库和模块:
```python
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
```
然后,我们需要读取resultym.csv文件,获取前27行数据,以及读取shuju.csv文件,获取24条数据:
```python
# Load the first two columns of resultym.csv and keep only the first 27 data rows.
data = pd.read_csv('resultym.csv', header=0, usecols=[0, 1]).head(27)
# Load the second and third columns of shuju.csv.
data_raw = pd.read_csv('shuju.csv', header=0, usecols=[1, 2])
```
接下来,我们需要对TotalPrice进行归一化处理,并定义split_data函数,将数据集划分为测试集和训练集:
```python
# Min-max scale the TotalPrice column in place. The fitted scaler stays in
# `scaler` so later code can reuse the same mapping.
scaler = MinMaxScaler()
total_price_2d = data['TotalPrice'].values.reshape(-1, 1)
data['TotalPrice'] = scaler.fit_transform(total_price_2d)
# Turn a series into (input window, next value) samples.
def split_data(data, lookback):
    """Build sliding-window samples from a single-column series.

    Despite its name this function does not split anything into train/test
    parts; it only pairs every window of ``lookback`` consecutive values with
    the value that immediately follows it.

    Args:
        data: 2-D NumPy array; only column 0 is read.
        lookback: number of consecutive values in each input window.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n, lookback)`` and ``y`` has
        shape ``(n,)`` with ``n = max(len(data) - lookback, 0)``.
    """
    # The last usable window starts at len(data) - lookback - 1, so there are
    # len(data) - lookback windows in total (the old "- 1" dropped the last one).
    n_samples = len(data) - lookback
    if n_samples <= 0:
        # Keep a well-defined 2-D shape even when the series is too short.
        return np.empty((0, lookback)), np.empty((0,))
    X = np.array([data[i:i + lookback, 0] for i in range(n_samples)])
    y = np.array([data[i + lookback, 0] for i in range(n_samples)])
    return X, y
# Build sliding-window samples for the training and test series.
lookback = 4
X_train, y_train = split_data(data['TotalPrice'].values.reshape(-1, 1), lookback)
# data['TotalPrice'] has already been min-max scaled; push the test series
# through the same fitted scaler so both sets are on the same scale.
test_prices = scaler.transform(data_raw['TotalPrice'].values.reshape(-1, 1))
X_test, y_test = split_data(test_prices, lookback)
# Convert to float32 tensors. torch.autograd.Variable is a deprecated no-op
# wrapper, so plain tensors are used directly.
# Inputs are laid out as (samples, 1, lookback).
X_train = torch.from_numpy(X_train).float().view(-1, 1, lookback)
X_test = torch.from_numpy(X_test).float().view(-1, 1, lookback)
y_train = torch.from_numpy(y_train).float()
y_test = torch.from_numpy(y_test).float()
```
然后,我们需要定义超参数、算法模型、损失函数和优化器:
```python
# Hyperparameters.
# The input tensors are shaped (samples, 1, lookback): one time step carrying
# `lookback` features. nn.LSTM checks the last input dimension against its
# input size, so input_dim has to equal lookback (a value of 1 is rejected).
input_dim = lookback
hidden_dim = 2
num_layers = 1
output_dim = 1
num_epochs = 1000
learning_rate = 0.01
# Model definition.
class LSTM(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    Args:
        input_dim: number of features per time step.
        hidden_dim: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        output_dim: size of the linear output.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, input_dim) to (batch, output_dim)."""
        # Create the zero initial states on the same device and dtype as the
        # input so the module also works on GPU or in double precision.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        c0 = torch.zeros_like(h0)
        out, _ = self.lstm(x, (h0, c0))
        # Only the output of the last time step feeds the linear head.
        return self.fc(out[:, -1, :])
# Instantiate the network from the hyperparameters defined for it.
model = LSTM(input_dim, hidden_dim, num_layers, output_dim)
# Mean-squared-error loss and an Adam optimizer over all model parameters.
criterion = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
```
接下来,我们需要训练模型,并循环调用模型进行预测,将预测出的值插入到resultym.csv表格的最后一行:
```python
# Train the model on the whole training set at every epoch.
model.train()
for epoch in range(num_epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    # The model emits (N, 1) while y_train is (N,). Without matching shapes
    # MSELoss broadcasts the pair to (N, N) and optimises the wrong objective.
    loss = criterion(outputs.reshape(y_train.shape), y_train)
    loss.backward()
    optimizer.step()
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))
# Forecast future months one step at a time and append them to the table.
# The earlier version indexed X_test beyond its length, re-scaled values that
# were already normalised, derived dates from a data_raw column with a
# hard-coded "2023/" prefix, and used DataFrame.append (removed in pandas 2.0).
target_rows = 50  # total number of data rows the table should end up with
model.eval()
# TotalPrice in `data` is on the normalised scale at this point.
history = data['TotalPrice'].astype(float).tolist()
# Assumes the Date column is formatted as YYYY/MM -- confirm against the CSV.
next_month = pd.to_datetime(str(data['Date'].iloc[-1]), format='%Y/%m').to_period('M')
forecast_rows = []
with torch.no_grad():
    while len(data) + len(forecast_rows) < target_rows:
        # Same (1, 1, lookback) layout the model was trained on.
        window = torch.tensor(history[-lookback:], dtype=torch.float32).view(1, 1, lookback)
        y_test_pred = model(window).item()
        history.append(y_test_pred)  # feed the prediction back as the newest input
        next_month += 1
        forecast_rows.append({'Date': next_month.strftime('%Y/%m'), 'TotalPrice': y_test_pred})
if forecast_rows:
    data = pd.concat([data, pd.DataFrame(forecast_rows)], ignore_index=True)
# Map the whole column (observed and predicted rows) back to the original scale
# so the saved table does not mix normalised and real prices.
data['TotalPrice'] = scaler.inverse_transform(
    data['TotalPrice'].to_numpy().reshape(-1, 1)
).ravel()
```
最后,我们将处理后的结果保存到resultym.csv文件中:
```python
# Write the processed table to resultym.csv without the index column.
data.to_csv('resultym.csv', index=False)
```
改进transformer基本翻译模型代码
以下是改进后的Transformer基本翻译模型代码:
```python
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
class Transformer(nn.Module):
    """Translation model built from a stack of ``TransformerBlock`` layers.

    Source tokens are only embedded (there is no separate encoder stack); each
    block then attends from the target sequence to the embedded source.

    Args:
        input_dim: size of the source vocabulary.
        output_dim: size of the target vocabulary.
        hid_dim: embedding / hidden size.
        n_layers: number of stacked blocks.
        n_heads: attention heads per block.
        pf_dim: inner size of the position-wise feed-forward layer.
        dropout: dropout probability.
        device: device on which the scaling constant is created.
        max_length: number of positions the positional embedding supports.
    """

    def __init__(self, input_dim, output_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, device, max_length=1000):
        super().__init__()
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        # Target tokens index the target vocabulary, so they need their own
        # table sized by output_dim; reusing the source table breaks whenever
        # the two vocabularies differ.
        self.trg_tok_embedding = nn.Embedding(output_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        self.layers = nn.ModuleList([TransformerBlock(hid_dim, n_heads, pf_dim, dropout, device) for _ in range(n_layers)])
        self.fc_out = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)

    def _embed(self, embedding, tokens):
        """Return dropout(scaled token embedding + positional embedding)."""
        batch_size, seq_len = tokens.shape
        # Build positions on the tokens' own device instead of relying on a
        # module-level `device` variable.
        pos = torch.arange(seq_len, device=tokens.device).unsqueeze(0).expand(batch_size, -1)
        return self.dropout((embedding(tokens) * self.scale) + self.pos_embedding(pos))

    def forward(self, src, trg, src_mask, trg_mask):
        """Return logits of shape [batch size, trg len, output dim].

        src = [batch size, src len]
        trg = [batch size, trg len]
        src_mask = [batch size, 1, 1, src len]
        trg_mask = [batch size, 1, trg len, trg len]
        """
        trg = self._embed(self.trg_tok_embedding, trg)
        src = self._embed(self.tok_embedding, src)
        for layer in self.layers:
            trg, _ = layer(trg, src, trg_mask, src_mask)
        return self.fc_out(trg)
class TransformerBlock(nn.Module):
    """Self-attention, source attention and feed-forward sub-layers.

    Each sub-layer output goes through dropout, is added to its input and is
    normalised with a dedicated LayerNorm.
    """

    def __init__(self, hid_dim, n_heads, pf_dim, dropout, device):
        super().__init__()
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)

    def _add_and_norm(self, norm, residual, sublayer_out):
        """Apply dropout to the sub-layer output, add the residual, normalise."""
        return norm(residual + self.dropout(sublayer_out))

    def forward(self, trg, src, trg_mask, src_mask):
        """Return the updated target states and the source-attention weights.

        trg = [batch size, trg len, hid dim]
        src = [batch size, src len, hid dim]
        trg_mask = [batch size, 1, trg len, trg len]
        src_mask = [batch size, 1, 1, src len]
        """
        # Target attends to itself.
        self_attended, _ = self.self_attention(trg, trg, trg, trg_mask)
        trg = self._add_and_norm(self.self_attn_layer_norm, trg, self_attended)
        # Target attends to the source representation.
        src_attended, attention = self.encoder_attention(trg, src, src, src_mask)
        trg = self._add_and_norm(self.enc_attn_layer_norm, trg, src_attended)
        # Position-wise feed-forward network.
        fed_forward = self.positionwise_feedforward(trg)
        trg = self._add_and_norm(self.ff_layer_norm, trg, fed_forward)
        return trg, attention
class MultiHeadAttentionLayer(nn.Module):
    """Multi-head scaled dot-product attention with linear projections."""

    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()
        assert hid_dim % n_heads == 0
        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads
        self.fc_q = nn.Linear(hid_dim, hid_dim)
        self.fc_k = nn.Linear(hid_dim, hid_dim)
        self.fc_v = nn.Linear(hid_dim, hid_dim)
        self.fc_o = nn.Linear(hid_dim, hid_dim)
        self.dropout = nn.Dropout(dropout)
        self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)

    def _split_heads(self, projected, batch_size):
        """Reshape [batch, len, hid dim] into [batch, heads, len, head dim]."""
        per_head = projected.view(batch_size, -1, self.n_heads, self.head_dim)
        return per_head.permute(0, 2, 1, 3)

    def forward(self, query, key, value, mask = None):
        """Return ``(output, attention)``.

        ``output`` is [batch, query len, hid dim]; ``attention`` is
        [batch, heads, query len, key len]. Positions where ``mask == 0``
        are filled with -1e10 before the softmax.
        """
        batch_size = query.shape[0]
        Q = self._split_heads(self.fc_q(query), batch_size)
        K = self._split_heads(self.fc_k(key), batch_size)
        V = self._split_heads(self.fc_v(value), batch_size)
        # Scaled dot-product scores between every query and key position.
        scores = torch.matmul(Q, K.transpose(-2, -1)) / self.scale
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e10)
        attention = torch.softmax(scores, dim = -1)
        context = torch.matmul(self.dropout(attention), V)
        # Merge the heads back into a single hidden dimension.
        context = context.permute(0, 2, 1, 3).contiguous().view(batch_size, -1, self.hid_dim)
        return self.fc_o(context), attention
class PositionwiseFeedforwardLayer(nn.Module):
    """Two linear layers with a ReLU and dropout in between."""

    def __init__(self, hid_dim, pf_dim, dropout):
        super().__init__()
        self.fc_1 = nn.Linear(hid_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, hid_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Map x = [batch size, seq len, hid dim] to a tensor of the same shape."""
        hidden = torch.relu(self.fc_1(x))
        hidden = self.dropout(hidden)
        return self.fc_2(hidden)
class TranslationDataset(Dataset):
    """Paired source/target sentences exposed as lists of vocabulary indexes."""

    def __init__(self, src_sentences, trg_sentences, src_vocab, trg_vocab):
        self.src_sentences = src_sentences
        self.trg_sentences = trg_sentences
        self.src_vocab = src_vocab
        self.trg_vocab = trg_vocab

    def __len__(self):
        return len(self.src_sentences)

    @staticmethod
    def _encode(sentence, vocab):
        """Look up every word in ``vocab.stoi`` and wrap the result in <sos>/<eos>."""
        lookup = vocab.stoi
        indexes = [lookup["<sos>"]]
        indexes.extend(lookup[word] for word in sentence)
        indexes.append(lookup["<eos>"])
        return indexes

    def __getitem__(self, idx):
        """Return ``{"src": [...], "trg": [...]}`` for the sentence pair at ``idx``."""
        return {
            "src": self._encode(self.src_sentences[idx], self.src_vocab),
            "trg": self._encode(self.trg_sentences[idx], self.trg_vocab),
        }
def train(model, iterator, optimizer, criterion, clip):
    """Run one training epoch with teacher forcing and return the mean batch loss.

    Args:
        model: network called as ``model(src, trg_input, src_mask, trg_mask)``.
        iterator: iterable of batches, each a mapping with ``"src"`` and
            ``"trg"`` entries (assumed to be padded index tensors of shape
            [batch size, len] -- confirm against the collate function in use).
        optimizer: optimizer that updates ``model``'s parameters.
        criterion: loss applied to the flattened logits and flattened targets.
        clip: maximum gradient norm passed to ``clip_grad_norm_``.

    Relies on the module-level ``SRC`` and ``TRG`` fields for the padding index.
    """
    model.train()
    # Use the model's own device rather than a module-level variable.
    device = next(model.parameters()).device
    src_pad_idx = SRC.vocab.stoi["<pad>"]
    trg_pad_idx = TRG.vocab.stoi["<pad>"]
    epoch_loss = 0
    for batch in iterator:
        src = batch["src"].to(device)
        trg = batch["trg"].to(device)
        # Teacher forcing: the model sees trg[:-1] and must predict trg[1:].
        trg_input = trg[:, :-1]
        trg_len = trg_input.shape[1]
        # src_mask = [batch size, 1, 1, src len]
        src_mask = (src != src_pad_idx).unsqueeze(1).unsqueeze(2)
        # Padding mask over key positions: [batch size, 1, 1, trg len].
        trg_pad_mask = (trg_input != trg_pad_idx).unsqueeze(1).unsqueeze(2)
        # Lower-triangular mask so a position cannot attend to later tokens.
        trg_causal_mask = torch.tril(
            torch.ones((trg_len, trg_len), dtype=torch.bool, device=device)
        )
        # Broadcasts to [batch size, 1, trg len, trg len].
        trg_mask = trg_pad_mask & trg_causal_mask
        optimizer.zero_grad()
        output = model(src, trg_input, src_mask, trg_mask)
        output_dim = output.shape[-1]
        output = output.contiguous().view(-1, output_dim)
        target = trg[:, 1:].contiguous().view(-1)
        loss = criterion(output, target)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(iterator)
def evaluate(model, iterator, criterion):
    """Compute the mean batch loss over ``iterator`` without updating the model.

    Args:
        model: network called as ``model(src, trg_input, src_mask, trg_mask)``.
        iterator: iterable of batches, each a mapping with ``"src"`` and
            ``"trg"`` entries (assumed to be padded index tensors of shape
            [batch size, len] -- confirm against the collate function in use).
        criterion: loss applied to the flattened logits and flattened targets.

    Relies on the module-level ``SRC`` and ``TRG`` fields for the padding index.
    """
    model.eval()
    # Use the model's own device rather than a module-level variable.
    device = next(model.parameters()).device
    src_pad_idx = SRC.vocab.stoi["<pad>"]
    trg_pad_idx = TRG.vocab.stoi["<pad>"]
    epoch_loss = 0
    with torch.no_grad():
        for batch in iterator:
            src = batch["src"].to(device)
            trg = batch["trg"].to(device)
            # The model sees trg[:-1] and is scored against trg[1:].
            trg_input = trg[:, :-1]
            trg_len = trg_input.shape[1]
            # src_mask = [batch size, 1, 1, src len]
            src_mask = (src != src_pad_idx).unsqueeze(1).unsqueeze(2)
            # Padding mask over key positions: [batch size, 1, 1, trg len].
            trg_pad_mask = (trg_input != trg_pad_idx).unsqueeze(1).unsqueeze(2)
            # Lower-triangular mask so a position cannot attend to later tokens.
            trg_causal_mask = torch.tril(
                torch.ones((trg_len, trg_len), dtype=torch.bool, device=device)
            )
            # Broadcasts to [batch size, 1, trg len, trg len].
            trg_mask = trg_pad_mask & trg_causal_mask
            output = model(src, trg_input, src_mask, trg_mask)
            output_dim = output.shape[-1]
            output = output.contiguous().view(-1, output_dim)
            target = trg[:, 1:].contiguous().view(-1)
            loss = criterion(output, target)
            epoch_loss += loss.item()
    return epoch_loss / len(iterator)
def translate_sentence(sentence, src_field, trg_field, model, device, max_len = 50):
    """Greedily decode a translation for ``sentence``.

    Args:
        sentence: raw string (tokenised with spaCy's ``en_core_web_sm``) or an
            iterable of string tokens.
        src_field, trg_field: objects exposing ``init_token``, ``eos_token``
            and ``vocab.stoi`` / ``vocab.itos``.
        model: network called as ``model(src, trg, src_mask, trg_mask)`` that
            returns logits of shape [batch, trg len, vocab].
        device: device on which the input tensors are created.
        max_len: maximum number of tokens to generate.

    Returns:
        ``(tokens, attention)``: the generated tokens without the initial
        token (the end token is kept when produced), and the attention weights
        returned by the last entry of ``model.layers`` on the final decoding
        step, or ``None`` when no such layer ran.
    """
    model.eval()
    if isinstance(sentence, str):
        # spaCy is only needed for raw strings, so it is imported lazily here;
        # the surrounding file never imports it at module level.
        import spacy
        nlp = spacy.load("en_core_web_sm")
        tokens = [token.text.lower() for token in nlp(sentence)]
    else:
        tokens = [token.lower() for token in sentence]
    tokens = [src_field.init_token] + tokens + [src_field.eos_token]
    src_indexes = [src_field.vocab.stoi[token] for token in tokens]
    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)
    src_mask = (src_tensor != src_field.vocab.stoi["<pad>"]).unsqueeze(1).unsqueeze(2)

    # model.forward returns logits only, so the attention weights are captured
    # from the last layer's output with a temporary forward hook.
    captured = {}
    hook = None
    layers = getattr(model, "layers", None)
    if layers is not None and len(layers) > 0:
        hook = layers[-1].register_forward_hook(
            lambda module, inputs, outputs: captured.update(attention=outputs[1])
        )

    eos_index = trg_field.vocab.stoi[trg_field.eos_token]
    trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]
    try:
        with torch.no_grad():
            for _ in range(max_len):
                # Feed everything generated so far (not just the last token),
                # together with a causal mask over those positions.
                trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(0).to(device)
                trg_len = trg_tensor.shape[1]
                trg_mask = torch.tril(
                    torch.ones((trg_len, trg_len), dtype=torch.bool, device=device)
                ).unsqueeze(0).unsqueeze(0)
                # The model embeds the source itself, so it receives token ids.
                output = model(src_tensor, trg_tensor, src_mask, trg_mask)
                pred_token = output.argmax(2)[:, -1].item()
                trg_indexes.append(pred_token)
                if pred_token == eos_index:
                    break
    finally:
        if hook is not None:
            hook.remove()
    trg_tokens = [trg_field.vocab.itos[i] for i in trg_indexes]
    return trg_tokens[1:], captured.get("attention")
# Hyperparameters.
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
HID_DIM = 256
N_LAYERS = 3
N_HEADS = 8
PF_DIM = 512
DROPOUT = 0.1
BATCH_SIZE = 128
N_EPOCHS = 10
CLIP = 1
# Build the model.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Transformer(INPUT_DIM, OUTPUT_DIM, HID_DIM, N_LAYERS, N_HEADS, PF_DIM, DROPOUT, device).to(device)
# Optimizer and loss; padded target positions do not contribute to the loss.
optimizer = optim.Adam(model.parameters(), lr=0.0005)
criterion = nn.CrossEntropyLoss(ignore_index = TRG.vocab.stoi["<pad>"])


def collate_batch(samples):
    """Pad variable-length index lists into rectangular LongTensor batches.

    The dataset yields plain Python lists of different lengths, which the
    default DataLoader collation cannot stack into [batch size, len] tensors.
    """
    src = [torch.tensor(sample["src"], dtype=torch.long) for sample in samples]
    trg = [torch.tensor(sample["trg"], dtype=torch.long) for sample in samples]
    return {
        "src": nn.utils.rnn.pad_sequence(src, batch_first=True, padding_value=SRC.vocab.stoi["<pad>"]),
        "trg": nn.utils.rnn.pad_sequence(trg, batch_first=True, padding_value=TRG.vocab.stoi["<pad>"]),
    }


# Training and validation datasets.
train_dataset = TranslationDataset(train_src_sentences, train_trg_sentences, SRC.vocab, TRG.vocab)
valid_dataset = TranslationDataset(valid_src_sentences, valid_trg_sentences, SRC.vocab, TRG.vocab)
# Data loaders; only the training data needs shuffling.
train_iterator = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_batch)
valid_iterator = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_batch)
# Train, keeping the checkpoint with the lowest validation loss.
best_valid_loss = float('inf')
for epoch in range(N_EPOCHS):
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut6-model.pt')
    print(f'Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Val. Loss: {valid_loss:.3f}')
# Evaluate the best checkpoint on the test set.
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load('tut6-model.pt', map_location=device))
test_dataset = TranslationDataset(test_src_sentences, test_trg_sentences, SRC.vocab, TRG.vocab)
test_iterator = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_batch)
test_loss = evaluate(model, test_iterator, criterion)
print(f'| Test Loss: {test_loss:.3f}')
# Translate one example sentence.
example_idx = 12
example_tokens = test_src_sentences[example_idx]
src_sentence = ' '.join(example_tokens)
trg_sentence = ' '.join(test_trg_sentences[example_idx])
# Pass the existing tokens instead of the joined string so the sentence is not
# re-tokenised differently from how the dataset was built.
translation, attention = translate_sentence(example_tokens, SRC, TRG, model, device)
print(f'src = {src_sentence}')
print(f'trg = {trg_sentence}')
print(f'predicted trg = {translation}')
# 相关问题:
--相关问题--: