PyTorch实现LSTM训练模型,使用NSL-KDD数据集
时间: 2023-12-09 14:06:32 浏览: 80
以下是使用PyTorch实现LSTM训练模型并使用NSL-KDD数据集的示例代码:
```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
# load dataset
# The files are read without a header row, so columns are addressed by position.
train_df = pd.read_csv('KDDTrain+.txt', header=None)
test_df = pd.read_csv('KDDTest+.txt', header=None)

# preprocess dataset
# Column 41 holds the traffic label; collapse it to 0 (normal) / 1 (attack).
train_df[41] = train_df[41].apply(lambda x: 0 if x == 'normal' else 1)
test_df[41] = test_df[41].apply(lambda x: 0 if x == 'normal' else 1)

# Read the label from column 41 explicitly and use only the columns before it
# as features. In the standard NSL-KDD layout each row ends with an extra
# difficulty score (column 42); slicing with ``-1`` would then train on the
# difficulty score and leak the real label into the inputs.
label_col = 41
train_y = train_df[label_col].to_numpy(dtype=np.float32)
test_y = test_df[label_col].to_numpy(dtype=np.float32)

# Columns 1-3 (protocol_type, service, flag in the standard layout) are
# strings and cannot be converted to float tensors, so one-hot encode them.
# Train and test are encoded together so both end up with the same columns
# even when a category appears in only one of the files.
categorical_cols = [1, 2, 3]
n_train = len(train_df)
features = pd.concat(
    [train_df.iloc[:, :label_col], test_df.iloc[:, :label_col]],
    ignore_index=True,
)
features = pd.get_dummies(features, columns=categorical_cols, dtype=np.float64)
features = features.to_numpy(dtype=np.float64)
train_x = features[:n_train]
test_x = features[n_train:]

# Standardize with statistics from the training split only; raw counters and
# byte totals span many orders of magnitude. Constant columns keep a divisor
# of 1 so they do not turn into NaN.
feature_mean = train_x.mean(axis=0)
feature_std = train_x.std(axis=0)
feature_std[feature_std == 0] = 1.0
train_x = ((train_x - feature_mean) / feature_std).astype(np.float32)
test_x = ((test_x - feature_mean) / feature_std).astype(np.float32)
# define dataset class
class NSLKDDDataset(Dataset):
    """Map-style dataset that pairs each feature row with its label.

    Args:
        x: Array-like of feature rows; copied into a float32 tensor.
        y: Array-like of labels, one per row of ``x``; copied into a
            float32 tensor.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of
            samples.
    """

    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        # __len__ only looks at x, so a length mismatch would otherwise
        # surface as an IndexError in the middle of an epoch.
        if len(self.x) != len(self.y):
            raise ValueError(
                f'x and y must have the same number of samples, '
                f'got {len(self.x)} and {len(self.y)}'
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.x[idx], self.y[idx]
# define LSTM model
class LSTMModel(nn.Module):
    """Stacked batch-first LSTM followed by a linear layer on the last step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_layers: Number of stacked LSTM layers.
        output_size: Number of values produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return raw outputs of shape ``(batch, output_size)``.

        Args:
            x: Either ``(batch, seq_len, input_size)`` or
                ``(batch, input_size)``. A 2-D input is treated as one
                sequence of length 1 per row.

        Raises:
            ValueError: If ``x`` is neither 2-D nor 3-D.
        """
        if x.dim() == 2:
            # nn.LSTM would read a 2-D tensor as a single unbatched sequence,
            # so add the missing time axis to keep rows as batch entries.
            x = x.unsqueeze(1)
        elif x.dim() != 3:
            raise ValueError(
                f'expected a 2-D or 3-D input, got {x.dim()} dimensions'
            )
        # Omitting (h0, c0) makes nn.LSTM start from zero states created on
        # the input's device and dtype, so no module-level `device` is needed.
        out, _ = self.lstm(x)
        # Only the final time step feeds the classifier head.
        out = self.fc(out[:, -1, :])
        return out
# train function
def train(model, dataloader, criterion, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module to train; batches are moved to its parameters' device.
        dataloader: Iterable with a length that yields ``(inputs, labels)``.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        The mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: If ``dataloader`` contains no batches.
    """
    model.train()
    # Follow the model's own device instead of relying on a module-level
    # global that may not exist where this function is reused.
    first_param = next(model.parameters(), None)
    target_device = (
        first_param.device if first_param is not None else torch.device('cpu')
    )
    total_loss = 0.0
    for inputs, labels in dataloader:
        inputs = inputs.to(target_device)
        labels = labels.to(target_device)
        optimizer.zero_grad()
        outputs = model(inputs)
        # Labels arrive as (batch,) while a single-output head produces
        # (batch, 1); element-wise losses such as BCEWithLogitsLoss reject
        # that. Reshape only when the element counts already agree, so
        # class-index targets for other losses are left untouched.
        if labels.shape != outputs.shape and labels.numel() == outputs.numel():
            labels = labels.reshape(outputs.shape)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)
# evaluate function
def evaluate(model, dataloader, criterion):
    """Compute the mean per-batch loss over ``dataloader`` without training.

    Args:
        model: Module to evaluate; batches are moved to its parameters'
            device.
        dataloader: Iterable with a length that yields ``(inputs, labels)``.
        criterion: Loss callable taking ``(outputs, labels)``.

    Returns:
        The mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: If ``dataloader`` contains no batches.
    """
    model.eval()
    # Follow the model's own device instead of relying on a module-level
    # global that may not exist where this function is reused.
    first_param = next(model.parameters(), None)
    target_device = (
        first_param.device if first_param is not None else torch.device('cpu')
    )
    total_loss = 0.0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(target_device)
            labels = labels.to(target_device)
            outputs = model(inputs)
            # Align (batch,) labels with (batch, 1) outputs; skipped when the
            # element counts differ, e.g. class-index targets.
            if labels.shape != outputs.shape and labels.numel() == outputs.numel():
                labels = labels.reshape(outputs.shape)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
    return total_loss / len(dataloader)
# set device
# Use the GPU when PyTorch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# set hyperparameters
input_size = train_x.shape[1]  # width of one feature row
hidden_size, num_layers, output_size = 128, 2, 1
batch_size, learning_rate, num_epochs = 64, 0.001, 10

# create datasets and dataloaders
# Only the training loader shuffles; the test loader keeps file order.
train_dataset = NSLKDDDataset(train_x, train_y)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataset = NSLKDDDataset(test_x, test_y)
test_dataloader = DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# create model, criterion and optimizer
model = LSTMModel(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    output_size=output_size,
)
model = model.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# train and evaluate model
# One training pass and one test pass per epoch, reporting both mean losses.
for epoch in range(num_epochs):
    train_loss = train(model, train_dataloader, criterion, optimizer)
    test_loss = evaluate(model, test_dataloader, criterion)
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')
```
在此示例中,我们首先加载NSL-KDD数据集,并将其预处理为包含输入和标签的NumPy数组。然后,我们定义了一个PyTorch数据集类,该类将输入和标签转换为PyTorch张量,并使其可用于PyTorch的DataLoader。接下来,我们定义了LSTM模型,使用两个LSTM层和一个全连接层来预测二元分类标签。然后,我们定义了训练和评估函数,分别用于训练和评估模型。最后,我们设置了设备(CPU或GPU),定义了超参数,创建了数据集和数据加载器,创建了模型、损失函数和优化器,然后进行训练和评估。运行此代码将在控制台中显示训练和测试损失。
阅读全文