在matlab上用cnn对nsl-kdd数据集进行训练完整代码
时间: 2023-06-18 22:07:55 浏览: 489
NSL_KDD数据集
4星 · 用户满意度95%
以下是使用MATLAB中的CNN对NSL-KDD数据集进行训练的完整代码,包含数据预处理、模型定义、训练和测试。
首先,需要下载NSL-KDD数据集并解压缩。该数据集包含4个文件,分别为:
- KDDTrain+.txt:训练集数据
- KDDTest+.txt:测试集数据
- KDDTrain+_20Percent.txt:20%的训练集数据
- KDDTest-21.txt:KDDTest+的子集,不包含难度等级为21(满分21)的记录,即更难分类的测试集数据
本示例使用KDDTrain+.txt和KDDTest+.txt进行训练和测试。
代码如下:
```matlab
%% Data preprocessing
% Reads the NSL-KDD training and test files, encodes the three text-valued
% features as integer codes, z-scores all features with the training
% statistics, and packs every record into a small single-channel image.
%
% Workspace variables produced for the sections below:
%   train_images, test_images : image_size x image_size x num_channels x N
%   train_labels, test_labels : N x 1 categorical attack labels
%   image_size, num_channels  : input size of the network
%   num_attack_types          : number of classes (size of the output layer)

% Load the training data (comma-separated, no header row).
train_data = readtable('KDDTrain+.txt','Delimiter',',','ReadVariableNames',false);
train_data.Properties.VariableNames = {'duration','protocol_type','service','flag','src_bytes','dst_bytes','land','wrong_fragment','urgent','hot',...
    'num_failed_logins','logged_in','num_compromised','root_shell','su_attempted','num_root','num_file_creations','num_shells','num_access_files','num_outbound_cmds','is_host_login','is_guest_login',...
    'count','srv_count','serror_rate','srv_serror_rate','rerror_rate','srv_rerror_rate','same_srv_rate','diff_srv_rate','srv_diff_host_rate',...
    'dst_host_count','dst_host_srv_count','dst_host_same_srv_rate','dst_host_diff_srv_rate','dst_host_same_src_port_rate','dst_host_srv_diff_host_rate','dst_host_serror_rate','dst_host_srv_serror_rate','dst_host_rerror_rate','dst_host_srv_rerror_rate','attack_type','difficulty_level'};

% Load the test data; it has the same column layout as the training data.
test_data = readtable('KDDTest+.txt','Delimiter',',','ReadVariableNames',false);
test_data.Properties.VariableNames = train_data.Properties.VariableNames;

% Build the labels while the attack names are still text. The classes are
% exactly the attack types seen in training, so the number of categories
% equals num_attack_types (the size of the network's last layer).
% Test records whose attack type never occurs in the training file become
% <undefined>; they compare unequal to every prediction and are therefore
% counted as misclassified by the accuracy computation.
attack_types = unique(train_data.attack_type);
num_attack_types = numel(attack_types);
train_labels = categorical(train_data.attack_type, attack_types);
test_labels = categorical(test_data.attack_type, attack_types);

% Encode the text-valued features as integer codes. The code books are
% built from train and test together so every test value has a code.
protocol_types = unique([train_data.protocol_type; test_data.protocol_type]);
num_protocol_types = numel(protocol_types);
service_types = unique([train_data.service; test_data.service]);
num_service_types = numel(service_types);
flag_types = unique([train_data.flag; test_data.flag]);
num_flag_types = numel(flag_types);

[~, codes] = ismember(train_data.protocol_type, protocol_types);
train_data.protocol_type = codes;
[~, codes] = ismember(train_data.service, service_types);
train_data.service = codes;
[~, codes] = ismember(train_data.flag, flag_types);
train_data.flag = codes;

[~, codes] = ismember(test_data.protocol_type, protocol_types);
test_data.protocol_type = codes;
[~, codes] = ismember(test_data.service, service_types);
test_data.service = codes;
[~, codes] = ismember(test_data.flag, flag_types);
test_data.flag = codes;

% Split off the features: every column except the last two (attack_type
% and difficulty_level). All remaining columns are numeric at this point,
% so table2array can concatenate them into a matrix.
train_features = table2array(train_data(:,1:end-2));
test_features = table2array(test_data(:,1:end-2));
num_features = size(train_features,2);

% Normalize with the training statistics. zscore returns sigma == 0 for
% constant columns; replace those by 1 so the test set is not divided by
% zero (which would produce NaN/Inf inputs).
[train_features, mu, sigma] = zscore(train_features);
sigma(sigma == 0) = 1;
test_features = (test_features - mu) ./ sigma;

% Convert each record into an image. The feature vector is zero-padded to
% image_size^2 values. image_size is a power of two and at least 8 so the
% three stride-2 pooling layers of the network below still receive a
% non-empty input (8 -> 4 -> 2 -> 1).
image_size = max(8, 2^nextpow2(ceil(sqrt(num_features))));
num_channels = 1;
num_pixels = image_size * image_size * num_channels;

% trainNetwork/classify expect numeric image data laid out as
% height x width x channels x observations, so one record is stored per
% column before reshaping.
to_images = @(F) reshape([F, zeros(size(F,1), num_pixels - num_features)].', ...
    image_size, image_size, num_channels, []);
train_images = to_images(train_features);
test_images = to_images(test_features);
%% Model definition
% One feature-extraction unit: 5x5 convolution (padding 2 keeps the spatial
% size), batch normalization, then ReLU.
conv_unit = @(num_filters) [convolution2dLayer(5,num_filters,'Padding',2); batchNormalizationLayer; reluLayer];

% Three stages of two conv units each (32, 64, 128 filters); every stage
% ends with a 2x2 max pooling of stride 2. A 128-unit fully connected layer
% feeds the softmax classifier with one output per attack type.
layers = [
    imageInputLayer([image_size image_size num_channels])
    % Stage 1: conv layers 1 and 2
    conv_unit(32)
    conv_unit(32)
    maxPooling2dLayer(2,'Stride',2)
    % Stage 2: conv layers 3 and 4
    conv_unit(64)
    conv_unit(64)
    maxPooling2dLayer(2,'Stride',2)
    % Stage 3: conv layers 5 and 6
    conv_unit(128)
    conv_unit(128)
    maxPooling2dLayer(2,'Stride',2)
    % Classifier head
    fullyConnectedLayer(128)
    reluLayer
    fullyConnectedLayer(num_attack_types)
    softmaxLayer
    classificationLayer];

% Adam optimizer, 20 epochs, mini-batches of 100, with the live progress plot.
options = trainingOptions('adam', ...
    'MaxEpochs',20, ...
    'MiniBatchSize',100, ...
    'Plots','training-progress');
%% Train the model
net = trainNetwork(train_images,train_labels,layers,options);
%% Evaluate the model
% Classify the test images and report the fraction of predictions that
% match the test labels.
predicted_labels = classify(net,test_images);
num_correct = nnz(predicted_labels == test_labels);
accuracy = num_correct / numel(test_labels);
fprintf('Accuracy = %.2f%%\n',accuracy*100);
```
此代码将NSL-KDD数据集转换为图像,使用CNN模型进行训练,并输出测试集的准确率。可以根据需要更改模型架构、训练选项等参数。
阅读全文