基于PyTorch对Excel数据进行预处理,建立多通道一维卷积神经网络模型,包括数据导入、数据集划分、数据标准化、模型的训练、测试集和验证集的导入,以及准确率曲线、损失(loss)曲线和混淆矩阵图的绘制
时间: 2024-05-04 18:20:37 浏览: 120
首先,你需要安装以下的库:
```bash
# Third-party packages used by the code below.
pip install pandas
pip install numpy
pip install torch
pip install torchvision
# The PyPI distribution is named "scikit-learn"; "sklearn" is a deprecated
# placeholder package and should not be installed.
pip install scikit-learn
pip install matplotlib
# pandas.read_excel needs openpyxl to parse .xlsx workbooks.
pip install openpyxl
```
然后,我们可以开始处理excel数据:
```python
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the workbook; header=None means the first row is read as data,
# not as column names.
data = pd.read_excel('data.xlsx', header=None)

# The last column holds the label, every column before it is a feature.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Hold out 20% of the rows as the test split (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize: statistics are fitted on the training split only and then
# re-used for the test split.
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

# Convert to tensors: float features, integer (long) labels.
X_train, X_test = (torch.from_numpy(arr).float() for arr in (X_train, X_test))
y_train, y_test = (torch.from_numpy(arr).long() for arr in (y_train, y_test))
```
接下来,我们可以建立多通道一维卷积神经网络模型:
```python
import torch.nn as nn
class Net(nn.Module):
    """1-D CNN binary classifier: two Conv1d/ReLU/MaxPool1d stages and two linear layers.

    The network emits one raw logit per sample (no sigmoid), which is what
    ``nn.BCEWithLogitsLoss`` consumes.

    Args:
        in_channels: Number of input channels fed to the first convolution.
            Defaults to 1, matching the original single-channel model.
        input_length: Length of each input sequence (number of features per
            sample). It determines the size of the first linear layer. The
            default of 98 reproduces the original ``64 * 23`` flattened size
            (any length from 98 to 101 yields the same size).

    Raises:
        ValueError: If ``input_length`` is too short to survive both
            convolution/pooling stages.
    """

    def __init__(self, in_channels: int = 1, input_length: int = 98):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=32, kernel_size=3)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool2 = nn.MaxPool1d(kernel_size=2)

        # Each stage: an unpadded kernel_size=3 convolution removes 2 positions,
        # then MaxPool1d(2) halves the length (rounding down).
        pooled_length = ((input_length - 2) // 2 - 2) // 2
        if pooled_length < 1:
            raise ValueError(
                "input_length=%d is too short for two conv/pool stages" % input_length
            )
        self.flat_features = 64 * pooled_length

        self.fc1 = nn.Linear(in_features=self.flat_features, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, in_channels, length)`` tensor to ``(batch, 1)`` logits."""
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, N), this never folds surplus
        # features into the batch dimension; a wrong input length fails
        # loudly in fc1 instead of silently changing the batch size.
        x = x.flatten(start_dim=1)
        x = self.drop(self.relu(self.fc1(x)))
        return self.fc2(x)


net = Net()
```
然后,我们可以训练模型:
```python
import matplotlib.pyplot as plt
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split

# Build the loaders the loop below iterates over. One fifth of the training
# tensors is set aside for validation so the test split stays untouched
# until the final evaluation. The generator seed keeps the split repeatable.
full_train = TensorDataset(X_train, y_train)
n_val = len(full_train) // 5
train_data, val_data = random_split(
    full_train,
    [len(full_train) - n_val, n_val],
    generator=torch.Generator().manual_seed(42),
)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
epochs = 50

# Per-epoch metrics, kept for the curves plotted after training.
loss_history = []
val_acc_history = []

for epoch in range(epochs):
    # ---- training pass ----
    running_loss = 0.0
    net.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        # unsqueeze(1) inserts the channel axis that Conv1d expects.
        outputs = net(inputs.unsqueeze(1))
        # squeeze(1) drops only the logit axis. A bare squeeze() would also
        # drop the batch axis of a size-1 batch, and BCEWithLogitsLoss then
        # rejects the input/target shape mismatch.
        loss = criterion(outputs.squeeze(1), labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # ---- validation pass ----
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = net(inputs.unsqueeze(1))
            predicted = torch.round(torch.sigmoid(outputs.squeeze(1)))
            total += labels.size(0)
            correct += (predicted == labels.float()).sum().item()

    # max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(len(train_loader), 1)
    val_acc = 100 * correct / max(total, 1)
    loss_history.append(epoch_loss)
    val_acc_history.append(val_acc)
    print('[%d, %5d] loss: %.3f val_acc: %.3f' %
          (epoch + 1, len(train_loader), epoch_loss, val_acc))

# Plot the training-loss and validation-accuracy curves.
epoch_axis = range(1, epochs + 1)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))
loss_ax.plot(epoch_axis, loss_history)
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('training loss')
acc_ax.plot(epoch_axis, val_acc_history)
acc_ax.set_xlabel('epoch')
acc_ax.set_ylabel('validation accuracy (%)')
fig.tight_layout()
plt.show()
```
最后,我们可以测试模型并绘制混淆矩阵图:
```python
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

# Evaluate on the held-out test split.
net.eval()
# Run without autograd. Otherwise the predictions carry requires_grad=True
# and cannot be converted to NumPy for confusion_matrix.
with torch.no_grad():
    test_outputs = net(X_test.unsqueeze(1))
# squeeze(1) removes only the logit axis, so a single-sample test set
# still yields a 1-D tensor.
test_predicted = torch.round(torch.sigmoid(test_outputs.squeeze(1)))
test_total = y_test.size(0)
test_correct = (test_predicted == y_test.float()).sum().item()
print('test_acc: %.3f' % (100 * test_correct / test_total))

# Pass plain integer NumPy arrays so both arguments share the same label type.
cm = confusion_matrix(y_test.numpy(), test_predicted.long().numpy())
plt.imshow(cm, cmap='binary')
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.colorbar()
plt.show()
```
完整代码如下:
```python
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
# Load the workbook; header=None means the first row is read as data.
data = pd.read_excel('data.xlsx', header=None)

# The last column is the label; all columns before it are features.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Split into train / validation / test (60% / 20% / 20%).
# The validation set is carved out of the training portion, so the test set
# is never seen during training and its accuracy stays an independent estimate.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.25, random_state=42
)

# Standardize: fit on the training split only, then apply the same
# statistics to the validation and test splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Convert to tensors: float features, integer (long) labels.
X_train = torch.from_numpy(X_train).float()
X_val = torch.from_numpy(X_val).float()
X_test = torch.from_numpy(X_test).float()
y_train = torch.from_numpy(y_train).long()
y_val = torch.from_numpy(y_val).long()
y_test = torch.from_numpy(y_test).long()

# Datasets and loaders: shuffle only the training data.
from torch.utils.data import TensorDataset, DataLoader
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
val_data = TensorDataset(X_val, y_val)
val_loader = DataLoader(val_data, batch_size=64, shuffle=False)
# Build the (multi-channel capable) 1-D convolutional network.
class Net(nn.Module):
    """1-D CNN binary classifier: two Conv1d/ReLU/MaxPool1d stages and two linear layers.

    The network emits one raw logit per sample (no sigmoid), which is what
    ``nn.BCEWithLogitsLoss`` consumes.

    Args:
        in_channels: Number of input channels fed to the first convolution.
            Defaults to 1, matching the original single-channel model.
        input_length: Length of each input sequence (number of features per
            sample). It determines the size of the first linear layer. The
            default of 98 reproduces the original ``64 * 23`` flattened size
            (any length from 98 to 101 yields the same size).

    Raises:
        ValueError: If ``input_length`` is too short to survive both
            convolution/pooling stages.
    """

    def __init__(self, in_channels: int = 1, input_length: int = 98):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=in_channels, out_channels=32, kernel_size=3)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3)
        self.pool2 = nn.MaxPool1d(kernel_size=2)

        # Each stage: an unpadded kernel_size=3 convolution removes 2 positions,
        # then MaxPool1d(2) halves the length (rounding down).
        pooled_length = ((input_length - 2) // 2 - 2) // 2
        if pooled_length < 1:
            raise ValueError(
                "input_length=%d is too short for two conv/pool stages" % input_length
            )
        self.flat_features = 64 * pooled_length

        self.fc1 = nn.Linear(in_features=self.flat_features, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=1)
        self.drop = nn.Dropout(p=0.5)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, in_channels, length)`` tensor to ``(batch, 1)`` logits."""
        x = self.pool1(self.relu(self.conv1(x)))
        x = self.pool2(self.relu(self.conv2(x)))
        # Flatten per sample. Unlike view(-1, N), this never folds surplus
        # features into the batch dimension; a wrong input length fails
        # loudly in fc1 instead of silently changing the batch size.
        x = x.flatten(start_dim=1)
        x = self.drop(self.relu(self.fc1(x)))
        return self.fc2(x)


net = Net()
# Train the model.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
epochs = 50

# Per-epoch metrics, kept for the curves plotted after training.
loss_history = []
val_acc_history = []

for epoch in range(epochs):
    # ---- training pass ----
    running_loss = 0.0
    net.train()
    # Unpack batches directly; reusing the name `data` as the loop variable
    # would overwrite the DataFrame loaded earlier.
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        # unsqueeze(1) inserts the channel axis that Conv1d expects.
        outputs = net(inputs.unsqueeze(1))
        # squeeze(1) drops only the logit axis. A bare squeeze() would also
        # drop the batch axis of a size-1 batch, and BCEWithLogitsLoss then
        # rejects the input/target shape mismatch.
        loss = criterion(outputs.squeeze(1), labels.float())
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # ---- validation pass ----
    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = net(inputs.unsqueeze(1))
            predicted = torch.round(torch.sigmoid(outputs.squeeze(1)))
            total += labels.size(0)
            correct += (predicted == labels.float()).sum().item()

    # max(..., 1) guards against an empty loader.
    epoch_loss = running_loss / max(len(train_loader), 1)
    val_acc = 100 * correct / max(total, 1)
    loss_history.append(epoch_loss)
    val_acc_history.append(val_acc)
    print('[%d, %5d] loss: %.3f val_acc: %.3f' %
          (epoch + 1, len(train_loader), epoch_loss, val_acc))

# Plot the training-loss and validation-accuracy curves.
epoch_axis = range(1, epochs + 1)
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10, 4))
loss_ax.plot(epoch_axis, loss_history)
loss_ax.set_xlabel('epoch')
loss_ax.set_ylabel('training loss')
acc_ax.plot(epoch_axis, val_acc_history)
acc_ax.set_xlabel('epoch')
acc_ax.set_ylabel('validation accuracy (%)')
fig.tight_layout()
plt.show()
# Evaluate on the held-out test split and draw the confusion matrix.
net.eval()
# Run without autograd. Otherwise the predictions carry requires_grad=True
# and cannot be converted to NumPy for confusion_matrix.
with torch.no_grad():
    test_outputs = net(X_test.unsqueeze(1))
# squeeze(1) removes only the logit axis, so a single-sample test set
# still yields a 1-D tensor.
test_predicted = torch.round(torch.sigmoid(test_outputs.squeeze(1)))
test_total = y_test.size(0)
test_correct = (test_predicted == y_test.float()).sum().item()
print('test_acc: %.3f' % (100 * test_correct / test_total))

# Pass plain integer NumPy arrays so both arguments share the same label type.
cm = confusion_matrix(y_test.numpy(), test_predicted.long().numpy())
plt.imshow(cm, cmap='binary')
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.colorbar()
plt.show()
```
阅读全文