用堆叠自编码器对指定文件夹内的4类彩色图像进行分类并可视化混淆矩阵的Python代码
时间: 2024-05-10 07:14:30 浏览: 103
以下是使用堆叠自编码器进行图像分类和可视化混淆矩阵的Python代码:
```
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout, Input, UpSampling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.utils import plot_model
# Side length, in pixels, that every image is resized to (images become square).
# The autoencoder in this script halves the resolution twice (2x2 pooling) and
# then doubles it twice (2x2 upsampling), so the reconstruction only has the
# same shape as the input when this value is a multiple of 4.
IMG_SIZE = 100
# 读取数据
def load_data(folder_dir):
    """Load every image under ``folder_dir`` together with an integer label.

    Each immediate sub-directory of ``folder_dir`` is treated as one class.
    Sub-directories (and the files inside them) are visited in sorted name
    order, because ``os.listdir`` returns entries in arbitrary order and the
    class-to-label mapping would otherwise differ between runs or machines.

    Args:
        folder_dir: Directory that contains one sub-directory per class.

    Returns:
        A tuple ``(data, labels)`` of NumPy arrays: the images converted to
        RGB channel order and resized to ``IMG_SIZE`` x ``IMG_SIZE``, and the
        class index of each image (position of its sub-directory in sorted
        order).
    """
    data = []
    labels = []

    # Only directories are classes; stray files in the root (e.g. .DS_Store)
    # must neither consume a label index nor be passed to os.listdir.
    class_names = sorted(
        name for name in os.listdir(folder_dir)
        if os.path.isdir(os.path.join(folder_dir, name))
    )

    for label, folder_name in enumerate(class_names):
        folder_path = os.path.join(folder_dir, folder_name)
        for filename in sorted(os.listdir(folder_path)):
            img = cv2.imread(os.path.join(folder_path, filename))
            if img is None:
                # cv2.imread signals an unreadable / non-image file by
                # returning None instead of raising; skip it so that
                # cvtColor does not crash on it.
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
            data.append(img)
            labels.append(label)

    return np.array(data), np.array(labels)
# Display names of the classes, in label-index order.
CLASS_NAMES = ['class1', 'class2', 'class3', 'class4']
NUM_CLASSES = len(CLASS_NAMES)

# Load all images and their integer class labels.
X, y = load_data('path/to/folder')  # replace with the path to your folder

# Scale pixel values from [0, 255] to [0, 1]. The decoder ends in a sigmoid,
# so it can only reconstruct targets that lie inside that range.
X = X.astype('float32') / 255.0

# Convert the integer labels to one-hot vectors.
y = tf.keras.utils.to_categorical(y, num_classes=NUM_CLASSES)

# Split into training and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ---------------------------------------------------------------------------
# Convolutional autoencoder
# ---------------------------------------------------------------------------
input_img = Input(shape=(IMG_SIZE, IMG_SIZE, 3))

# Encoder: two conv + 2x2 pooling stages, each halving the spatial resolution.
x = Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# Decoder: mirrors the encoder with two conv + 2x2 upsampling stages.
x = Conv2D(16, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(32, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# Train the autoencoder to reconstruct its own input.
# NOTE: the test split doubles as validation data here, so the validation
# loss printed during training is not an independent estimate.
autoencoder.fit(X_train, X_train, epochs=50, batch_size=64, shuffle=True, validation_data=(X_test, X_test))

# Use the trained encoder half as a fixed feature extractor.
encoder = Model(input_img, encoded)
X_train_encoded = encoder.predict(X_train)
X_test_encoded = encoder.predict(X_test)

# ---------------------------------------------------------------------------
# Classifier on top of the encoded features
# ---------------------------------------------------------------------------
# The input shape is taken from the encoder output instead of being
# hard-coded, so it stays correct when IMG_SIZE or the encoder changes.
# Flatten comes first: a Dense layer applied to a 3-D feature map only acts on
# the last axis, which would yield a (H, W, 256) tensor rather than a single
# 256-unit hidden layer.
model = Sequential([
    Input(shape=X_train_encoded.shape[1:]),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the classifier.
model.fit(X_train_encoded, y_train, epochs=50, batch_size=64, shuffle=True, validation_data=(X_test_encoded, y_test))

# Predict on the test set and convert probabilities / one-hot rows to indices.
y_pred = model.predict(X_test_encoded)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Plot the confusion matrix (rows: true class, columns: predicted class).
# Passing `labels` keeps the matrix NUM_CLASSES x NUM_CLASSES even when a class
# is absent from the test split, so it always matches the DataFrame labels.
cm = confusion_matrix(y_true, y_pred, labels=list(range(NUM_CLASSES)))
df_cm = pd.DataFrame(cm, index=CLASS_NAMES, columns=CLASS_NAMES)
plt.figure(figsize=(10, 7))
sns.heatmap(df_cm, annot=True, fmt='d')  # fmt='d': show counts as integers
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.show()
```
注意替换代码中的文件夹路径为你的实际路径,并根据需要调整超参数。
阅读全文