用堆叠自编码器实现对指定文件夹内的4类彩色图像进行分类并可视化混淆矩阵的Python代码
时间: 2024-04-29 17:26:56 浏览: 12
以下是用堆叠自编码器实现对指定文件夹内的4类彩色图像进行分类并可视化混淆矩阵的Python代码:
```python
# 导入所需的库
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
# 定义函数:将图片转换为数组
def image_to_array(directory, img_size):
    """Load every image under ``directory`` into pixel and label arrays.

    ``directory`` is expected to contain one sub-folder per class; each image
    is labelled with the name of the sub-folder it was found in. Entries of
    ``directory`` that are not folders, and entries of a class folder that are
    not regular files, are skipped.

    Args:
        directory: Path of the dataset root folder.
        img_size: Side length, in pixels, that every image is resized to.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` holds the resized RGB pixel
        data, one entry per image, and ``y`` holds the matching folder names.
        Both arrays are empty when no image is found.
    """
    X = []
    y = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded train/test split) reproducible.
    for folder in sorted(os.listdir(directory)):
        folder_path = os.path.join(directory, folder)
        if not os.path.isdir(folder_path):
            # Stray files in the dataset root are not class folders.
            continue
        for file in sorted(os.listdir(folder_path)):
            file_path = os.path.join(folder_path, file)
            if not os.path.isfile(file_path):
                continue
            # The context manager closes the underlying file handle promptly.
            with Image.open(file_path) as img:
                # Force three channels so grayscale, palette and RGBA images
                # all produce arrays of the same shape.
                rgb = img.convert('RGB').resize((img_size, img_size))
                X.append(np.array(rgb))
            y.append(folder)
    return np.array(X), np.array(y)
# Load the dataset
directory = '/path/to/directory'  # dataset root folder; replace with the real path
img_size = 64  # every image is resized to img_size x img_size
X, y = image_to_array(directory, img_size)
# Scale pixels to [0, 1] so the targets match the range of the sigmoid
# reconstruction layer, and flatten each image into a vector because the
# networks below are built from Dense layers only.
input_dim = img_size * img_size * 3
X = X.astype('float32').reshape(len(X), input_dim) / 255.0
# Encode the folder-name labels as integers. The mapping is derived from the
# data (sorted by name) so that any set of class folders works.
label_dict = {name: index for index, name in enumerate(sorted(set(y)))}
labels = list(label_dict)
num_classes = len(labels)
y = np.array([label_dict[label] for label in y])
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Define the stacked autoencoder: 256 -> 128 -> 64 (code) -> 128 -> 256 -> input_dim
input_img = Input(shape=(input_dim,))
x = Dense(256, activation='relu')(input_img)
x = Dense(128, activation='relu')(x)
encoded = Dense(64, activation='relu')(x)
x = Dense(128, activation='relu')(encoded)
x = Dense(256, activation='relu')(x)
# The output has one unit per input value so the image can be reconstructed
decoded = Dense(input_dim, activation='sigmoid')(x)
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
# Train the autoencoder to reproduce its own input
autoencoder.fit(X_train, X_train, epochs=10, batch_size=32, validation_data=(X_test, X_test))
# Extract the 64-dimensional feature vectors from the bottleneck layer
encoder = Model(input_img, encoded)
X_train_encoded = encoder.predict(X_train)
X_test_encoded = encoder.predict(X_test)
# Define the classifier that operates on the encoded features
input_encoded = Input(shape=(64,))
x = Dense(32, activation='relu')(input_encoded)
output = Dense(num_classes, activation='softmax')(x)
classifier = Model(input_encoded, output)
classifier.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train the classifier
classifier.fit(X_train_encoded, y_train, epochs=10, batch_size=32, validation_data=(X_test_encoded, y_test))
# Predict the test set; argmax turns class probabilities into class indices
y_pred = classifier.predict(X_test_encoded)
y_pred = np.argmax(y_pred, axis=1)
# Visualize the confusion matrix. Passing the full label list keeps the matrix
# num_classes x num_classes even if a class is missing from the test split,
# so the rows and columns always line up with the tick labels.
cm = confusion_matrix(y_test, y_pred, labels=list(range(num_classes)))
# fmt='d' prints the counts as integers instead of the default float format
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.show()
```
需要注意的是,上述代码中的 `'/path/to/directory'` 需要替换为实际的文件夹路径,该文件夹下应为每个类别各建一个子文件夹,并将对应类别的图像放在其中(子文件夹名即类别标签)。此外,代码中的堆叠自编码器和分类器模型仅作示例用途,实际使用时需要根据数据集的特点进行调整。