标签字段名称调整:将 file_name 和 label 改为 文件名 和 标签,以匹配 JSON 文件中的实际字段名称。将完整的代码列出
时间: 2024-10-25 16:13:43 浏览: 24
为了将 `file_name` 和 `label` 字段改为 `文件名` 和 `标签`,我们需要对代码中涉及这些字段的部分进行相应的修改。以下是修改后的完整代码:
```python
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score
# Data locations
data_dir = 'C:/Users/24067/Desktop/peach_split'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')

# Grade name -> integer class code used by the network's output layer
label_map = {'特级': 0, '一级': 1, '二级': 2, '三级': 3}


def _read_json(json_path):
    """Return the parsed content of the JSON file at *json_path*.

    The file is decoded explicitly as UTF-8 ('utf-8-sig' also accepts a
    leading BOM) instead of the platform's locale encoding, because the
    labels are Chinese text.
    """
    with open(json_path, 'r', encoding='utf-8-sig') as f:
        return json.load(f)


def _labels_to_frame(raw):
    """Build a DataFrame with columns '文件名' and '标签' from parsed label JSON.

    Two layouts are accepted:
      * a dict mapping file name -> grade name;
      * a list of records carrying '文件名' and '标签' fields (or a list of
        [file name, grade name] pairs).

    The '标签' column is returned as the string form of the integer code from
    ``label_map`` ('0'..'3'): Keras' ``flow_from_dataframe`` requires string
    labels for ``class_mode='sparse'``, and sorting these strings reproduces
    the numeric order of the codes.

    Raises:
        ValueError: if any label is missing or not a key of ``label_map``.
    """
    if isinstance(raw, dict):
        frame = pd.DataFrame(list(raw.items()), columns=['文件名', '标签'])
    else:
        frame = pd.DataFrame(raw, columns=['文件名', '标签'])

    codes = frame['标签'].map(label_map)
    unmapped = codes.isna()
    if unmapped.any():
        # Fail early with the offending values instead of passing NaN on.
        unknown = sorted(set(frame.loc[unmapped, '标签'].astype(str)))
        raise ValueError(f'Labels not present in label_map: {unknown}')

    frame['标签'] = codes.astype(int).astype(str)
    return frame


# Load the raw labels
train_labels = _read_json('C:/Users/24067/Desktop/train_label.json')
val_labels = _read_json('C:/Users/24067/Desktop/val_label.json')

# Convert the labels to DataFrames with encoded class labels
train_df = _labels_to_frame(train_labels)
val_df = _labels_to_frame(val_labels)
# Image data generators
image_size = (224, 224)
batch_size = 32

# flow_from_dataframe only accepts string labels when class_mode='sparse',
# so the label column is cast to str (a no-op if it already holds strings).
# Passing an explicit `classes` list pins class index i to label code i for
# every generator, instead of letting each generator infer its own indices
# from whichever labels happen to occur in its dataframe.
# NOTE(review): assumes the '标签' column holds the integer codes from
# label_map (or their string form) with no missing values.
class_codes = [str(code) for code in sorted(label_map.values())]
train_frame = train_df.astype({'标签': str})
val_frame = val_df.astype({'标签': str})

# Options shared by every labelled generator
flow_options = dict(
    x_col='文件名',
    y_col='标签',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='sparse',
    classes=class_codes,
)

# 80/20 split of the training dataframe, used while fitting
train_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_frame,
    directory=train_dir,
    subset='training',
    **flow_options
)
validation_generator = train_datagen.flow_from_dataframe(
    dataframe=train_frame,
    directory=train_dir,
    subset='validation',
    **flow_options
)

# Separate validation directory, used for the final evaluation
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_frame,
    directory=val_dir,
    **flow_options
)
# Small CNN: four Conv2D/MaxPooling2D stages followed by a dense classifier.
# The input shape matches the 224x224 RGB images produced by the generators,
# and the 4-unit softmax matches the four grades in label_map.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
for n_filters in (64, 128, 128):
    model.add(Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax'))

# Integer class targets -> sparse categorical cross-entropy
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs, one full pass over each generator per epoch
n_train_steps = len(train_generator)
n_validation_steps = len(validation_generator)
history = model.fit(
    train_generator,
    steps_per_epoch=n_train_steps,
    epochs=20,
    validation_data=validation_generator,
    validation_steps=n_validation_steps,
)
# Evaluate the model on the validation set
val_loss, val_acc = model.evaluate(val_generator, steps=len(val_generator))
print(f'Validation Accuracy: {val_acc:.4f}')

# Predict batch by batch and compute the per-class F1 scores.
# Labels and predictions are taken from the same batch, so they stay aligned
# regardless of the order in which the generator yields samples.
val_generator.reset()  # rewind so the manual iteration starts at the first batch
y_true = []
y_pred = []
for _ in range(len(val_generator)):
    images, labels = next(val_generator)
    # predict_on_batch runs a single forward pass without the per-call
    # progress bar that model.predict prints for every batch.
    probabilities = model.predict_on_batch(images)
    # The generator yields labels as floats; cast them to integer class ids.
    y_true.extend(np.asarray(labels).astype(int))
    y_pred.extend(np.argmax(probabilities, axis=1))
y_true = np.array(y_true)
y_pred = np.array(y_pred)

# average=None returns one F1 score per class rather than a single aggregate
f1 = f1_score(y_true, y_pred, average=None)
print(f'F1 Scores: {f1}')
# Load the unlabeled test set (sorted so the submission order is deterministic)
test_dir = os.path.join(data_dir, 'test')
test_filenames = sorted(os.listdir(test_dir))
test_df = pd.DataFrame({'文件名': test_filenames})
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_dir,
    x_col='文件名',
    y_col=None,
    target_size=image_size,
    batch_size=batch_size,
    class_mode=None,
    shuffle=False  # keep predictions in the same order as the file names
)

# Predict the test set
predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)
# Invert label_map once (code -> grade name) instead of rebuilding the key
# list for every prediction and relying on dict insertion order.
code_to_label = {code: name for name, code in label_map.items()}
predicted_labels = [code_to_label[int(i)] for i in predicted_classes]

# Build the submission file.
# flow_from_dataframe validates file names by default and drops entries that
# are not readable image files, so the rows must come from the generator's
# own file list to stay aligned with the predictions.
submission_df = pd.DataFrame({'文件名': test_generator.filenames, '标签': predicted_labels})
submission_df.to_csv('submission.csv', index=False)

# Persist the trained model
model.save('peach_grading_model.h5')
```
在这个版本的代码中,所有涉及 `file_name` 和 `label` 的地方都已替换为 `文件名` 和 `标签`,以匹配 JSON 文件中的实际字段名称。
阅读全文