标签字段名称调整:将 file_name 和 label 改为 文件名 和 标签,以匹配 JSON 文件中的实际字段名称。按照以上要求修改代码,并将修改后代码列出
时间: 2024-10-24 16:07:00 浏览: 15
为了将 `file_name` 和 `label` 字段名称调整为 `文件名` 和 `标签`,我们需要在代码中进行相应的修改。以下是修改后的代码:
```python
import os
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
def load_data(train_dir, val_dir, label_path):
# 加载标签
with open(label_path, 'r') as f:
labels = json.load(f)
# 获取图像文件列表
image_files = [os.path.join(train_dir, img) for img in os.listdir(train_dir)]
image_labels = [labels[img.split('/')[-1]]['标签'] for img in image_files]
# 将标签转换为数值
label_mapping = {'特级': 3, '一级': 2, '二级': 1, '三级': 0}
image_labels = [label_mapping[label] for label in image_labels]
return image_files, image_labels
def preprocess_image(image_path, target_size=(224, 224)):
image = tf.io.read_file(image_path)
image = tf.image.decode_jpeg(image, channels=3)
image = tf.image.resize(image, target_size)
image = tf.cast(image, tf.float32) / 255.0
return image
def create_dataset(image_files, image_labels, batch_size=32):
dataset = tf.data.Dataset.from_tensor_slices((image_files, image_labels))
dataset = dataset.map(lambda x, y: (preprocess_image(x), y))
dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
return dataset
# 路径配置
train_dir = "C:\\Users\\24067\\Desktop\\peach_split\\train"
val_dir = "C:\\Users\\24067\\Desktop\\peach_split\\val"
train_label_path = "C:\\Users\\24067\\Desktop\\train_label.json"
val_label_path = "C:\\Users\\24067\\Desktop\\val_label.json"
# 加载数据
train_files, train_labels = load_data(train_dir, val_dir, train_label_path)
val_files, val_labels = load_data(val_dir, val_dir, val_label_path)
# 创建数据集
train_dataset = create_dataset(train_files, train_labels)
val_dataset = create_dataset(val_files, val_labels)
def build_model(input_shape=(224, 224, 3), num_classes=4):
model = models.Sequential([
layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(64, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Conv2D(128, (3, 3), activation='relu'),
layers.MaxPooling2D((2, 2)),
layers.Flatten(),
layers.Dense(128, activation='relu'),
layers.Dropout(0.5),
layers.Dense(num_classes, activation='softmax')
])
return model
model = build_model()
model.compile(optimizer=optimizers.Adam(), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
history = model.fit(
train_dataset,
validation_data=val_dataset,
epochs=1,
callbacks=[
tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
]
)
def evaluate_model(model, dataset):
predictions = []
true_labels = []
for images, labels in dataset:
preds = model.predict(images)
preds = np.argmax(preds, axis=-1)
predictions.extend(preds)
true_labels.extend(labels.numpy())
accuracy = accuracy_score(true_labels, predictions)
f1 = f1_score(true_labels, predictions, average='weighted')
print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score: {f1:.4f}')
evaluate_model(model, val_dataset)
def generate_submission(test_dir, model, output_path="submission.csv"):
test_files = [os.path.join(test_dir, img) for img in os.listdir(test_dir)]
test_dataset = create_dataset(test_files, [0] * len(test_files)) # Dummy labels
predictions = []
for images, _ in test_dataset:
preds = model.predict(images)
preds = np.argmax(preds, axis=-1)
predictions.extend(preds)
# 反映射标签
reverse_label_mapping = {v: k for k, v in label_mapping.items()}
predictions = [reverse_label_mapping[pred] for pred in predictions]
submission_df = pd.DataFrame({
'文件名': [os.path.basename(file) for file in test_files],
'标签': predictions
})
submission_df.to_csv(output_path, index=False)
generate_submission("C:\\Users\\24067\\Desktop\\peach_split\\test", model)
```
### 主要修改点:
1. **加载标签时**:从 `labels[img.split('/')[-1]]` 修改为 `labels[img.split('/')[-1]]['标签']`。
2. **生成提交文件时**:将 `submission_df` 中的列名从 `'file_name'` 和 `'label'` 修改为 `'文件名'` 和 `'标签'`。
这样修改后,代码将与 JSON 文件中的实际字段名称保持一致。
阅读全文