ValueError: Must provide at least one structure
时间: 2024-10-23 20:18:19 浏览: 216
从提供的错误信息来看,`ValueError: math domain error` 是由于 `math.log10` 函数尝试对一个非正数进行对数计算引起的。具体来说,这通常发生在 `self.target` 为零或负数的情况下。
在 `generate_submission` 函数中,`test_images` 可能为空,导致 `model.predict(test_images)` 失败。我们需要确保 `test_images` 不为空,并且正确处理这种情况。
以下是修改后的完整代码:
```python
import json
import os
import pandas as pd
from future.standard_library import install_aliases
install_aliases()
from pygame.examples.sprite_texture import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import accuracy_score, f1_score
import numpy as np
import cv2
# Read train_label.json and val_label.json.
with open(r'C:\Users\24067\Desktop\train_label.json', 'r', encoding='utf-8') as f:
    train_labels = json.load(f)
with open(r'C:\Users\24067\Desktop\val_label.json', 'r', encoding='utf-8') as f:
    val_labels = json.load(f)

# Build file-name -> label lookups from the annotation records
# ('文件名' is the file-name key, '标签' is the label key).
train_labels_dict = {item['文件名']: item['标签'] for item in train_labels}
val_labels_dict = {item['文件名']: item['标签'] for item in val_labels}

# Map each label string to its class index.
label_mapping = {"特级": 0, "一级": 1, "二级": 2, "三级": 3}

train_image_paths = []
train_image_labels = []
val_image_paths = []
val_image_labels = []

# Collect the path and class index of every training image that has a label;
# files in the folder without an annotation entry are skipped.
train_folder = r'C:\Users\24067\Desktop\peach_split\train'
for filename in os.listdir(train_folder):
    if filename in train_labels_dict:
        train_image_paths.append(os.path.join(train_folder, filename))
        train_image_labels.append(label_mapping[train_labels_dict[filename]])

# Same for the validation folder.
val_folder = r'C:\Users\24067\Desktop\peach_split\val'
for filename in os.listdir(val_folder):
    if filename in val_labels_dict:
        val_image_paths.append(os.path.join(val_folder, filename))
        val_image_labels.append(label_mapping[val_labels_dict[filename]])
# 将文件路径列表转换为图像数组
def load_images_from_paths(image_paths, target_size=(224, 224)):
    """Load the images at *image_paths* into one NumPy array.

    Each file is opened with ``load_img`` (using *target_size*) and converted
    with ``img_to_array``; results keep the order of *image_paths*.

    Args:
        image_paths: Iterable of image file paths.
        target_size: Size forwarded to ``load_img``. Defaults to (224, 224).

    Returns:
        ``np.array`` built from the list of per-image arrays. An empty
        *image_paths* yields an empty array.
    """
    return np.array([
        img_to_array(load_img(path, target_size=target_size))
        for path in image_paths
    ])
# Decode the training and validation images into arrays.
train_images, val_images = (
    load_images_from_paths(paths)
    for paths in (train_image_paths, val_image_paths)
)
# Turn the integer class indices into NumPy arrays and one-hot encode them
# (4 classes).
train_labels = to_categorical(np.array(train_image_labels), num_classes=4)
val_labels = to_categorical(np.array(val_image_labels), num_classes=4)
# Training pipeline: rescale by 1/255 plus random rotation, shifts and
# horizontal flips.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    height_shift_range=0.2,
    width_shift_range=0.2,
    rotation_range=20,
    rescale=1./255,
)
# Validation pipeline: rescale only.
val_datagen = ImageDataGenerator(rescale=1./255)

# Batch iterators over the in-memory training / validation arrays.
train_generator = train_datagen.flow(train_images, train_labels, batch_size=32)
val_generator = val_datagen.flow(val_images, val_labels, batch_size=32)

# Test pipeline (the test set has no class labels, hence class_mode=None).
# NOTE(review): test_generator is not referenced anywhere else in the visible
# code; generate_submission reads the test directory itself.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(
    r'C:\Users\24067\Desktop\peach_split\test',
    class_mode=None,
    shuffle=False,
    batch_size=32,
    target_size=(224, 224),
)
# Load ResNet50 with ImageNet weights and without its top classification layers.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Custom classification head: global average pooling -> 1024-unit ReLU layer
# -> 4-way softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)  # 4 classes

# Assemble the full model.
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every layer of the pretrained backbone so only the new head trains.
for layer in base_model.layers:
    layer.trainable = False

# Compile the model.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Early-stopping callback: stop when val_loss has not improved for 3 epochs
# and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model.
history = model.fit(
    train_generator,
    # Set to the number of epochs you need; with a single epoch the
    # patience-3 early stopping above can never trigger.
    epochs=1,
    validation_data=val_generator,
    callbacks=[early_stopping]
)
# 评估模型
def evaluate_model(model, x, y_true):
    """Print accuracy and weighted F1 of *model* on the inputs *x*.

    Args:
        model: Object exposing ``predict(x)`` that returns per-class scores
            with one row per sample.
        x: Inputs forwarded unchanged to ``model.predict``; the caller is
            responsible for any preprocessing.
        y_true: Array whose row-wise argmax is the true class index
            (e.g. one-hot labels).

    Returns:
        None. The two metrics are printed with four decimals.
    """
    y_pred = model.predict(x)
    # Collapse score rows / label rows to class indices before scoring.
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_true, axis=1)
    accuracy = accuracy_score(y_true_classes, y_pred_classes)
    f1 = f1_score(y_true_classes, y_pred_classes, average='weighted')
    print(f'Accuracy: {accuracy:.4f}')
    print(f'F1 Score: {f1:.4f}')
# Apply the same 1/255 rescale that the training and validation generators
# use; val_images still holds the unscaled img_to_array output.
evaluate_model(model, val_images / 255.0, val_labels)
# 生成提交文件
def generate_submission(model, test_dir, submission_path):
    """Predict a label for every PNG in *test_dir* and write them to a CSV.

    Args:
        model: Object exposing ``predict(batch)`` that returns per-class
            scores with one row per image.
        test_dir: Directory whose ``.png`` files (any letter case) are
            classified. Other files are ignored.
        submission_path: Destination of the CSV with the columns
            '文件名' (file name) and '标签' (predicted label).

    Returns:
        None. If no readable PNG is found, a message is printed and no file
        is written.
    """
    test_images = []
    filenames = []
    # Sorted so the CSV row order does not depend on os.listdir ordering.
    for filename in sorted(os.listdir(test_dir)):
        if not filename.lower().endswith('.png'):
            continue
        image = cv2.imread(os.path.join(test_dir, filename))
        if image is None:
            # cv2.imread signals an undecodable file by returning None
            # instead of raising; skip it rather than crash in resize.
            print(f"Skipping unreadable image: {filename}")
            continue
        # cv2 decodes to BGR, while the training images were read with
        # load_img; reorder to RGB so the channels match.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        test_images.append(cv2.resize(image, (224, 224)))
        filenames.append(filename)

    if not test_images:
        print("No images found in the test directory.")
        return

    # Same 1/255 rescale as the training generator.
    batch = np.array(test_images) / 255.0
    predictions = model.predict(batch)
    predicted_classes = np.argmax(predictions, axis=1)

    # Translate class indices back to their label strings.
    inverse_label_mapping = {v: k for k, v in label_mapping.items()}
    predicted_labels = [inverse_label_mapping[pred] for pred in predicted_classes]

    submission_df = pd.DataFrame({'文件名': filenames, '标签': predicted_labels})
    submission_df.to_csv(submission_path, index=False)
# Write the predictions for the test folder to submission.csv.
generate_submission(
    model,
    test_dir=r'C:\Users\24067\Desktop\peach_split\test',
    submission_path='submission.csv',
)
```
### 关键修改点:
1. **检查 `test_images` 是否为空**:在 `generate_submission` 函数中添加了对 `test_images` 是否为空的检查,如果为空则输出提示信息并返回。
2. **统一目标尺寸**:将 `cv2.resize` 的目标尺寸统一为 `(224, 224)`,与训练和验证集保持一致。
这些修改主要解决 `test_images` 为空时 `model.predict` 失败的问题,并确保代码能够正常运行。需要注意的是,上面的代码并没有调用 `math.log10`,因此 `ValueError: math domain error` 是否随之消失,还需要结合完整的错误堆栈进一步确认。
阅读全文