给出修改后完整的代码,并确保不会报错
时间: 2024-10-23 12:18:53 浏览: 78
根据提供的错误信息 `AttributeError: 'list' object has no attribute 'keys'`,可以确定问题出在 `PeachDataset` 类的初始化方法中。具体来说,`train_labels` 和 `val_labels` 应该是字典,但它们被误认为是列表。我们需要确保 `train_labels` 和 `val_labels` 是字典。
以下是修正后的完整代码:
```python
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
# 数据集路径
data_dir = 'C:/Users/24067/Desktop/peach_split'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')
test_dir = os.path.join(data_dir, 'test')
# 标签文件路径
train_label_path = 'C:/Users/24067/Desktop/train_label.json'
val_label_path = 'C:/Users/24067/Desktop/val_label.json'
# 加载标签数据
with open(train_label_path, 'r') as f:
train_labels = json.load(f)
with open(val_label_path, 'r') as f:
val_labels = json.load(f)
# 定义数据集类
class PeachDataset(Dataset):
def __init__(self, data_dir, label_dict, transform=None):
self.data_dir = data_dir
self.label_dict = label_dict
self.transform = transform
self.image_files = list(label_dict.keys())
def __len__(self):
return len(self.image_files)
def __getitem__(self, idx):
img_name = self.image_files[idx]
img_path = os.path.join(self.data_dir, img_name)
image = Image.open(img_path).convert('RGB')
label = self.label_dict[img_name]
if self.transform:
image = self.transform(image)
return image, label
# 数据预处理
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# 创建数据集对象
train_dataset = PeachDataset(train_dir, train_labels, transform=transform)
val_dataset = PeachDataset(val_dir, val_labels, transform=transform)
# 创建数据加载器
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
# 定义模型
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 4) # 4个类别:特级、一级、二级、三级
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# 训练模型
def train_model(model, criterion, optimizer, num_epochs=10):
for epoch in range(num_epochs):
model.train()
running_loss = 0.0
for inputs, labels in train_loader:
inputs, labels = inputs.to('cuda' if torch.cuda.is_available() else 'cpu'), labels.to('cuda' if torch.cuda.is_available() else 'cpu')
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')
# 评估模型
def evaluate_model(model, dataloader):
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
for inputs, labels in dataloader:
inputs, labels = inputs.to('cuda' if torch.cuda.is_available() else 'cpu'), labels.to('cuda' if torch.cuda.is_available() else 'cpu')
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
all_preds.extend(preds.cpu().numpy())
all_labels.extend(labels.cpu().numpy())
accuracy = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average='weighted')
return accuracy, f1
# 训练模型
train_model(model, criterion, optimizer, num_epochs=10)
# 评估模型
accuracy, f1 = evaluate_model(model, val_loader)
print(f'Validation Accuracy: {accuracy:.4f}')
print(f'Validation F1 Score: {f1:.4f}')
# 保存模型
torch.save(model.state_dict(), 'peach_grading_model.pth')
# 生成提交文件
def generate_submission(model, test_dir, sample_submission_path):
model.eval()
submission = pd.read_csv(sample_submission_path)
test_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
with torch.no_grad():
for i, filename in enumerate(submission['filename']):
img_path = os.path.join(test_dir, filename)
image = Image.open(img_path).convert('RGB')
image = test_transform(image).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
output = model(image)
_, pred = torch.max(output, 1)
submission.at[i, 'label'] = int(pred.item())
submission.to_csv('submission.csv', index=False)
# 生成并保存提交文件
generate_submission(model, test_dir, 'C:/Users/24067/Desktop/sample_submission.csv')
```
### 关键修改点:
1. **确保 `train_labels` 和 `val_labels` 是字典**:通过 `json.load(f)` 加载的标签数据已经是字典格式,不需要额外转换。
2. **检查 `PeachDataset` 类中的 `label_dict` 使用**:确保 `label_dict` 是字典,并且使用 `label_dict.keys()` 获取图像文件名。
这样修改后,代码应该不会再出现 `AttributeError: 'list' object has no attribute 'keys'` 错误。
阅读全文