def load_data(filename, sample_num, sample_len)是什么意思
时间: 2024-03-04 07:53:29 浏览: 91
这是一个函数,它从指定的文件名中加载数据,并返回一个列表,其中包含指定数量的样本,每个样本的长度为指定的长度。具体来说:
- filename:指定要加载数据的文件名。
- sample_num:指定要加载的样本数量。
- sample_len:指定每个样本的长度。
函数会返回一个包含sample_num个元素的列表,每个元素都是一个长度为sample_len的样本。
相关问题
标签字段名称调整:将 file_name 和 label 改为 文件名 和 标签,以匹配 JSON 文件中的实际字段名称。
为了使代码与 JSON 文件中的实际字段名称匹配,需要对以下部分进行修改:
1. **加载标签数据时**:
- 将 `file_name` 字段改为 `文件名`。
- 将 `label` 字段改为 `标签`。
2. **生成提交文件时**:
- 将 `filename` 列改为 `文件名`。
- 将 `label` 列改为 `标签`。
以下是修改后的代码:
```python
import os
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
# 数据集路径
data_dir = 'C:/Users/24067/Desktop/peach_split'
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')
test_dir = os.path.join(data_dir, 'test')
# 标签文件路径
train_label_path = 'C:/Users/24067/Desktop/train_label.json'
val_label_path = 'C:/Users/24067/Desktop/val_label.json'
# 加载标签数据
with open(train_label_path, 'r') as f:
train_labels = json.load(f)
with open(val_label_path, 'r') as f:
val_labels = json.load(f)
# 调整标签字典的键值
train_labels = {item['文件名']: item['标签'] for item in train_labels}
val_labels = {item['文件名']: item['标签'] for item in val_labels}
# 定义数据集类
class PeachDataset(Dataset):
def __init__(self, data_dir, label_dict, transform=None):
self.data_dir = data_dir
self.label_dict = label_dict
self.transform = transform
self.image_files = list(label_dict.keys())
def __len__(self):
return len(self.image_files)
def __getitem__(self, idx):
img_name = self.image_files[idx]
img_path = os.path.join(self.data_dir, img_name)
image = Image.open(img_path).convert('RGB')
label = self.label_dict[img_name]
if self.transform:
image = self.transform(image)
return image, label
# 数据预处理
transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# 创建数据集对象
train_dataset = PeachDataset(train_dir, train_labels, transform=transform)
val_dataset = PeachDataset(val_dir, val_labels, transform=transform)
# 创建数据加载器
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
# 定义模型
model = models.resnet18(pretrained=True)
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 4) # 4个类别:特级、一级、二级、三级
model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# 训练模型
def train_model(model, criterion, optimizer, num_epochs=10):
for epoch in range(num_epochs):
model.train()
running_loss = 0.0
for inputs, labels in train_loader:
inputs, labels = inputs.to('cuda' if torch.cuda.is_available() else 'cpu'), labels.to('cuda' if torch.cuda.is_available() else 'cpu')
optimizer.zero_grad()
outputs = model(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')
# 评估模型
def evaluate_model(model, dataloader):
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
for inputs, labels in dataloader:
inputs, labels = inputs.to('cuda' if torch.cuda.is_available() else 'cpu'), labels.to('cuda' if torch.cuda.is_available() else 'cpu')
outputs = model(inputs)
_, preds = torch.max(outputs, 1)
all_preds.extend(preds.cpu().numpy())
all_labels.extend(labels.cpu().numpy())
accuracy = accuracy_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds, average='weighted')
return accuracy, f1
# 训练模型
train_model(model, criterion, optimizer, num_epochs=10)
# 评估模型
accuracy, f1 = evaluate_model(model, val_loader)
print(f'Validation Accuracy: {accuracy:.4f}')
print(f'Validation F1 Score: {f1:.4f}')
# 保存模型
torch.save(model.state_dict(), 'peach_grading_model.pth')
# 生成提交文件
def generate_submission(model, test_dir, sample_submission_path):
model.eval()
submission = pd.read_csv(sample_submission_path)
test_transform = transforms.Compose([
transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
with torch.no_grad():
for i, filename in enumerate(submission['文件名']):
img_path = os.path.join(test_dir, filename)
image = Image.open(img_path).convert('RGB')
image = test_transform(image).unsqueeze(0).to('cuda' if torch.cuda.is_available() else 'cpu')
output = model(image)
_, pred = torch.max(output, 1)
submission.at[i, '标签'] = int(pred.item())
submission.to_csv('submission.csv', index=False)
# 生成并保存提交文件
generate_submission(model, test_dir, 'C:/Users/24067/Desktop/sample_submission.csv')
```
### 主要修改点:
1. **加载标签数据时**:
```python
train_labels = {item['文件名']: item['标签'] for item in train_labels}
val_labels = {item['文件名']: item['标签'] for item in val_labels}
```
2. **生成提交文件时**:
```python
for i, filename in enumerate(submission['文件名']):
...
submission.at[i, '标签'] = int(pred.item())
```
这些修改确保了代码中的字段名称与 JSON 文件中的实际字段名称一致。
阅读全文