使用cleanlab的置信学习查找猫狗分类图像中的错误标签,并用PyTorch框架实现ResNet8分类
时间: 2024-05-03 09:21:52 浏览: 214
1. 使用cleanlab查找错误标签
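cleanlab是一个Python库,可用于检测和纠正数据集中的错误标签。下面是使用cleanlab查找猫狗分类图像中的错误标签的示例代码(基于cleanlab 1.x的`cleanlab.pruning.get_noise_indices`接口;在cleanlab 2.x中,对应的接口为`cleanlab.filter.find_label_issues`):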
```
import os

import cleanlab
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision
from sklearn.metrics import accuracy_score
from torch.utils.data import DataLoader
from torchvision import transforms
# Load the dataset index and derive image paths and integer labels.
# NOTE(review): `class_to_idx`, `num_classes`, `batch_size` and `CustomDataset`
# are not defined in this snippet; they must be provided before it runs.
train_dir = 'train/'
train_df = pd.read_csv('train.csv')
train_df['path'] = train_df['id'].map(lambda x: os.path.join(train_dir, '{}.jpg'.format(x)))
train_df['label'] = train_df['breed'].map(lambda x: class_to_idx[x])

# Define the model and the data augmentation pipeline.
model = torchvision.models.resnet18(pretrained=True)
# Read the feature width from the existing head instead of hard-coding 512.
model.fc = nn.Linear(model.fc.in_features, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
# NOTE(review): no training loop is shown here. The replaced `fc` layer must be
# trained (ideally with cross-validation, so the probabilities below are
# out-of-sample) before the predictions are meaningful for label-error search.
train_transforms = transforms.Compose([
    transforms.RandomResizedCrop(size=256, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Collect the given labels and the model's predicted probabilities.
train_dataset = CustomDataset(train_df, train_transforms)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Predictions use an unshuffled loader so that row i of `train_probs`
# corresponds to `train_labels[i]`.
eval_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
train_labels = np.array(train_df['label'])

model.eval()
batch_probs = []
with torch.no_grad():
    for batch in eval_loader:
        # assumes CustomDataset yields (image, label, ...) tuples — TODO confirm
        images = batch[0]
        batch_probs.append(torch.softmax(model(images), dim=1).cpu().numpy())
train_probs = np.concatenate(batch_probs)

# Find likely label errors.
# NOTE(review): the `seed` keyword was dropped; it does not appear in the
# documented signature of `get_noise_indices` — confirm against the installed
# cleanlab version.
train_errors = cleanlab.pruning.get_noise_indices(
    s=train_labels,
    psx=train_probs,
    sorted_index_method='normalized_margin',  # rank flagged samples by normalized margin
    prune_method='prune_by_class',  # prune per given class
    num_to_remove_per_class=2,  # flag two samples per class
)

# Report results. "After cleaning" is measured on the samples that were NOT
# flagged, i.e. the complement of `train_errors`.
keep_mask = np.ones(len(train_labels), dtype=bool)
keep_mask[train_errors] = False
train_preds = np.argmax(train_probs, axis=1)
print("Number of errors found:", len(train_errors))
print("Train accuracy:", accuracy_score(train_labels, train_preds))
print("Train accuracy after cleaning:", accuracy_score(train_labels[keep_mask], train_preds[keep_mask]))
```
2. 使用pytorch实现resnet8分类模型
下面是使用pytorch实现resnet8分类模型的示例代码:
```
import torch.nn as nn
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 conv + batch-norm layers.

    The input is added back to the output of the second batch-norm through
    a shortcut. The shortcut is the identity unless the stride or channel
    count changes, in which case it is a 1x1 conv + batch-norm projection.

    Args:
        in_planes: Number of input channels.
        planes: Number of channels produced by both 3x3 convolutions.
        stride: Stride of the first convolution and of the projection
            shortcut. Defaults to 1.
    """

    # Output channel count is ``expansion * planes``.
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        out_planes = self.expansion * planes
        # An empty Sequential acts as the identity mapping.
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != out_planes:
            # Shapes differ, so project the input to match the main branch.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        # `nn.functional` is used instead of the alias `F`, which this
        # snippet never imports.
        out = nn.functional.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return nn.functional.relu(out)
class ResNet8(nn.Module):
    """Small ResNet: a 3x3 stem, three residual stages, and a linear head.

    The stages use 16, 32 and 64 base channels; the second and third stage
    start with a stride-2 block.

    Args:
        block: Residual block class (or factory) called as
            ``block(in_planes, planes, stride)`` and exposing an integer
            ``expansion`` attribute.
        num_blocks: Sequence of three ints, the number of blocks per stage.
        num_classes: Size of the output layer. Defaults to 10.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        # Zero-argument super() does not look up the global name `ResNet8`.
        super().__init__()
        self.in_planes = 16
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # Width of the last stage is 64 * expansion (64 for BasicBlock).
        self.linear = nn.Linear(64 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage; only its first block uses ``stride``."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            # The next block consumes what this one produced.
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        # `nn.functional` is used instead of the alias `F`, which this
        # snippet never imports.
        out = nn.functional.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling. Equivalent to avg_pool2d(out, 8) for 32x32
        # inputs, and also valid for other input resolutions.
        out = nn.functional.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        return self.linear(out)


def resnet8(num_classes=2):
    """Build a ResNet8 with one BasicBlock per stage.

    Named ``resnet8`` rather than ``ResNet8``: a function with the class's
    name rebinds it, so the call below would invoke the function itself
    and fail with a TypeError.

    Args:
        num_classes: Size of the output layer. Defaults to 2.
    """
    return ResNet8(BasicBlock, [1, 1, 1], num_classes=num_classes)
```
阅读全文