# Split the samples into per-class training lists with a fixed quota for each
# class; every sample that does not fit a quota is routed to the test lists.
for i in range(len(image)):
    if label[i] == '0' and len(train_label_0) < 641:
        train_image_0.append(image[i])
        train_label_0.append(label[i])
    elif label[i] == '1' and len(train_label_1) < 699:
        train_image_1.append(image[i])
        train_label_1.append(label[i])
    else:
        # The remainder is used as test data (1000 + 1000 = 2000).
        test_image.append(image[i])
        test_label.append(label[i])
时间: 2024-04-18 09:30:58 浏览: 76
这段代码是一个数据分割的逻辑,根据给定的图像和标签数据,将其分割为训练数据和测试数据。首先,通过迭代图像列表的索引,根据标签值将图像和标签分别添加到对应的训练数据列表train_image_0、train_label_0、train_image_1、train_label_1中。其中,如果标签为'0'且train_label_0列表长度小于641,则将图像和标签添加到train_image_0和train_label_0中;如果标签为'1'且train_label_1列表长度小于699,则将图像和标签添加到train_image_1和train_label_1中。如果以上条件都不满足,则将图像和标签添加到测试数据列表test_image和test_label中。最终,训练数据列表中最多包含641个标签为'0'的样本和699个标签为'1'的样本(前提是原始数据中这两类样本的数量足够),其余样本全部进入测试数据列表(按代码注释,为1000+1000=2000个)。
相关问题
def get_Image_dim_len(png_dir: str,jpg_dir:str): png = Image.open(png_dir) png_w,png_h=png.width,png.height #若第十行报错,说明jpg图片没有对应的png图片 png_dim_len = len(np.array(png).shape) assert png_dim_len==2,"提示:存在三维掩码图" jpg=Image.open(jpg_dir) jpg = ImageOps.exif_transpose(jpg) jpg.save(jpg_dir) jpg_w,jpg_h=jpg.width,jpg.height print(jpg_w,jpg_h,png_w,png_h) assert png_w==jpg_w and png_h==jpg_h,print("提示:%s mask图与原图宽高参数不一致"%(png_dir)) """2.读取单个图像均值和方差""" def pixel_operation(image_path: str): img = cv.imread(image_path, cv.IMREAD_COLOR) means, dev = cv.meanStdDev(img) return means,dev """3.分割数据集,生成label文件""" # 原始数据集 ann上一级 data_root = './work/voc_data02' #图像地址 image_dir="./JPEGImages" # ann图像文件夹 ann_dir = "./SegmentationClass" # txt文件保存路径 split_dir = './ImageSets/Segmentation' mmengine.mkdir_or_exist(osp.join(data_root, split_dir)) png_filename_list = [osp.splitext(filename)[0] for filename in mmengine.scandir( osp.join(data_root, ann_dir), suffix='.png')] jpg_filename_list=[osp.splitext(filename)[0] for filename in mmengine.scandir( osp.join(data_root, image_dir), suffix='.jpg')] assert len(jpg_filename_list)==len(png_filename_list),"提示:原图与掩码图数量不统一" print("数量检查无误") for i in range(10): random.shuffle(jpg_filename_list) red_num=0 black_num=0 with open(osp.join(data_root, split_dir, 'trainval.txt'), 'w+') as f: length = int(len(jpg_filename_list)) for line in jpg_filename_list[:length]: pngpath=osp.join(data_root,ann_dir,line+'.bmp') jpgpath=osp.join(data_root,image_dir,line+'.bmp') get_Image_dim_len(pngpath,jpgpath) img=cv.imread(pngpath,cv.IMREAD_GRAYSCALE) red_num+=len(img)*len(img[0])-len(img[img==0]) black_num+=len(img[img==0]) f.writelines(line + '\n') value=0 train_mean,train_dev=[[0.0,0.0,0.0]],[[0.0,0.0,0.0]] with open(osp.join(data_root, split_dir, 'train.txt'), 'w+') as f: train_length = int(len(jpg_filename_list) * 7/ 10) for line in jpg_filename_list[:train_length]: jpgpath=osp.join(data_root,image_dir,line+'.bmp') mean,dev=pixel_operation(jpgpath) train_mean+=mean 
train_dev+=dev f.writelines(line + '\n') with open(osp.join(data_root, split_dir, 'val.txt'), 'w+') as f: for line in jpg_filename_list[train_length:]: jpgpath=osp.join(data_root,image_dir,line+'.bmp') mean,dev=pixel_operation(jpgpath) train_mean+=mean train_dev+=dev f.writelines(line + '\n') 帮我把这段代码改成bmp图像可以制作数据集的代码
当然,我可以帮你将代码修改为支持bmp图像的数据集制作代码。请看下面的修改后的代码:
```python
import os
import random

import cv2 as cv
import numpy as np
from PIL import Image
from PIL import ImageOps
def get_Image_dim_len(png_dir: str, jpg_dir: str) -> None:
    """Check that a mask image and its source image form a consistent pair.

    The mask must decode to a 2-D array and must have the same width and
    height as the source image. The source image is normalised with
    ``ImageOps.exif_transpose`` and written back to ``jpg_dir``.

    Args:
        png_dir: Path of the mask (annotation) image.
        jpg_dir: Path of the source image; the file is rewritten in place.

    Raises:
        AssertionError: If the mask is not 2-D or the sizes differ.
    """
    # If opening the mask fails here, the source image has no matching mask.
    with Image.open(png_dir) as png:
        png_w, png_h = png.width, png.height
        png_dim_len = len(np.array(png).shape)
    assert png_dim_len == 2, "提示:存在三维掩码图"

    # Only apply the orientation recorded in the EXIF data. An unconditional
    # mirror flip would desynchronise every image from its (unflipped) mask.
    # NOTE(review): BMP files normally carry no EXIF data, so this is expected
    # to leave them unchanged - confirm for your data.
    with Image.open(jpg_dir) as source:
        jpg = ImageOps.exif_transpose(source)
    jpg.save(jpg_dir)

    jpg_w, jpg_h = jpg.width, jpg.height
    print(jpg_w, jpg_h, png_w, png_h)
    # Pass the message itself; print(...) would return None as the message.
    assert png_w == jpg_w and png_h == jpg_h, (
        "提示:%s mask图与原图宽高参数不一致" % (png_dir)
    )
def pixel_operation(image_path: str):
    """Return the per-channel mean and standard deviation of one image.

    Args:
        image_path: Path of the image, loaded with ``cv.IMREAD_COLOR``.

    Returns:
        The ``(means, dev)`` pair produced by ``cv.meanStdDev``.

    Raises:
        OSError: If OpenCV cannot read the image.
    """
    img = cv.imread(image_path, cv.IMREAD_COLOR)
    if img is None:
        # cv.imread reports a missing or undecodable file by returning None;
        # fail here with the path instead of an obscure error further down.
        raise OSError("cannot read image: %s" % image_path)
    means, dev = cv.meanStdDev(img)
    return means, dev
# Split the dataset and write the trainval/train/val list files.
# Dataset root (the parent of the annotation folder).
data_root = './work/voc_data02'
# Source image folder.
image_dir = "./JPEGImages"
# Annotation (mask) image folder.
ann_dir = "./SegmentationClass"
# Folder that receives the generated txt files.
split_dir = './ImageSets/Segmentation'
os.makedirs(os.path.join(data_root, split_dir), exist_ok=True)

png_filename_list = [
    os.path.splitext(filename)[0]
    for filename in os.listdir(os.path.join(data_root, ann_dir))
    if filename.endswith('.bmp')
]
jpg_filename_list = [
    os.path.splitext(filename)[0]
    for filename in os.listdir(os.path.join(data_root, image_dir))
    if filename.endswith('.bmp')
]
assert len(jpg_filename_list) == len(png_filename_list), "提示:原图与掩码图数量不统一"
print("数量检查无误")
random.shuffle(jpg_filename_list)

# Pixel counters over all masks: non-zero pixels vs. zero pixels.
red_num = 0
black_num = 0
with open(os.path.join(data_root, split_dir, 'trainval.txt'), 'w') as f:
    length = len(jpg_filename_list)
    for line in jpg_filename_list[:length]:
        pngpath = os.path.join(data_root, ann_dir, line + '.bmp')
        jpgpath = os.path.join(data_root, image_dir, line + '.bmp')
        get_Image_dim_len(pngpath, jpgpath)
        img = cv.imread(pngpath, cv.IMREAD_GRAYSCALE)
        if img is None:
            # cv.imread returns None instead of raising on failure.
            raise OSError("cannot read mask image: %s" % pngpath)
        nonzero = int(np.count_nonzero(img))
        red_num += nonzero
        black_num += img.size - nonzero
        f.write(line + '\n')

# Accumulate in NumPy arrays: `+=` on a Python list would extend the list
# with the rows of each result instead of adding the values.
train_mean = np.zeros(3)
train_dev = np.zeros(3)
with open(os.path.join(data_root, split_dir, 'train.txt'), 'w') as f:
    train_length = int(len(jpg_filename_list) * 7 / 10)
    for line in jpg_filename_list[:train_length]:
        jpgpath = os.path.join(data_root, image_dir, line + '.bmp')
        mean, dev = pixel_operation(jpgpath)
        train_mean += np.ravel(mean)
        train_dev += np.ravel(dev)
        f.write(line + '\n')
with open(os.path.join(data_root, split_dir, 'val.txt'), 'w') as f:
    for line in jpg_filename_list[train_length:]:
        jpgpath = os.path.join(data_root, image_dir, line + '.bmp')
        mean, dev = pixel_operation(jpgpath)
        # The validation images go into the same accumulators, so despite
        # the names these are statistics over the whole dataset.
        train_mean += np.ravel(mean)
        train_dev += np.ravel(dev)
        f.write(line + '\n')

# Turn the sums into per-image averages and report them.
# NOTE(review): averaging per-image standard deviations only approximates
# the dataset-wide standard deviation - confirm this is acceptable.
if jpg_filename_list:
    train_mean /= len(jpg_filename_list)
    train_dev /= len(jpg_filename_list)
# Channel order follows cv.imread, which loads colour images as BGR.
print("mean (BGR):", train_mean, "std (BGR):", train_dev)
print("non-zero mask pixels:", red_num, "zero mask pixels:", black_num)
```
希望这可以帮到你!如果你还有其他问题,请随时提问。
import os
import random

from tqdm import tqdm

# Folder that holds the images.
image_dir = "C:/Users/86152/Desktop/coco128/images/train2017"
# Folder that holds the label files.
label_dir = "C:/Users/86152/Desktop/coco128/labels/train2017"
# Root folder that contains the train/valid/test destination folders.
dataset_root = "C:/Users/86152/PycharmProjects/pythonProject/ultralytics-main/datasets"

# Paths of the images that have a label, and of those labels (same order).
valid_images = []
valid_labels = []

# Pass 1: keep images that have a label file, delete the ones that do not.
for image_name in os.listdir(image_dir):
    image_path = os.path.join(image_dir, image_name)
    if not os.path.isfile(image_path):
        # Sub-directories are not images and os.remove cannot delete them.
        continue
    # Build the label name from the stem. str.replace(ext, ".txt") would
    # rewrite every occurrence of the extension in the name, and for an
    # empty extension it would insert ".txt" between all characters.
    stem = os.path.splitext(image_name)[0]
    label_path = os.path.join(label_dir, stem + ".txt")
    if not os.path.exists(label_path):
        os.remove(image_path)
        print("deleted:", image_path)
    else:
        valid_images.append(image_path)
        valid_labels.append(label_path)

# Make sure every destination folder exists before anything is moved.
for split_name in ("train", "valid", "test"):
    for kind in ("images", "labels"):
        os.makedirs(os.path.join(dataset_root, split_name, kind), exist_ok=True)

# Pass 2: move each image/label pair into a randomly chosen split.
for image_path, label_path in tqdm(
    zip(valid_images, valid_labels), total=len(valid_images)
):
    r = random.random()
    # train:valid:test = 7:2:1
    if r < 0.1:
        destination = dataset_root + "/test"
    elif r < 0.3:
        # 0.1 <= r < 0.3 covers 20% of the draws, matching the stated ratio
        # (the former 0.2 bound produced 8:1:1).
        destination = dataset_root + "/valid"
    else:
        destination = dataset_root + "/train"

    image_destination_path = os.path.join(
        destination, "images", os.path.basename(image_path)
    )
    os.rename(image_path, image_destination_path)

    label_destination_path = os.path.join(
        destination, "labels", os.path.basename(label_path)
    )
    os.rename(label_path, label_destination_path)

# The lists hold the paths from before the move.
print("valid images:", valid_images)
# Print the list of valid label paths.
print("valid labels:", valid_labels)
这段代码主要是在处理 COCO 数据集中的图片和标签文件。通过遍历 images 文件夹下的所有图片,获取图片的完整路径和扩展名,根据扩展名替换成对应的 label 文件名,获取对应 label 的完整路径。然后判断 label 是否存在,如果不存在就删除对应的图片。如果存在,就将图片路径和 label 路径添加到 valid_images 和 valid_labels 列表中。
然后通过遍历每个有效图片路径,随机生成一个概率,判断图片应该移动到哪个文件夹(train/valid/test)。最后生成目标文件夹中图片的新路径和 label 的新路径,移动图片和 label 到目标文件夹中。
最后输出有效图片路径列表和有效标签路径列表。
阅读全文