YOLOv8 数据集划分代码
时间: 2025-01-05 08:13:34 浏览: 11
### YOLOv8 数据集划分
为了实现YOLOv8的数据集划分,通常会采用Python脚本处理标签文件和图像路径。下面提供一段用于将VOC格式转换为YOLO格式并进行训练/验证集划分的代码示例:
```python
import os
from xml.etree import ElementTree as ET
import random
import shutil
def convert_voc_to_yolo(voc_annotation_file, output_dir, class_names):
    """Convert one Pascal VOC XML annotation into a YOLO-format label file.

    Each ``<object>`` whose ``<name>`` appears in ``class_names`` becomes one
    line ``"<class_index> <x_center> <y_center> <width> <height>"`` where the
    four box values are divided by the image size read from ``<size>``.
    Objects with an unknown class, no ``<bndbox>``, or an empty box after
    clipping to the image are skipped.

    The label file is written to ``output_dir`` and named after the XML's
    ``<filename>`` element (extension replaced by ``.txt``). When that
    element is missing or empty, the annotation file's own name is used.

    Args:
        voc_annotation_file: Path to the VOC ``.xml`` annotation.
        output_dir: Directory that receives the ``.txt`` label; created if
            it does not exist.
        class_names: Sequence of class names; a name's position is its
            class index.

    Raises:
        ValueError: If the annotation has no usable positive image size.
    """
    root = ET.parse(voc_annotation_file).getroot()

    # float() also accepts sizes written as "500.0", which int() rejects.
    width = float(root.findtext('size/width') or 0)
    height = float(root.findtext('size/height') or 0)
    if width <= 0 or height <= 0:
        raise ValueError(
            f"Invalid image size {width}x{height} in {voc_annotation_file}"
        )

    # First occurrence wins, mirroring list.index() on duplicate names.
    class_index = {}
    for position, name in enumerate(class_names):
        class_index.setdefault(name, position)

    yolo_lines = []
    for obj in root.findall('object'):
        label = obj.findtext('name')
        if label not in class_index:
            continue
        bbox = obj.find('bndbox')
        if bbox is None:
            continue

        # Clip to the image so normalised values stay inside [0, 1].
        xmin = min(max(float(bbox.findtext('xmin')), 0.0), width)
        ymin = min(max(float(bbox.findtext('ymin')), 0.0), height)
        xmax = min(max(float(bbox.findtext('xmax')), 0.0), width)
        ymax = min(max(float(bbox.findtext('ymax')), 0.0), height)
        if xmax <= xmin or ymax <= ymin:
            continue

        x_center = (xmin + xmax) / 2.0 / width
        y_center = (ymin + ymax) / 2.0 / height
        w_bbox = (xmax - xmin) / width
        h_bbox = (ymax - ymin) / height
        yolo_lines.append(
            f"{class_index[label]} {x_center:.6f} {y_center:.6f} "
            f"{w_bbox:.6f} {h_bbox:.6f}\n"
        )

    image_name = (root.findtext('filename') or '').strip()
    if not image_name:
        image_name = voc_annotation_file
    # basename() keeps the label inside output_dir even if <filename>
    # carries a directory component.
    label_stem = os.path.splitext(os.path.basename(image_name))[0]

    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    label_path = os.path.join(output_dir, label_stem + '.txt')
    with open(label_path, 'w', encoding='utf-8') as file:
        file.writelines(yolo_lines)
def split_dataset(input_folder, output_train_folder, output_val_folder,
                  train_ratio=0.8, class_names=None):
    """Randomly split a VOC-style folder into YOLO train/validation folders.

    Every ``.jpg`` file (extension matched case-insensitively) in
    ``input_folder`` is paired with the ``.xml`` file of the same stem.
    Pairs are shuffled; the first ``int(n * train_ratio)`` go to
    ``output_train_folder`` and the rest to ``output_val_folder``. For each
    pair the image is copied and ``convert_voc_to_yolo`` writes the label
    file into the same destination folder.

    Images without a matching ``.xml`` are skipped, so a missing annotation
    cannot abort the run half-way and leave a partial dataset.

    Args:
        input_folder: Directory holding the images and their VOC XML files.
        output_train_folder: Destination for the training subset.
        output_val_folder: Destination for the validation subset.
        train_ratio: Fraction of pairs assigned to the training subset.
        class_names: Class names passed to ``convert_voc_to_yolo``. Defaults
            to the placeholder list ``["class1", "class2"]``; replace it
            with the dataset's real class names.
    """
    if class_names is None:
        class_names = ["class1", "class2"]  # placeholder class list

    # Build (image, annotation) pairs together so they can never drift out
    # of alignment; splitext() swaps only the real extension, whatever its
    # letter case.
    pairs = []
    for entry in sorted(os.listdir(input_folder)):
        stem, extension = os.path.splitext(entry)
        if extension.lower() != '.jpg':
            continue
        image_path = os.path.join(input_folder, entry)
        annotation_path = os.path.join(input_folder, stem + '.xml')
        if os.path.isfile(image_path) and os.path.isfile(annotation_path):
            pairs.append((image_path, annotation_path))

    random.shuffle(pairs)
    split_index = int(len(pairs) * train_ratio)

    # Create the destination directories (also when there is nothing to copy).
    os.makedirs(output_train_folder, exist_ok=True)
    os.makedirs(output_val_folder, exist_ok=True)

    for i, (image_path, annotation_path) in enumerate(pairs):
        if i < split_index:
            destination_folder = output_train_folder
        else:
            destination_folder = output_val_folder
        shutil.copy(image_path, destination_folder)
        convert_voc_to_yolo(annotation_path, destination_folder, class_names)
if __name__ == "__main__":
    # Example invocation: point input_folder at the directory holding the
    # VOC-format data; the two output folders receive the split subsets.
    split_dataset(
        input_folder="/path/to/voc_format_data",
        output_train_folder="./data/train/",
        output_val_folder="./data/validation/",
    )
```
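此段代码实现了两个主要功能:一是`convert_voc_to_yolo()`函数负责读取XML文件,并将边界框坐标从Pascal VOC格式(像素坐标 xmin、ymin、xmax、ymax)转换成YOLO所需的相对位置表示(按图像宽高归一化的中心点坐标与宽高),结果写入与图片同名的`.txt`标签文件;二是`split_dataset()`函数按照指定比例(默认 0.8)随机分配图片到训练集和验证集中,在复制图片的同时调用`convert_voc_to_yolo()`在同一目标目录下生成对应的标签文件。注意:代码只处理扩展名为`.jpg`的图片,并假设同名的`.xml`标注文件与图片位于同一目录;使用前需将代码中的`["class1", "class2"]`替换为实际的类别名称列表。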
阅读全文