输出能将voc格式数据集转换为coco格式数据集的脚本
时间: 2023-06-13 22:05:40 浏览: 128
python脚本,划分训练集和测试集,coco、voc格式的数据转换成yolo系列数据
5星 · 资源好评率100%
下面是一个将VOC格式数据集转换为COCO格式数据集的Python脚本,仅依赖Python标准库(os、xml.etree.ElementTree、json),无需安装COCO API:
```python
import os
import xml.etree.ElementTree as ET
import json
def xml_to_json(xml_file, root_path, image_id):
    """Parse one Pascal VOC annotation file into an image record.

    Args:
        xml_file: Path to the VOC ``.xml`` annotation file.
        root_path: Directory joined in front of the ``<filename>`` text to
            build the ``file_name`` field.
        image_id: Identifier stored on the image and on each of its
            annotations.

    Returns:
        A dict with ``id``, ``width``, ``height``, ``file_name`` and an
        ``annotations`` list. Each annotation holds a per-image ``id``
        (counting from 1), ``image_id``, ``category_id`` (the raw ``<name>``
        text, not a numeric id), ``bbox`` as ``[x, y, width, height]``,
        ``area`` and ``iscrowd`` (always 0).
    """
    # Give ElementTree the path itself so the parser honours the encoding
    # named in the XML declaration instead of the locale's default encoding.
    root = ET.parse(xml_file).getroot()

    size = root.find('size')
    image = {
        'id': image_id,
        'width': _voc_int(size.find('width').text),
        'height': _voc_int(size.find('height').text),
        'file_name': os.path.join(root_path, root.find('filename').text),
        'annotations': [],
    }

    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            _voc_int(bbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        box_width = xmax - xmin
        box_height = ymax - ymin
        image['annotations'].append({
            'id': len(image['annotations']) + 1,
            'image_id': image_id,
            'category_id': obj.find('name').text,
            # COCO boxes are [x, y, width, height]; VOC stores two corners.
            'bbox': [xmin, ymin, box_width, box_height],
            'area': box_width * box_height,
            'iscrowd': 0,
        })
    return image


def _voc_int(text):
    """Convert a VOC numeric field to int, accepting values such as '10.0'."""
    try:
        return int(text)
    except ValueError:
        # Some annotation tools write float-formatted coordinates; truncate
        # them rather than failing the whole conversion.
        return int(float(text))
def voc_to_coco(voc_path, save_path):
    """Convert a Pascal VOC style dataset into a single COCO JSON file.

    Annotation files are read from ``voc_path/Annotations``. Two layouts are
    accepted: ``.xml`` files placed directly in that directory, and ``.xml``
    files grouped in sub-directories (each sub-directory name is registered
    as a category). Directory entries are processed in sorted order so the
    generated ids are reproducible between runs.

    Args:
        voc_path: Dataset root containing ``Annotations`` and ``JPEGImages``.
        save_path: Path of the JSON file to write.
    """
    annotations_dir = os.path.join(voc_path, 'Annotations')
    images_dir = os.path.join(voc_path, 'JPEGImages')

    categories = []
    annotations = []
    images = []
    category_id = {}

    def register_category(class_name):
        # Allocate ids on first sight so an object whose name matches no
        # directory gets a category instead of raising KeyError.
        if class_name not in category_id:
            category_id[class_name] = len(categories) + 1
            categories.append({'id': category_id[class_name], 'name': class_name})
        return category_id[class_name]

    xml_paths = []
    for entry in sorted(os.listdir(annotations_dir)):
        entry_path = os.path.join(annotations_dir, entry)
        if os.path.isdir(entry_path):
            # Per-class sub-directory layout: the directory names a category.
            register_category(entry)
            xml_paths.extend(
                os.path.join(entry_path, file_name)
                for file_name in sorted(os.listdir(entry_path))
                if file_name.lower().endswith('.xml')
            )
        elif entry.lower().endswith('.xml'):
            # Flat layout: annotation files sit directly in Annotations/.
            xml_paths.append(entry_path)

    annotation_id = 1
    for image_id, xml_path in enumerate(xml_paths, start=1):
        image = xml_to_json(xml_path, images_dir, image_id)
        # Move the annotations out of the image record so they are written
        # once, in the top-level list, rather than duplicated under "images".
        for annotation in image.pop('annotations', []):
            annotation['id'] = annotation_id
            annotation['category_id'] = register_category(annotation['category_id'])
            annotations.append(annotation)
            annotation_id += 1
        images.append(image)

    data = {
        'categories': categories,
        'annotations': annotations,
        'images': images,
    }
    with open(save_path, 'w') as f:
        json.dump(data, f, indent=2)
# Example locations; replace with the real dataset root and output file.
voc_path = 'path/to/VOC_dataset'
save_path = 'path/to/save/COCO_dataset.json'

if __name__ == '__main__':
    # Convert only when executed as a script, so importing this module to
    # reuse the functions does not trigger a conversion.
    voc_to_coco(voc_path, save_path)
```
其中,`voc_path`为VOC格式数据集的路径(标注位于`voc_path/Annotations`下,图片位于`voc_path/JPEGImages`下),`save_path`为转换后COCO格式数据集的保存路径。注意,该脚本转换的是目标检测的边界框标注(bbox),不包含分割(segmentation)信息;如果需要进行分割等其他任务,请根据需要进行修改。
阅读全文