利用旋转、平移、缩放、裁剪、高斯噪声、椒盐噪声、均匀噪声和高斯模糊等方法扩充数据集,并生成 XML、TXT 文件的代码
时间: 2024-12-16 11:14:07 浏览: 7
在图像处理领域中,这些数据扩充技术通常用于物体识别、行人检测等计算机视觉任务。这里提供一个基于 Python 和 OpenCV 的基本框架,展示如何对图片进行增强处理,并为每张增强后的图片保存 XML 标注文件(例如 PASCAL VOC 格式)和 TXT 文件。请注意,这只是一个简化版本:示例只实现了旋转、缩放、裁剪和高斯噪声,标注框是随机生成的占位数据;实际应用中需要让真实标注框随图像做同样的几何变换,也可能需要功能更完整的图像增强库,如 `imgaug`。
```python
import cv2
import numpy as np
from lxml import etree
import os
# Assumes the source images live in the directory named by base_dir.
base_dir = 'base_images'
# Directory used for the XML annotation files; the driver loop at the end of
# the script also saves the augmented images here.
output_xml_dir = 'output_xml'
# Directory used for the TXT files.
output_txt_dir = 'output_txt'
def rotate_scale_crop(image, angle=(-5, 5), scale=(0.9, 1.1)):
    """Randomly rotate and zoom an image about its centre, then crop it.

    Args:
        image: Image array whose first two axes are (height, width).
        angle: (low, high) range, in degrees, from which the rotation angle
            is drawn uniformly.
        scale: (low, high) range from which the zoom factor is drawn
            uniformly. The largest distance of either bound from 1.0 is also
            used as the maximum fraction trimmed from each side by the crop.

    Returns:
        The warped image with a random margin removed from every side. At
        least one tenth of each dimension is always kept.
    """
    h, w = image.shape[:2]
    center = (w // 2, h // 2)

    # np.random is used for every draw: this file imports numpy but not the
    # stdlib ``random`` module.
    theta = float(np.random.uniform(min(angle), max(angle)))
    zoom = float(np.random.uniform(min(scale), max(scale)))
    rotation_matrix = cv2.getRotationMatrix2D(center, theta, zoom)
    warped = cv2.warpAffine(image, rotation_matrix, (w, h))

    # Derive the crop margin from |1 - scale| so bounds above 1.0 do not
    # produce negative or inverted ranges; cap it so the crop is never empty.
    margin = min(max(abs(1.0 - s) for s in scale), 0.45)
    max_dx = int(margin * w)
    max_dy = int(margin * h)

    # randint's upper bound is exclusive, hence the +1 (also keeps the call
    # valid when the margin rounds down to zero pixels).
    left = int(np.random.randint(0, max_dx + 1))
    top = int(np.random.randint(0, max_dy + 1))
    right = w - int(np.random.randint(0, max_dx + 1))
    bottom = h - int(np.random.randint(0, max_dy + 1))
    return warped[top:bottom, left:right]
def add_gaussian_noise(image, sigma=1):
    """Return a copy of ``image`` with zero-mean Gaussian noise added.

    Args:
        image: Array of pixel values in the 0-255 range (any shape).
        sigma: Standard deviation of the noise, in pixel-value units.

    Returns:
        A ``uint8`` array with the same shape as ``image``.
    """
    noise = np.random.normal(loc=0, scale=sigma, size=image.shape)
    # Work in float so the sum can temporarily leave the 0-255 range.
    noisy_image = image.astype(np.float64) + noise
    # Round and clip before the cast: casting out-of-range values straight to
    # uint8 wraps around and turns dark pixels bright (and vice versa).
    return np.clip(np.rint(noisy_image), 0, 255).astype(np.uint8)
def _add_text_element(parent, tag, text):
    """Append a child element named ``tag`` to ``parent`` and set its text."""
    child = etree.SubElement(parent, tag)
    child.text = str(text)
    return child


def augment_image(image, save_path):
    """Augment one image, save it, and write matching XML and TXT files.

    The image is passed through ``rotate_scale_crop`` and
    ``add_gaussian_noise`` and written to ``save_path``. A PASCAL VOC style
    XML file is written to ``output_xml_dir`` and a TXT file containing the
    XML file name is written to ``output_txt_dir``; both are named after the
    base name of ``save_path``.

    NOTE: the bounding box in the XML is a random placeholder, not a real
    annotation. Replace it with the source annotation transformed in the same
    way as the image before using the output for training.

    Args:
        image: Source image array.
        save_path: Destination path of the augmented image.

    Raises:
        ValueError: If ``image`` is None or the augmented image is empty.
        OSError: If the augmented image cannot be written.
    """
    if image is None:
        raise ValueError("image is None; check that the source file was read")

    # Rotate/scale/crop first, then add Gaussian noise.
    img_augmented = rotate_scale_crop(image)
    img_augmented = add_gaussian_noise(img_augmented)
    # Other filters (e.g. Gaussian blur) could be applied here as well.

    img_h, img_w = img_augmented.shape[:2]
    if img_h < 1 or img_w < 1:
        raise ValueError("augmented image is empty")
    # Single-channel images have no third axis.
    channels = img_augmented.shape[2] if img_augmented.ndim == 3 else 1

    # Save the augmented image; cv2.imwrite reports failure via its return
    # value rather than an exception, so check it explicitly.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    if not cv2.imwrite(save_path, img_augmented):
        raise OSError(f"failed to write image: {save_path}")

    # Build the XML annotation (PASCAL VOC layout).
    root = etree.Element("annotation")
    _add_text_element(root, "folder", 'test')
    _add_text_element(root, "filename", os.path.basename(save_path))
    size = etree.SubElement(root, "size")
    _add_text_element(size, "width", img_w)
    _add_text_element(size, "height", img_h)
    _add_text_element(size, "depth", channels)

    # Random placeholder box, kept inside the image and at least 1 px wide
    # and tall: 0 <= xmin < xmax <= width and 0 <= ymin < ymax <= height.
    x_min = int(np.random.randint(0, img_w))
    y_min = int(np.random.randint(0, img_h))
    x_max = int(np.random.randint(x_min + 1, img_w + 1))
    y_max = int(np.random.randint(y_min + 1, img_h + 1))

    obj = etree.SubElement(root, "object")
    # Replace with the real object class name.
    _add_text_element(obj, "name", 'example_object')
    bndbox = etree.SubElement(obj, "bndbox")
    _add_text_element(bndbox, "xmin", x_min)
    _add_text_element(bndbox, "ymin", y_min)
    _add_text_element(bndbox, "xmax", x_max)
    _add_text_element(bndbox, "ymax", y_max)

    # Use only the base name of save_path: joining the full path onto the
    # output directory would duplicate any directory already in save_path.
    stem = os.path.splitext(os.path.basename(save_path))[0]
    xml_file = stem + ".xml"

    os.makedirs(output_xml_dir, exist_ok=True)
    xml_path = os.path.join(output_xml_dir, xml_file)
    with open(xml_path, 'wb') as f:
        f.write(etree.tostring(root, pretty_print=True))

    # Write the TXT file, which records the name of the XML file.
    os.makedirs(output_txt_dir, exist_ok=True)
    txt_path = os.path.join(output_txt_dir, stem + ".txt")
    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(f"{xml_file}\n")
# Walk the source image directory, augment every readable image and save the
# results. The output directories are created up front because cv2.imwrite
# does not create missing directories.
os.makedirs(output_xml_dir, exist_ok=True)
os.makedirs(output_txt_dir, exist_ok=True)
for img_path in os.listdir(base_dir):
    img = cv2.imread(os.path.join(base_dir, img_path))
    if img is None:
        # cv2.imread returns None (instead of raising) for sub-directories
        # and files it cannot decode as an image.
        print(f"Skipping unreadable file: {img_path}")
        continue
    # splitext keeps file names with more than one dot intact.
    stem = os.path.splitext(img_path)[0]
    augment_image(img, os.path.join(output_xml_dir, stem + '_augmented.jpg'))
```
阅读全文