python实现将一个文件夹中所有图像红色部分二值化为1,其余为0,再进行数据增强,最后使用paddleocr进行数字识别
时间: 2023-06-11 10:07:57 浏览: 65
下面是一个实现该功能的Python代码:
```python
import cv2
import os
import numpy as np
import random
from paddleocr import PaddleOCR
# Build a mask that separates the red regions of a BGR image from the rest.
def binarize_red(img):
    """Return an 8-bit mask that is 0 on red pixels and 255 everywhere else.

    The image is converted from BGR to HSV and matched against two hue
    bands (0-10 and 170-180, each with saturation and value of at least 50).
    The two matches are combined and then inverted, so callers that want
    the red pixels as foreground must invert the result again.

    Args:
        img: BGR image array as accepted by ``cv2.cvtColor``.

    Returns:
        Single-channel mask with the same height and width as ``img``.
    """
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Red is matched with one band at each end of the hue range.
    red_bands = (
        (np.array([0, 50, 50]), np.array([10, 255, 255])),
        (np.array([170, 50, 50]), np.array([180, 255, 255])),
    )
    low_band, high_band = (
        cv2.inRange(hsv_img, lower, upper) for lower, upper in red_bands
    )
    red_pixels = low_band + high_band
    # Flip the mask: red becomes 0, everything else becomes 255.
    return cv2.bitwise_not(red_pixels)
# Apply random data augmentation.
def data_augmentation(img):
    """Randomly rotate, translate and rescale an image.

    The rotation angle is drawn from [-30, 30] degrees, the translation
    from [-20, 20] pixels on each axis, and the scale factor from
    [0.5, 1.5]. Rotation and translation keep the original canvas size;
    the final resize changes it.

    Args:
        img: Image array, either 2-D (grayscale) or 3-D (multi-channel).

    Returns:
        The augmented image array.
    """
    # Slice the shape instead of unpacking three values so that
    # single-channel (2-D) images do not raise ValueError.
    rows, cols = img.shape[:2]

    # Random rotation about the image centre.
    angle = random.randint(-30, 30)
    rotation = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    img = cv2.warpAffine(img, rotation, (cols, rows))

    # Random translation.
    x = random.randint(-20, 20)
    y = random.randint(-20, 20)
    shift = np.float32([[1, 0, x], [0, 1, y]])
    img = cv2.warpAffine(img, shift, (cols, rows))

    # Random uniform scaling.
    scale = random.uniform(0.5, 1.5)
    return cv2.resize(img, None, fx=scale, fy=scale)
# Recognise digits with PaddleOCR.
_OCR_ENGINE = None


def _get_ocr():
    """Return a shared PaddleOCR instance, creating it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        # Constructing PaddleOCR loads its models, so do it only once
        # rather than once per image.
        _OCR_ENGINE = PaddleOCR()
    return _OCR_ENGINE


def recognize_num(img):
    """Run OCR on an image and collect the recognition entries.

    Args:
        img: Image array passed straight to ``PaddleOCR.ocr``.

    Returns:
        A list holding the second element of every detected item.
        NOTE(review): in PaddleOCR 2.x that element is a
        ``(text, confidence)`` tuple - confirm against the installed
        version.
    """
    result = _get_ocr().ocr(img, rec=True)
    nums = []
    for line in result or []:
        # An image with no detected text can yield None instead of a list.
        if not line:
            continue
        nums.extend(word[1] for word in line)
    return nums
# Process a single image.
def process_image(img_path):
    """Extract the red regions of one image, augment them and run OCR.

    The red pixels become white (255) on a black (0) background, which is
    the 8-bit form of the "red = 1, rest = 0" binarisation.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The list produced by ``recognize_num`` for the processed image.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"Could not read image: {img_path}")

    # binarize_red yields 0 on red pixels and 255 elsewhere, so invert it
    # to make the red regions the foreground instead of discarding them.
    red_binary = cv2.bitwise_not(binarize_red(img))
    # Use a 3-channel image so downstream steps that expect BGR input work.
    red_binary = cv2.cvtColor(red_binary, cv2.COLOR_GRAY2BGR)

    augmented = data_augmentation(red_binary)
    return recognize_num(augmented)
# Process every image in a folder.
def process_folder(folder_path):
    """Run ``process_image`` on each JPG/PNG file directly inside a folder.

    Files are visited in sorted name order so the returned list is
    deterministic, and the extension check ignores case so names such as
    ``IMG.JPG`` are included.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        A list with one ``process_image`` result per matching file, in
        sorted filename order.
    """
    image_suffixes = ('.jpg', '.png')
    return [
        process_image(os.path.join(folder_path, filename))
        for filename in sorted(os.listdir(folder_path))
        if filename.lower().endswith(image_suffixes)
    ]
# Example usage. Guarded so that importing this module does not start
# processing a folder as a side effect.
if __name__ == "__main__":
    folder_path = 'path/to/your/folder'
    nums_list = process_folder(folder_path)
    print(nums_list)
```
该代码对一个文件夹中的所有图像按红色区域进行二值化(在8位图像中以0和255表示0和1),随后进行数据增强,最后使用PaddleOCR进行数字识别。你可以根据实际情况修改代码中的参数,例如红色的HSV阈值范围、旋转/平移/缩放的随机范围,以及图像文件夹路径。
阅读全文