使用getRotationMatrix2D创建变换矩阵，通过SIMD指令集加速warpAffine算子参数WARP_INVERSE_MAP效果的C++例程

以下是使用SIMD指令集加速warpAffine算子的C++例程，其中使用了getRotationMatrix2D创建变换矩阵，并且应用了WARP_INVERSE_MAP参数： ```cpp #include <opencv2/opencv.hpp> #include <opencv2/core/hal/hal.hpp> using namespace cv; void warpAffine_SIMD(const Mat& src, Mat& dst, const Mat& M) { CV_Assert(src.type() == CV_8UC1); const int w = src.cols; const int h = src.rows; dst.create(h, w, src.type()); const float* Mptr = M.ptr<float>(); const float a11 = Mptr[0], a12 = Mptr[1], b1 = Mptr[2]; const float a21 = Mptr[3], a22 = Mptr[4], b2 = Mptr[5]; const int BLOCK_SIZE = 8; const int BLOCK_NUM_X = (w + BLOCK_SIZE - 1) / BLOCK_SIZE; const int BLOCK_NUM_Y = (h + BLOCK_SIZE - 1) / BLOCK_SIZE; float block_a[BLOCK_SIZE * BLOCK_SIZE], block_b[BLOCK_SIZE * BLOCK_SIZE]; int coords[BLOCK_SIZE * BLOCK_SIZE][2]; for (int by = 0; by < BLOCK_NUM_Y; ++by) { for (int bx = 0; bx < BLOCK_NUM_X; ++bx) { const int x0 = bx * BLOCK_SIZE; const int y0 = by * BLOCK_SIZE; const int x1 = std::min(x0 + BLOCK_SIZE, w); const int y1 = std::min(y0 + BLOCK_SIZE, h); const int bw = x1 - x0; const int bh = y1 - y0; for (int y = 0; y < bh; ++y) { const float* src_row = src.ptr<float>(y0 + y); float* dst_row = dst.ptr<float>(y0 + y); for (int x = 0; x < bw; ++x) { coords[y * bw + x][0] = x0 + x; coords[y * bw + x][1] = y0 + y; block_a[y * bw + x] = a11 * x0 + a12 * y0 + b1; block_b[y * bw + x] = a21 * x0 + a22 * y0 + b2; } } float* block_dst = dst.ptr<float>(y0); const int BLOCK_SIZE2 = BLOCK_SIZE * 2; for (int y = 0; y < bh; ++y) { int x = 0; for (; x <= bw - BLOCK_SIZE2; x += BLOCK_SIZE2) { const float* block_src0 = src.ptr<float>(coords[y * bw + x][1]) + coords[y * bw + x][0]; const float* block_src1 = src.ptr<float>(coords[y * bw + x + BLOCK_SIZE][1]) + coords[y * bw + x + BLOCK_SIZE]; const float* block_src2 = src.ptr<float>(coords[y * bw + x + BLOCK_SIZE2][1]) + coords[y * bw + x + BLOCK_SIZE2]; float32x4_t v_a0 = vdupq_n_f32(block_a[y * bw + x]); float32x4_t v_b0 = vdupq_n_f32(block_b[y * bw + x]); float32x4_t v_a1 = vdupq_n_f32(block_a[y * bw + x + 4]); float32x4_t v_b1 = vdupq_n_f32(block_b[y * bw + x + 4]); float32x4_t v_src0 = vld1q_f32(block_src0); float32x4_t v_src1 = vld1q_f32(block_src1); float32x4_t v_src2 = vld1q_f32(block_src2); float32x4_t v_dst0 = vmlaq_f32(v_a0, v_src0, v_b0); float32x4_t v_dst1 = vmlaq_f32(v_a0, v_src1, v_b0); float32x4_t v_dst2 = vmlaq_f32(v_a0, v_src2, v_b0); v_dst0 = vmlaq_f32(v_dst0, v_a1, v_b1); v_dst1 = vmlaq_f32(v_dst1, v_a1, v_b1); v_dst2 = vmlaq_f32(v_dst2, v_a1, v_b1); vst1q_f32(block_dst + x, v_dst0); vst1q_f32(block_dst + x + 4, v_dst1); vst1q_f32(block_dst + x + 8, v_dst2); } for (; x < bw; ++x) { const float src_val = src.at<float>(coords[y * bw + x][1], coords[y * bw + x][0]); const float dst_val = src_val * a11 * (x0 + x) + src_val * a12 * (y0 + y) + b1 + src_val * a21 * (x0 + x) + src_val * a22 * (y0 + y) + b2; dst.at<float>(y0 + y, x0 + x) = dst_val; } } } } } int main() { Mat src = imread("input.png", IMREAD_GRAYSCALE); Mat dst; float angle = 30.f; float scale = 1.f; Point2f center(static_cast<float>(src.cols / 2), static_cast<float>(src.rows / 2)); Mat M = getRotationMatrix2D(center, angle, scale); warpAffine_SIMD(src, dst, M); imshow("input", src); imshow("output", dst); waitKey(); return 0; } ``` 注意：此例程仅在支持NEON指令集的ARM处理器上进行过测试。如果您的处理器不支持NEON指令集，请使用适合您处理器的指令集。

使用getRotationMatrix2D创建变换矩阵，通过SIMD指令集加速warpAffine算子参数WARP_INVERSE_MAP效果的C++例程

相关推荐

对旋转文本图像矫正时填入函数getRotationMatrix2D()的角度应该是哪个？

毕设新项目-基于C++开发的校医院远程诊断系统源码+项目使用说明.zip

Real-time-Chroma-key:实时色度键项目

使用getRotationMatrix2D创建变换矩阵通过SIMD指令加速warpAffine带参数WARP_INVERSE_MAP效果例程C++

使用getRotationMatrix2D创建变换矩阵，通过SIMD指令集加速warpAffine算子并带参数WARP_INVERSE_MAP效果的C++例程

使用getRotationMatrix2D创建变换矩阵通过检查CPU适合的SIMD指令加速warpAffine带参数WARP_INVERSE_MAP效果例程C++

使用SIMD指令加速warpAffine带参数WARP_INVERSE_MAP效果变换矩阵为getRotationMatrix2D例程C++

rotationImage：高校项目

Android开发获取重力加速度和磁场强度的方法

图像几何变换和平移、旋转、缩放

Opencv+Python实现图像运动模糊和高斯模糊的示例

基于opencv3.1库的JAVA源码

grpcio-1.47.0-cp310-cp310-linux_armv7l.whl

小程序项目源码-美容预约小程序.zip

MobaXterm 工具

grpcio-1.48.0-cp37-cp37m-linux_armv7l.whl

扁平风格PPT可修改ppt下载(11).zip

基于MATLAB实现的msk信号调制解调过程，包括发送端及接收端信号谱分析过程+使用说明文档.rar

有色金属行业周报有色金属稳增长目标明确工业及贵金属价格普涨-19页.pdf.zip

最新推荐

grpcio-1.47.0-cp310-cp310-linux_armv7l.whl

小程序项目源码-美容预约小程序.zip

MobaXterm 工具

grpcio-1.48.0-cp37-cp37m-linux_armv7l.whl

zigbee-cluster-library-specification

管理建模和仿真的文件

【实战演练】MATLAB用遗传算法改进粒子群GA-PSO算法

openstack的20种接口有哪些

JSBSim Reference Manual

"互动学习：行动中的多样性与论文攻读经历"