C++例程：使用getRotationMatrix2D创建变换矩阵，并通过SIMD指令加速带WARP_INVERSE_MAP参数的warpAffine

下面是一个示例代码，演示如何使用getRotationMatrix2D和SIMD指令加速warpAffine带参数WARP_INVERSE_MAP： ```C++ #include <iostream> #include <opencv2/opencv.hpp> using namespace cv; void warpAffine_SIMD(const Mat& src, Mat& dst, const Mat& M, int flags = INTER_LINEAR) { CV_Assert(!src.empty()); dst.create(src.size(), src.type()); int cn = src.channels(); const int BLOCK_SIZE = 8; const int BLOCK_SIZE_2 = BLOCK_SIZE * BLOCK_SIZE; __m256i x16 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0); for (int i = 0; i < src.rows; i += BLOCK_SIZE) { for (int j = 0; j < src.cols; j += BLOCK_SIZE) { __m256i y16 = _mm256_set1_epi32(i); __m256i M0 = _mm256_set1_epi32(static_cast<int>(M.at<double>(0, 0) * (1 << 16))); __m256i M1 = _mm256_set1_epi32(static_cast<int>(M.at<double>(0, 1) * (1 << 16))); __m256i M2 = _mm256_set1_epi32(static_cast<int>(M.at<double>(0, 2) * (1 << 16))); __m256i M3 = _mm256_set1_epi32(static_cast<int>(M.at<double>(1, 0) * (1 << 16))); __m256i M4 = _mm256_set1_epi32(static_cast<int>(M.at<double>(1, 1) * (1 << 16))); __m256i M5 = _mm256_set1_epi32(static_cast<int>(M.at<double>(1, 2) * (1 << 16))); for (int k = 0; k < cn; ++k) { __m256i buf16[BLOCK_SIZE_2]; for (int l = 0; l < BLOCK_SIZE; ++l) { __m256i x16l = _mm256_add_epi32(_mm256_set1_epi32(j), x16); __m256i mx16l = _mm256_add_epi32( _mm256_add_epi32(_mm256_mullo_epi32(M0, x16l), _mm256_mullo_epi32(M1, y16)), M2); __m256i my16l = _mm256_add_epi32( _mm256_add_epi32(_mm256_mullo_epi32(M3, x16l), _mm256_mullo_epi32(M4, y16)), M5); __m256i ix16l = _mm256_srai_epi32(mx16l, 16); __m256i iy16l = _mm256_srai_epi32(my16l, 16); __m256i bx16l = _mm256_set1_epi32(j + l); __m256i by16l = _mm256_set1_epi32(i + k); __m256i fx16l = _mm256_cvtps_epi32( _mm256_mul_ps( _mm256_mul_ps( _mm256_mul_ps( _mm256_cvtepi32_ps(_mm256_sub_epi32(mx16l, _mm256_slli_epi32(ix16l, 16))), _mm256_cvtepi32_ps(_mm256_sub_epi32(my16l, _mm256_slli_epi32(iy16l, 16))) ), _mm256_cvtepi32_ps(_mm256_sub_epi32( _mm256_set1_epi32(1 << 16), _mm256_mullo_epi32( _mm256_sub_epi32( mx16l, 
_mm256_slli_epi32(ix16l, 16) ), _mm256_sub_epi32( my16l, _mm256_slli_epi32(iy16l, 16) ) ) )) ), _mm256_rcp_ps( _mm256_cvtepi32_ps(_mm256_set1_epi32(1 << 15)) ) ) ); buf16[l * BLOCK_SIZE + 0] = _mm256_add_epi32(ix16l, fx16l); buf16[l * BLOCK_SIZE + 1] = _mm256_add_epi32(iy16l, fx16l); } for (int l = 0; l < BLOCK_SIZE_2; ++l) { int ix = _mm256_extract_epi32(buf16[l], 0) & (src.cols - 1); int iy = _mm256_extract_epi32(buf16[l], 4) & (src.rows - 1); const uchar* src_data = src.ptr<uchar>(iy); uchar* dst_data = dst.ptr<uchar>(i + (l / BLOCK_SIZE)); dst_data[j + (l % BLOCK_SIZE) * cn + k] = src_data[ix * cn + k]; } } } } } int main() { Mat img = imread("lena.jpg"); if (img.empty()) { std::cerr << "Failed to read image" << std::endl; return -1; } double angle = 30.0; double scale = 1.0; Point2f center(img.cols / 2.0, img.rows / 2.0); Mat M = getRotationMatrix2D(center, angle, scale); Mat dst; warpAffine_SIMD(img, dst, M, WARP_INVERSE_MAP); imshow("Original", img); imshow("Rotated", dst); waitKey(); return 0; } ``` 在此示例代码中，我们使用SIMD指令加速warpAffine带参数WARP_INVERSE_MAP。具体来说，我们使用AVX2指令集中的__m256i类型来处理8个32位整数。我们还使用_mm256_set1_epi32，_mm256_set_epi32，_mm256_add_epi32，_mm256_mullo_epi32和_mm256_srai_epi32等函数来执行AVX2指令。最后，我们使用_mm256_extract_epi32函数从__m256i类型中提取单个整数。请注意，为了使此示例代码更易于理解，我们使用8x8块大小。在实际应用中，您可能需要根据具体情况调整块大小。

C++例程：使用getRotationMatrix2D创建变换矩阵，并通过SIMD指令加速带WARP_INVERSE_MAP参数的warpAffine

相关推荐

使用getRotationMatrix2D创建变换矩阵通过检查CPU适合的SIMD指令加速warpAffine带参数WARP_INVERSE_MAP效果例程C++

使用SIMD指令加速warpAffine带参数WARP_INVERSE_MAP效果变换矩阵为getRotationMatrix2D例程C++

使用getRotationMatrix2D创建变换矩阵，通过SIMD指令集加速warpAffine算子参数WARP_INVERSE_MAP效果的C++例程

使用getRotationMatrix2D创建变换矩阵，通过SIMD指令集加速warpAffine算子并带参数WARP_INVERSE_MAP效果的C++例程

组成原理课程实验：MIPS 流水线CPU、实现36条指令、转发、冒险检测-内含源码和说明书.zip

setuptools-50.0.2-py3-none-any.whl

setuptools-1.1.6.tar.gz

CEA二次开发脚本：用于ECSP配比设计

环形数组是一种特殊的数据结构

什么是环形数组以及学习环形数组的意义是什么

母亲节祝福 Python 代码（包含详细介绍）

setuptools-0.7.4.zip

max111111111

攻防世界问题的概要介绍与分析

关于java出租车计价器设计与实现.zip

华为OD机试D卷 - 拼接URL - 免费看解析和代码.html

setuptools-8.2.tar.gz

C语言输出母亲节祝福（内含详细描述）

setuptools-40.4.2.zip

最新推荐

组成原理课程实验：MIPS 流水线CPU、实现36条指令、转发、冒险检测-内含源码和说明书.zip

setuptools-50.0.2-py3-none-any.whl

setuptools-1.1.6.tar.gz

CEA二次开发脚本：用于ECSP配比设计

zigbee-cluster-library-specification

管理建模和仿真的文件

实现实时数据湖架构：Kafka与Hive集成

用 Python 画一个可以动的爱心

JSBSim Reference Manual

"互动学习：行动中的多样性与论文攻读经历"