使用getRotationMatrix2D创建变换矩阵,并通过SIMD指令加速实现warpAffine带WARP_INVERSE_MAP参数效果的C++例程
时间: 2023-12-04 10:06:15 浏览: 140
以下是使用SIMD指令加速warpAffine函数的示例代码:
```c++
#include <iostream>
#if defined(__AVX__) || defined(__SSE__)
#include <immintrin.h>
#endif
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
// Select the SIMD lane count from the compiler's feature macros:
// 8 when __AVX__ is defined, otherwise 4 when __SSE__ is defined, otherwise 1.
#ifdef __AVX__
#  define SIMD_SIZE 8
#else
#  ifdef __SSE__
#    define SIMD_SIZE 4
#  else
#    define SIMD_SIZE 1
#  endif
#endif
int main()
{
cv::Mat src = cv::imread("input.jpg");
cv::Mat dst(src.size(), src.type());
// 定义变换矩阵
double angle = 45.0;
double scale = 1.0;
cv::Point2f center(src.cols/2.0, src.rows/2.0);
cv::Mat rotation = cv::getRotationMatrix2D(center, angle, scale);
// 将变换矩阵转换为SIMD数据类型
float* data = reinterpret_cast<float*>(rotation.data);
#if SIMD_SIZE == 8
__m256 a = _mm256_set_ps(*(data+24), *(data+20), *(data+16), *(data+12), *(data+8), *(data+4), *(data+0), 0.0f);
__m256 b = _mm256_set_ps(*(data+25), *(data+21), *(data+17), *(data+13), *(data+9), *(data+5), *(data+1), 0.0f);
__m256 c = _mm256_set_ps(*(data+26), *(data+22), *(data+18), *(data+14), *(data+10), *(data+6), *(data+2), 0.0f);
__m256 d = _mm256_set_ps(*(data+27), *(data+23), *(data+19), *(data+15), *(data+11), *(data+7), *(data+3), 0.0f);
#elif SIMD_SIZE == 4
__m128 a = _mm_set_ps(*(data+12), *(data+8), *(data+4), *(data+0));
__m128 b = _mm_set_ps(*(data+13), *(data+9), *(data+5), *(data+1));
__m128 c = _mm_set_ps(*(data+14), *(data+10), *(data+6), *(data+2));
__m128 d = _mm_set_ps(*(data+15), *(data+11), *(data+7), *(data+3));
#else
float a = *(data+0);
float b = *(data+1);
float c = *(data+2);
float d = *(data+3);
float e = *(data+4);
float f = *(data+5);
#endif
// 进行仿射变换
cv::Mat_<cv::Vec3b> src_mat = src;
cv::Mat_<cv::Vec3b> dst_mat = dst;
for (int i = 0; i < src.rows; i++) {
for (int j = 0; j < src.cols; j++) {
cv::Point2f src_pt(j, i);
cv::Point2f dst_pt;
#if SIMD_SIZE == 8
__m256 x = _mm256_set1_ps(src_pt.x);
__m256 y = _mm256_set1_ps(src_pt.y);
__m256 rx = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(a, x), _mm256_mul_ps(b, y)), _mm256_set1_ps(d));
__m256 ry = _mm256_add_ps(_mm256_add_ps(_mm256_mul_ps(c, x), _mm256_mul_ps(d, y)), _mm256_set1_ps(d));
dst_pt.x = _mm256_cvtss_f32(_mm256_extractf128_ps(_mm256_castps256_ps128(rx), 0));
dst_pt.y = _mm256_cvtss_f32(_mm256_extractf128_ps(_mm256_castps256_ps128(ry), 0));
#elif SIMD_SIZE == 4
__m128 x = _mm_set1_ps(src_pt.x);
__m128 y = _mm_set1_ps(src_pt.y);
__m128 rx = _mm_add_ps(_mm_add_ps(_mm_mul_ps(a, x), _mm_mul_ps(b, y)), _mm_set1_ps(d));
__m128 ry = _mm_add_ps(_mm_add_ps(_mm_mul_ps(c, x), _mm_mul_ps(d, y)), _mm_set1_ps(d));
dst_pt.x = _mm_cvtss_f32(_mm_extract_ps(rx, 0));
dst_pt.y = _mm_cvtss_f32(_mm_extract_ps(ry, 0));
#else
dst_pt.x = a*src_pt.x + b*src_pt.y + d;
dst_pt.y = c*src_pt.x + d*src_pt.y + f;
#endif
dst_mat(i, j) = cv::Vec3b(0, 0, 0);
if (dst_pt.x >= 0 && dst_pt.x < dst.cols && dst_pt.y >= 0 && dst_pt.y < dst.rows) {
dst_mat(i, j) = src_mat(cv::Point2f(dst_pt.x, dst_pt.y));
}
}
}
cv::imshow("src", src);
cv::imshow("dst", dst);
cv::waitKey();
return 0;
}
```
在本示例中,我们根据编译器定义的宏(__AVX__ / __SSE__)选择AVX或SSE指令来加速仿射变换中的坐标计算。由于不同的CPU支持不同的SIMD指令集,编译时需要根据目标平台启用相应的选项(例如 -mavx 或 -msse)。此外,为了方便起见,示例在仿射变换时使用了简单的双重循环逐像素采样,而没有调用OpenCV自带的优化实现。如果您需要更高的性能,建议直接调用OpenCV的cv::warpAffine函数,并指定cv::WARP_INVERSE_MAP标志。
阅读全文
相关推荐
![-](https://img-home.csdnimg.cn/images/20250102104920.png)
![-](https://img-home.csdnimg.cn/images/20241231045053.png)
![-](https://img-home.csdnimg.cn/images/20241231044955.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![-](https://img-home.csdnimg.cn/images/20241231045053.png)
![-](https://img-home.csdnimg.cn/images/20241231044955.png)
![pdf](https://img-home.csdnimg.cn/images/20241231044930.png)
![pptx](https://img-home.csdnimg.cn/images/20241231044947.png)
![zip](https://img-home.csdnimg.cn/images/20241231045053.png)
![pdf](https://img-home.csdnimg.cn/images/20241231044930.png)
![pdf](https://img-home.csdnimg.cn/images/20241231044930.png)