使用getRotationMatrix2D创建变换矩阵,通过SIMD指令集加速warpAffine算子参数WARP_INVERSE_MAP效果的C++例程
时间: 2023-12-10 20:37:30 浏览: 125
二维图形的几何变换 对称平移缩放旋转 矩阵实现 C++
以下是一个使用SIMD(SSE)指令集加速、实现与warpAffine的WARP_INVERSE_MAP标志等效效果的C++例程:
```c++
#include <emmintrin.h>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <opencv2/core/hal/intrin.hpp>
using namespace cv;
#define BLOCK_SIZE 4
void warpAffineInverseMap_SIMD(const Mat& src, Mat& dst, const Mat& M)
{
CV_Assert(src.type() == CV_8UC1);
int w = src.cols, h = src.rows;
dst.create(h, w, CV_8UC1);
float M0 = static_cast<float>(M.at<double>(0, 0));
float M1 = static_cast<float>(M.at<double>(0, 1));
float M2 = static_cast<float>(M.at<double>(0, 2));
float M3 = static_cast<float>(M.at<double>(1, 0));
float M4 = static_cast<float>(M.at<double>(1, 1));
float M5 = static_cast<float>(M.at<double>(1, 2));
__m128 m0 = _mm_set_ps(M0, M0, M0, M0);
__m128 m1 = _mm_set_ps(M1, M1, M1, M1);
__m128 m2 = _mm_set_ps(M2, M2, M2, M2);
__m128 m3 = _mm_set_ps(M3, M3, M3, M3);
__m128 m4 = _mm_set_ps(M4, M4, M4, M4);
__m128 m5 = _mm_set_ps(M5, M5, M5, M5);
int x, y;
for (y = 0; y < h; y += BLOCK_SIZE) {
for (x = 0; x < w; x += 4) {
__m128 fx = _mm_set_ps(x + 3, x + 2, x + 1, x);
__m128 fy = _mm_set_ps(y + 3, y + 2, y + 1, y);
__m128 rx = _mm_add_ps(_mm_add_ps(_mm_mul_ps(fx, m0), _mm_mul_ps(fy, m1)), m2);
__m128 ry = _mm_add_ps(_mm_add_ps(_mm_mul_ps(fx, m3), _mm_mul_ps(fy, m4)), m5);
__m128i ix = _mm_cvtps_epi32(rx);
__m128i iy = _mm_cvtps_epi32(ry);
__m128i mask_x = _mm_cmplt_epi32(ix, _mm_set1_epi32(0));
__m128i mask_y = _mm_cmplt_epi32(iy, _mm_set1_epi32(0));
__m128i mask = _mm_or_si128(mask_x, mask_y);
ix = _mm_andnot_si128(mask, ix);
iy = _mm_andnot_si128(mask, iy);
__m128i mask_x2 = _mm_cmpgt_epi32(ix, _mm_set1_epi32(w - 1));
__m128i mask_y2 = _mm_cmpgt_epi32(iy, _mm_set1_epi32(h - 1));
__m128i mask2 = _mm_or_si128(mask_x2, mask_y2);
ix = _mm_andnot_si128(mask2, ix);
iy = _mm_andnot_si128(mask2, iy);
__m128i a0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[0]) + ix[0])), _mm_setzero_si128());
__m128i a1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[1]) + ix[1])), _mm_setzero_si128());
__m128i a2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[2]) + ix[2])), _mm_setzero_si128());
__m128i a3 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[3]) + ix[3])), _mm_setzero_si128());
__m128i b0 = _mm_unpackhi_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[0]) + ix[0])), _mm_setzero_si128());
__m128i b1 = _mm_unpackhi_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[1]) + ix[1])), _mm_setzero_si128());
__m128i b2 = _mm_unpackhi_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[2]) + ix[2])), _mm_setzero_si128());
__m128i b3 = _mm_unpackhi_epi8(_mm_loadl_epi64((const __m128i*)(src.ptr(iy[3]) + ix[3])), _mm_setzero_si128());
__m128i c0 = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a0, b0), 1), _mm_setzero_si128());
__m128i c1 = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a1, b1), 1), _mm_setzero_si128());
__m128i c2 = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a2, b2), 1), _mm_setzero_si128());
__m128i c3 = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(a3, b3), 1), _mm_setzero_si128());
__m128i d0 = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(c0, c1), 1), _mm_setzero_si128());
__m128i d1 = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(c2, c3), 1), _mm_setzero_si128());
__m128i e0 = _mm_packus_epi16(_mm_srli_epi16(_mm_add_epi16(d0, d1), 1), _mm_setzero_si128());
*(int*)(dst.ptr(y) + x) = e0[0];
*(int*)(dst.ptr(y + 1) + x) = e0[1];
*(int*)(dst.ptr(y + 2) + x) = e0[2];
*(int*)(dst.ptr(y + 3) + x) = e0[3];
}
}
}
int main()
{
Mat src = imread("test.png", IMREAD_GRAYSCALE);
if (src.empty()) {
std::cerr << "failed to open image" << std::endl;
return -1;
}
Mat dst;
Mat M = getRotationMatrix2D(Point(src.cols / 2, src.rows / 2), 45, 1);
warpAffineInverseMap_SIMD(src, dst, M);
imshow("src", src);
imshow("dst", dst);
waitKey();
return 0;
}
```
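其中,warpAffineInverseMap_SIMD函数按照WARP_INVERSE_MAP的约定,把矩阵M直接当作“目标坐标→源坐标”的映射,并使用SSE指令集一次并行计算4个像素的源坐标,以加速坐标计算和取样过程。需要注意:该函数只接受CV_8UC1单通道图像;BLOCK_SIZE对应128位SSE寄存器中float通道的个数(4),必须与代码中每次处理的像素数保持一致,不能单独修改,如需调整处理粒度,应同时修改循环步长和读写逻辑。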
阅读全文