使用getRotationMatrix2D创建变换矩阵,通过SIMD指令集加速warpAffine算子参数WARP_INVERSE_MAP效果的C++例程
时间: 2023-12-10 20:37:44 浏览: 147
以下是一个C++例程:使用getRotationMatrix2D创建变换矩阵,并借助SIMD(SSE)指令加速实现warpAffine在WARP_INVERSE_MAP标志下的效果:
```c++
#include <cstdio>

#include <emmintrin.h>

#include <opencv2/opencv.hpp>
#include <opencv2/core/hal/intrin.hpp>
using namespace cv;
void warpAffineInverseMap(const Mat& src, Mat& dst, const Mat& M)
{
CV_Assert(src.type() == CV_8UC1);
int rows = src.rows, cols = src.cols;
int drows = dst.rows, dcols = dst.cols;
CV_Assert(rows > 0 && cols > 0 && drows > 0 && dcols > 0);
const float* m = M.ptr<float>(0);
__m128 mm0 = _mm_set_ps(m[0], m[1], m[2], 0);
__m128 mm1 = _mm_set_ps(m[3], m[4], m[5], 0);
__m128 mm2 = _mm_set_ps(0, 0, 1, 0);
__m128i vddx = _mm_set_epi32(3, 2, 1, 0);
__m128i vddy = _mm_set_epi32(dcols + 3, dcols + 2, dcols + 1, dcols + 0);
for (int y = 0; y < drows; ++y) {
float* pdst = dst.ptr<float>(y);
int* pdx = (int*)pdst;
int* pdy = pdx + 4;
for (int x = 0; x < dcols; x += 4) {
__m128i vmx = _mm_set_epi32(x + 3, x + 2, x + 1, x + 0);
__m128i vmy = _mm_set1_epi32(y);
__m128 mx = _mm_cvtepi32_ps(vmx);
__m128 my = _mm_cvtepi32_ps(vmy);
__m128 mxx = _mm_mul_ps(mm0, mx);
__m128 mxy = _mm_mul_ps(mm1, my);
__m128 mxs = _mm_add_ps(mxx, mxy);
__m128 mys = _mm_add_ps(_mm_mul_ps(mm1, mx), _mm_mul_ps(mm0, my));
__m128 mzs = _mm_add_ps(_mm_mul_ps(mm2, mx), _mm_mul_ps(mm2, my));
__m128i vixs = _mm_cvtps_epi32(mxs);
__m128i viys = _mm_cvtps_epi32(mys);
__m128i vidx = _mm_cvtps_epi32(_mm_div_ps(_mm_castsi128_ps(_mm_sub_epi32(vixs, vmx)), mzs));
__m128i vidy = _mm_cvtps_epi32(_mm_div_ps(_mm_castsi128_ps(_mm_sub_epi32(viys, vmy)), mzs));
__m128i vmaskx = _mm_cmplt_epi32(_mm_add_epi32(vixs, vddx), _mm_set1_epi32(cols));
__m128i vmasky = _mm_cmplt_epi32(_mm_add_epi32(viys, vddy), _mm_set1_epi32(rows));
__m128i vmask = _mm_and_si128(vmaskx, vmasky);
__m128i vidx2 = _mm_and_si128(vidx, vmask);
__m128i vidy2 = _mm_and_si128(vidy, vmask);
pdx[x + 0] = vidx2.m128i_i32[0];
pdx[x + 1] = vidx2.m128i_i32[1];
pdx[x + 2] = vidx2.m128i_i32[2];
pdx[x + 3] = vidx2.m128i_i32[3];
pdy[x + 0] = vidy2.m128i_i32[0];
pdy[x + 1] = vidy2.m128i_i32[1];
pdy[x + 2] = vidy2.m128i_i32[2];
pdy[x + 3] = vidy2.m128i_i32[3];
}
}
for (int y = 0; y < drows; ++y) {
const uchar* psrc = src.ptr(y);
float* pdst = dst.ptr<float>(y);
for (int x = 0; x < dcols; ++x) {
int sx = pdst[x * 2];
int sy = pdst[x * 2 + 1];
if (sx >= 0 && sy >= 0 && sx < cols && sy < rows) {
pdst[x] = psrc[sy * cols + sx];
}
else {
pdst[x] = 0;
}
}
}
}
int main()
{
Mat src = imread("lena.png", IMREAD_GRAYSCALE);
Mat dst(src.size(), CV_8UC1);
double angle = 30;
double scale = 1;
Point2f center(src.cols / 2.f, src.rows / 2.f);
Mat M = getRotationMatrix2D(center, angle, scale);
warpAffineInverseMap(src, dst, M);
imshow("src", src);
imshow("dst", dst);
waitKey();
}
```
该例程使用SSE指令集进行加速,因此需要在支持SSE2的x86/x64平台上编译,并确保编译器已启用相应的指令集支持。例程中的warpAffineInverseMap函数用于实现warpAffine在WARP_INVERSE_MAP标志下的效果:把变换矩阵M直接当作从目标图像到源图像的映射,计算目标图像中每个像素在源图像中的对应位置,再从源图像中取出该位置的像素值,写入目标图像。
阅读全文