C++ OpenCV NCC（归一化互相关）使用 SSE 优化的具体实现代码
时间: 2024-05-15 19:13:17 浏览: 144
以下是使用 SSE 优化的归一化互相关(NCC)的 C++ 代码实现:
```cpp
#include <algorithm>
#include <cmath>
#include <iostream>
#include <opencv2/opencv.hpp>
#include <emmintrin.h>
using namespace cv;
void ncc_sse(const Mat& img1, const Mat& img2, Mat& result)
{
int width = img1.cols - img2.cols + 1;
int height = img1.rows - img2.rows + 1;
result.create(height, width, CV_32FC1);
__m128i zero = _mm_setzero_si128();
__m128 sum1, sum2, sum3, sum4, mean1, mean2, std1, std2, ncc, temp;
__m128i p1, p2, mask;
float* ptr1, * ptr2, * res;
int i, j, k;
for (i = 0; i < height; i++)
{
ptr1 = (float*)img1.ptr(i);
for (j = 0; j < width; j++)
{
sum1 = sum2 = sum3 = sum4 = _mm_set_ps1(0.f);
for (k = 0; k < img2.rows; k++)
{
ptr2 = (float*)img2.ptr(k);
p1 = _mm_loadu_si128((__m128i*)(ptr1 + j));
p2 = _mm_loadu_si128((__m128i*)(ptr2));
sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(ptr1 + j), _mm_loadu_ps(ptr2)));
sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(ptr1 + j + 4), _mm_loadu_ps(ptr2 + 4)));
sum3 = _mm_add_ps(sum3, _mm_mul_ps(_mm_loadu_ps(ptr1 + j + 8), _mm_loadu_ps(ptr2 + 8)));
sum4 = _mm_add_ps(sum4, _mm_mul_ps(_mm_loadu_ps(ptr1 + j + 12), _mm_loadu_ps(ptr2 + 12)));
ptr1 += img1.cols;
ptr2 += img2.cols;
}
mean1 = _mm_set_ps1(cv::mean(img1(cv::Rect(j, i, img2.cols, img2.rows)))[0]);
mean2 = _mm_set_ps1(cv::mean(img2)[0]);
temp = _mm_mul_ps(_mm_sub_ps(sum1, _mm_mul_ps(mean1, mean2)), _mm_rcp_ps(_mm_set_ps1(img2.cols * img2.rows)));
sum2 = _mm_mul_ps(_mm_sub_ps(sum2, _mm_mul_ps(mean1, mean2)), _mm_rcp_ps(_mm_set_ps1(img2.cols * img2.rows)));
sum3 = _mm_mul_ps(_mm_sub_ps(sum3, _mm_mul_ps(mean1, mean2)), _mm_rcp_ps(_mm_set_ps1(img2.cols * img2.rows)));
sum4 = _mm_mul_ps(_mm_sub_ps(sum4, _mm_mul_ps(mean1, mean2)), _mm_rcp_ps(_mm_set_ps1(img2.cols * img2.rows)));
std1 = _mm_sqrt_ps(_mm_sub_ps(_mm_mul_ps(temp, temp), _mm_mul_ps(mean1, mean1)));
std2 = _mm_sqrt_ps(_mm_sub_ps(_mm_mul_ps(sum2, sum2), _mm_mul_ps(mean2, mean2)));
std1 = _mm_mul_ps(_mm_rcp_ps(std1), _mm_rcp_ps(std2));
ncc = _mm_mul_ps(temp, std1);
res = (float*)result.ptr(i);
_mm_storeu_ps(res + j, ncc);
}
}
}
int main()
{
Mat img1 = imread("img1.png", IMREAD_GRAYSCALE);
Mat img2 = imread("img2.png", IMREAD_GRAYSCALE);
Mat result;
ncc_sse(img1, img2, result);
imshow("Result", result);
waitKey(0);
return 0;
}
```
其中,`_mm_set_ps1` 函数用于将同一个单精度浮点数广播到 SSE 寄存器的四个通道;`_mm_loadu_si128` 函数用于从无需对齐的内存地址加载一个 `__m128i` 类型的整数向量;`_mm_loadu_ps` 函数用于从无需对齐的内存地址加载四个单精度浮点数;`_mm_add_ps` 函数用于将两个 SSE 浮点寄存器中的数据逐通道相加;`_mm_mul_ps` 函数用于将两个 SSE 浮点寄存器中的数据逐通道相乘;`_mm_set_ps` 函数用于将四个单精度浮点数组装成一个 SSE 浮点寄存器;`_mm_storeu_ps` 函数用于将 SSE 浮点寄存器中的四个值存回无需对齐的内存地址。
阅读全文
相关推荐














