加速这一段代码#include <thread> #include <mutex> // 用于保护m_vpdEdgePoints和m_vdEdgeGradient的锁 std::mutex g_mutex; void process_edges(const cv::Mat& RoiMat, const std::vector<cv::Point2d>& m_vpdEquinoxPoints, const double m_dMeasureLength, const double m_dMeasureHeight, const double m_dSigma, const int m_nThresholdCircle, const int m_nTranslationCircle, const std::vector<double>& m_vdMeasureAngle, std::vector<cv::Point2d>& m_vpdEdgePoints, std::vector<double>& m_vdEdgeGradient, int start_idx, int end_idx, Extract1DEdgeCircle Extract1DEdgeCircle) { std::vector<Edge1D_Result> edges; for (int i = start_idx; i < end_idx; i++) { edges = Extract1DEdgeCircle.Get1DEdge(RoiMat, m_vpdEquinoxPoints[i], m_dMeasureLength, m_dMeasureHeight,m_vdMeasureAngle[i], m_dSigma, m_nThresholdCircle, m_nTranslationCircle == 1 ? Translation::Poisitive : Translation::Negative, Selection::Strongest); // 使用锁保护m_vpdEdgePoints和m_vdEdgeGradient //std::lock_guard<std::mutex> lock(g_mutex); for (int j = 0; j < edges.size(); j++) { m_vpdEdgePoints.push_back(edges[j].m_pdEdgePoint); m_vdEdgeGradient.push_back(edges[j].m_dGradient); } } } const int num_threads = 10; std::vector<std::thread> threads(num_threads); std::vector<std::vector<cv::Point2d>> edge_points(num_threads); std::vector<std::vector<double>> edge_gradients(num_threads); for (int i = 0; i < num_threads; i++) { int start_idx = i * m / num_threads; int end_idx = (i + 1) * m / num_threads; threads[i] = std::thread(process_edges, std::ref(RoiMat), std::ref(m_vpdEquinoxPoints), m_dMeasureLength, m_dMeasureHeight, m_dSigma, m_nThresholdCircle, m_nTranslationCircle, std::ref(m_vdMeasureAngle), std::ref(edge_points[i]), std::ref(edge_gradients[i]), start_idx, end_idx, Extract1DEdgeCircle); } for (int i = 0; i < num_threads; i++) { threads[i].join(); // 合并结果 m_vpdEdgePoints.insert(m_vpdEdgePoints.end(), edge_points[i].begin(), edge_points[i].end()); m_vdEdgeGradient.insert(m_vdEdgeGradient.end(), edge_gradients[i].begin(), edge_gradients[i].end()); }
时间: 2024-01-21 12:17:22 浏览: 111
C++11 std::shared_ptr总结与使用示例代码详解
可以使用并行化技术来加速这段代码。具体来说,可以使用std::thread创建多个线程,分别处理不同部分的数据;当多个线程需要写入同一个容器时,再通过std::mutex加锁来保护共享数据的访问。下面是一个可能的实现:
```c++
#include <thread>
#include <mutex>
// Mutex that process_edges locks while appending to its m_vpdEdgePoints /
// m_vdEdgeGradient output vectors.
std::mutex g_mutex;
/**
 * @brief Extracts 1D edges for the measurement points in [start_idx, end_idx)
 *        and appends them to the output vectors.
 *
 * For every index i in the range, Get1DEdge is called with
 * m_vpdEquinoxPoints[i] and m_vdMeasureAngle[i]. Each returned result
 * contributes its m_pdEdgePoint to m_vpdEdgePoints and its m_dGradient to
 * m_vdEdgeGradient, in index order.
 *
 * Results are gathered in function-local buffers and appended to the outputs
 * in one step while g_mutex is held. The extraction itself therefore runs
 * without the global lock, and the lock is taken once per call instead of
 * once per measurement point.
 *
 * @param RoiMat               Image forwarded unchanged to Get1DEdge.
 * @param m_vpdEquinoxPoints   Measurement points; indexed by i.
 * @param m_dMeasureLength     Forwarded unchanged to Get1DEdge.
 * @param m_dMeasureHeight     Forwarded unchanged to Get1DEdge.
 * @param m_dSigma             Forwarded unchanged to Get1DEdge.
 * @param m_nThresholdCircle   Forwarded unchanged to Get1DEdge.
 * @param m_nTranslationCircle 1 selects Translation::Poisitive, any other
 *                             value selects Translation::Negative.
 * @param m_vdMeasureAngle     Per-point angles; indexed by i.
 * @param m_vpdEdgePoints      Output: edge points are appended here.
 * @param m_vdEdgeGradient     Output: edge gradients are appended here.
 * @param start_idx            First index to process (inclusive).
 * @param end_idx              One past the last index to process.
 * @param Extract1DEdgeCircle  Extractor, taken by value so this call works on
 *                             its own copy.
 *
 * @note Indices are not range-checked: every i in [start_idx, end_idx) must
 *       be valid for both m_vpdEquinoxPoints and m_vdMeasureAngle.
 */
void process_edges(const cv::Mat& RoiMat,
                   const std::vector<cv::Point2d>& m_vpdEquinoxPoints,
                   const double m_dMeasureLength,
                   const double m_dMeasureHeight,
                   const double m_dSigma,
                   const int m_nThresholdCircle,
                   const int m_nTranslationCircle,
                   const std::vector<double>& m_vdMeasureAngle,
                   std::vector<cv::Point2d>& m_vpdEdgePoints,
                   std::vector<double>& m_vdEdgeGradient,
                   int start_idx,
                   int end_idx,
                   Extract1DEdgeCircle Extract1DEdgeCircle) {
    // The search direction does not depend on the loop index; pick it once.
    const auto direction = (m_nTranslationCircle == 1) ? Translation::Poisitive
                                                       : Translation::Negative;

    // Local buffers keep the expensive extraction outside the critical section.
    std::vector<cv::Point2d> local_points;
    std::vector<double> local_gradients;

    std::vector<Edge1D_Result> edges;
    for (int i = start_idx; i < end_idx; ++i) {
        edges = Extract1DEdgeCircle.Get1DEdge(RoiMat,
                                              m_vpdEquinoxPoints[i],
                                              m_dMeasureLength,
                                              m_dMeasureHeight,
                                              m_vdMeasureAngle[i],
                                              m_dSigma,
                                              m_nThresholdCircle,
                                              direction,
                                              Selection::Strongest);
        for (const Edge1D_Result& edge : edges) {
            local_points.push_back(edge.m_pdEdgePoint);
            local_gradients.push_back(edge.m_dGradient);
        }
    }

    // Publish everything in one short critical section so that callers which
    // share the output vectors between threads stay protected.
    std::lock_guard<std::mutex> lock(g_mutex);
    m_vpdEdgePoints.insert(m_vpdEdgePoints.end(),
                           local_points.begin(), local_points.end());
    m_vdEdgeGradient.insert(m_vdEdgeGradient.end(),
                            local_gradients.begin(), local_gradients.end());
}
/**
 * @brief Runs process_edges over all measurement points using several threads
 *        and appends the combined results to the output vectors.
 *
 * The index range [0, m_vpdEquinoxPoints.size()) is split into contiguous,
 * near-equal chunks, one per worker thread. Each worker writes into its own
 * private pair of result vectors; after a worker has been joined, its results
 * are appended to m_vpdEdgePoints / m_vdEdgeGradient in chunk order, so the
 * output order matches a sequential pass over the points.
 *
 * @param RoiMat               Image forwarded to process_edges.
 * @param m_vpdEquinoxPoints   Measurement points to process.
 * @param m_dMeasureLength     Forwarded to process_edges.
 * @param m_dMeasureHeight     Forwarded to process_edges.
 * @param m_dSigma             Forwarded to process_edges.
 * @param m_nThresholdCircle   Forwarded to process_edges.
 * @param m_nTranslationCircle Forwarded to process_edges.
 * @param m_vdMeasureAngle     Per-point angles; must provide an entry for
 *                             every measurement point (not checked).
 * @param m_vpdEdgePoints      Output: edge points are appended here.
 * @param m_vdEdgeGradient     Output: edge gradients are appended here.
 * @param num_threads          Requested number of worker threads. Values
 *                             below 1 are treated as 1, and the count never
 *                             exceeds the number of measurement points.
 * @param Extract1DEdgeCircle  Extractor; a copy is handed to every worker.
 *
 * @throws std::system_error if a worker thread cannot be started. Workers
 *         that were already running are joined before the exception
 *         propagates.
 *
 * @note An exception escaping process_edges on a worker thread terminates the
 *       program, as with any std::thread entry function.
 */
void parallel_process_edges(const cv::Mat& RoiMat,
                            const std::vector<cv::Point2d>& m_vpdEquinoxPoints,
                            const double m_dMeasureLength,
                            const double m_dMeasureHeight,
                            const double m_dSigma,
                            const int m_nThresholdCircle,
                            const int m_nTranslationCircle,
                            const std::vector<double>& m_vdMeasureAngle,
                            std::vector<cv::Point2d>& m_vpdEdgePoints,
                            std::vector<double>& m_vdEdgeGradient,
                            int num_threads,
                            Extract1DEdgeCircle Extract1DEdgeCircle) {
    using index_t = std::vector<cv::Point2d>::size_type;

    const index_t point_count = m_vpdEquinoxPoints.size();
    if (point_count == 0) {
        return;  // nothing to measure, so do not start any thread
    }

    // Clamp the worker count to [1, point_count]: a non-positive request would
    // otherwise yield no work (0) or an absurd vector size (negative), and
    // more workers than points would only start idle threads.
    index_t worker_count = num_threads > 0 ? static_cast<index_t>(num_threads) : 1;
    if (worker_count > point_count) {
        worker_count = point_count;
    }

    // One private result buffer per worker; these never reallocate while the
    // workers hold references to their elements.
    std::vector<std::vector<cv::Point2d>> edge_points(worker_count);
    std::vector<std::vector<double>> edge_gradients(worker_count);

    std::vector<std::thread> threads;
    threads.reserve(worker_count);

    try {
        for (index_t i = 0; i < worker_count; ++i) {
            const int start_idx = static_cast<int>(i * point_count / worker_count);
            const int end_idx = static_cast<int>((i + 1) * point_count / worker_count);
            threads.emplace_back(process_edges,
                                 std::ref(RoiMat),
                                 std::ref(m_vpdEquinoxPoints),
                                 m_dMeasureLength,
                                 m_dMeasureHeight,
                                 m_dSigma,
                                 m_nThresholdCircle,
                                 m_nTranslationCircle,
                                 std::ref(m_vdMeasureAngle),
                                 std::ref(edge_points[i]),
                                 std::ref(edge_gradients[i]),
                                 start_idx,
                                 end_idx,
                                 Extract1DEdgeCircle);
        }
    } catch (...) {
        // Destroying a joinable std::thread calls std::terminate, so wait for
        // the workers that did start before letting the error propagate.
        for (std::thread& worker : threads) {
            if (worker.joinable()) {
                worker.join();
            }
        }
        throw;
    }

    for (index_t i = 0; i < worker_count; ++i) {
        threads[i].join();
        // Merge in chunk order to keep the results in point order.
        m_vpdEdgePoints.insert(m_vpdEdgePoints.end(),
                               edge_points[i].begin(), edge_points[i].end());
        m_vdEdgeGradient.insert(m_vdEdgeGradient.end(),
                                edge_gradients[i].begin(), edge_gradients[i].end());
    }
}
```
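在这个实现中,我们定义了一个新的函数parallel_process_edges来处理并行化。它接受与process_edges基本相同的参数(只是用num_threads取代了start_idx和end_idx),并使用std::thread创建多个线程来执行process_edges函数。每个线程处理输入点集的一个子集,并将结果存储在该线程各自独立的向量中。最后,我们按线程顺序将所有线程的结果合并到主向量中,因此结果顺序与串行处理一致。需要说明的是,由于每个线程写入的都是自己独立的向量,process_edges中的g_mutex锁在这种调用方式下并不是必需的;只有当多个线程直接写入同一个向量时才需要它。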
请注意,我们使用了一个简单的均匀分割策略来将输入点集划分为不同的子集,但这可能会导致不平衡的工作负载。更好的方法是使用动态划分策略,即根据实际工作负载将任务动态地分配给可用的线程。不过,这需要更复杂的实现。
阅读全文