基于聚类的离群点检测C++
时间: 2023-11-14 12:24:36 浏览: 100
一种基于多重聚类的离群点检测算法 (2013年)
基于聚类的离群点检测算法可以使用C++实现。以下是一个简单的示例代码实现:
```c++
#include <iostream>
#include <vector>
#include <cmath>
#include <algorithm>
using namespace std;
/// A location in the 2-D plane; plain aggregate, so `{x, y}` brace-initialization works.
struct Point {
    double x;  ///< first coordinate
    double y;  ///< second coordinate
};
/// Returns the straight-line (Euclidean) distance between `p1` and `p2`.
double euclideanDist(Point p1, Point p2) {
    const double dx = p2.x - p1.x;
    const double dy = p2.y - p1.y;
    const double squaredLength = dx * dx + dy * dy;
    return sqrt(squaredLength);
}
vector<vector<Point>> clustering(vector<Point> points, double epsilon, int minPts) {
vector<vector<Point>> clusters;
int n = points.size();
vector<int> visited(n, 0);
vector<int> noise;
for (int i = 0; i < n; i++) {
if (visited[i] == 1) {
continue;
}
visited[i] = 1;
vector<int> neighborPts;
for (int j = 0; j < n; j++) {
if (i == j) {
continue;
}
double dist = euclideanDist(points[i], points[j]);
if (dist <= epsilon) {
neighborPts.push_back(j);
}
}
if (neighborPts.size() < minPts) {
noise.push_back(i);
} else {
vector<Point> cluster;
cluster.push_back(points[i]);
for (int k = 0; k < neighborPts.size(); k++) {
int index = neighborPts[k];
if (visited[index] == 0) {
visited[index] = 1;
vector<int> neighborPts2;
for (int m = 0; m < n; m++) {
if (index == m) {
continue;
}
double dist2 = euclideanDist(points[index], points[m]);
if (dist2 <= epsilon) {
neighborPts2.push_back(m);
}
}
if (neighborPts2.size() >= minPts) {
neighborPts.insert(neighborPts.end(), neighborPts2.begin(), neighborPts2.end());
}
}
bool isFound = false;
for (int l = 0; l < cluster.size(); l++) {
if (cluster[l].x == points[index].x && cluster[l].y == points[index].y) {
isFound = true;
break;
}
}
if (!isFound) {
cluster.push_back(points[index]);
}
}
clusters.push_back(cluster);
}
}
return clusters;
}
int main() {
vector<Point> points = {{1, 1}, {1, 2}, {2, 1}, {10, 10}, {10, 12}, {12, 10}, {20, 20}};
double epsilon = 3;
int minPts = 2;
vector<vector<Point>> clusters = clustering(points, epsilon, minPts);
for (int i = 0; i < clusters.size(); i++) {
cout << "Cluster " << i << ":" << endl;
for (int j = 0; j < clusters[i].size(); j++) {
cout << "(" << clusters[i][j].x << ", " << clusters[i][j].y << ")" << endl;
}
}
return 0;
}
```
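该示例代码实现了一个基于密度聚类(类似 DBSCAN)的离群点检测思路:使用欧几里得距离衡量点与点之间的远近,与某个点的距离不超过 epsilon 的其他点构成该点的邻域;如果一个点的邻域内至少有 minPts 个点,就以它为起点向外扩展出一个聚类,否则该点不会成为聚类的起点。最终没有被归入任何聚类的点即为离群点(噪声点)。需要注意的是,示例中的 clustering 函数只返回各个聚类,main 函数也只输出聚类结果,并没有单独输出离群点;例如示例数据中的点 (20, 20) 不属于任何聚类,因此不会出现在输出中。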
阅读全文