java数据挖掘K-Means聚类算法实现
时间: 2024-05-09 11:21:48 浏览: 102
对于Java数据挖掘领域中的K-Means聚类算法实现,通常有以下步骤:
1. 初始化K个聚类中心,可以随机或根据一定规则选取。
2. 针对每个点,找到距离其最近的聚类中心,并将其归类到该聚类中。
3. 针对每个聚类,重新计算其聚类中心,即将该聚类中所有点的坐标平均值作为新的聚类中心。
4. 重复第2-3步,直到聚类中心不再变化(即达到收敛)或达到最大迭代次数,算法结束。
整体Java实现见下:
```java
/**
 * K-Means clustering for two-dimensional {@link Point} data.
 *
 * <p>Usage: construct with the data set, the number of clusters and an iteration cap, call
 * {@link #cluster()}, then read the result with {@link #getClusters()}.
 *
 * <p>Initial centroids are chosen at random, so results can differ between runs.
 * Instances are not safe for concurrent use.
 */
public class KMeansClustering {
    private final List<Point> points; // data set (held by reference, not copied)
    private final int clusterNum; // number of clusters (k)
    private final int maxIteration; // upper bound on assignment/update passes
    private final List<Point> centroids; // current centroid of each cluster
    private final List<List<Point>> clusters; // data points currently assigned to each cluster
    private int iteration; // number of passes performed by the latest cluster() call

    /**
     * Creates a clustering task.
     *
     * @param points the data set; must not be null
     * @param clusterNum the number of clusters; must be at least 1
     * @param maxIteration the maximum number of passes; values below 1 mean no pass is run
     * @throws NullPointerException if {@code points} is null
     * @throws IllegalArgumentException if {@code clusterNum} is less than 1
     */
    public KMeansClustering(List<Point> points, int clusterNum, int maxIteration) {
        if (points == null) {
            throw new NullPointerException("points must not be null");
        }
        if (clusterNum < 1) {
            throw new IllegalArgumentException("clusterNum must be >= 1, got " + clusterNum);
        }
        this.points = points;
        this.clusterNum = clusterNum;
        this.maxIteration = maxIteration;
        this.centroids = new ArrayList<>(clusterNum);
        this.clusters = new ArrayList<>(clusterNum);
        this.iteration = 0;
    }

    /**
     * Picks the initial centroids from the data set.
     *
     * <p>Points are drawn without replacement (partial Fisher-Yates shuffle over the indices) so
     * that the same data point is not used for two centroids, which would leave one of the two
     * clusters permanently empty. Only when more clusters than points are requested are points
     * reused.
     */
    private void initCentroids() {
        Random random = new Random();
        int size = points.size();
        int[] indices = new int[size];
        for (int i = 0; i < size; i++) {
            indices[i] = i;
        }
        for (int i = 0; i < clusterNum; i++) {
            int chosen;
            if (i < size) {
                // Swap a random not-yet-used index into position i.
                int j = i + random.nextInt(size - i);
                int tmp = indices[i];
                indices[i] = indices[j];
                indices[j] = tmp;
                chosen = indices[i];
            } else {
                // All distinct points are used up; fall back to drawing with replacement.
                chosen = random.nextInt(size);
            }
            Point seed = points.get(chosen);
            // Copy the coordinates so the centroid is a separate object from the data point.
            centroids.add(new Point(seed.getX(), seed.getY()));
        }
    }

    /** Returns the Euclidean distance between two points. */
    private double euclideanDistance(Point point1, Point point2) {
        double dx = point1.getX() - point2.getX();
        double dy = point1.getY() - point2.getY();
        return Math.sqrt(dx * dx + dy * dy);
    }

    /** Returns true when both points have exactly the same coordinates. */
    private boolean sameLocation(Point a, Point b) {
        return a.getX() == b.getX() && a.getY() == b.getY();
    }

    /**
     * Empties every cluster and assigns each data point to the cluster of its nearest centroid.
     * Ties go to the centroid with the lowest index.
     */
    private void assignPointsToClusters() {
        for (List<Point> cluster : clusters) {
            cluster.clear();
        }
        for (Point point : points) {
            double minDistance = Double.POSITIVE_INFINITY;
            // Default to cluster 0 so a NaN or infinite distance to every centroid cannot
            // produce an invalid index.
            int clusterIndex = 0;
            for (int i = 0; i < centroids.size(); i++) {
                double distance = euclideanDistance(point, centroids.get(i));
                if (distance < minDistance) {
                    minDistance = distance;
                    clusterIndex = i;
                }
            }
            clusters.get(clusterIndex).add(point);
        }
    }

    /**
     * Moves each centroid to the mean of the points assigned to its cluster.
     * An empty cluster keeps its previous centroid.
     *
     * @return true if no centroid moved, i.e. the algorithm has converged
     */
    private boolean recalculateCentroids() {
        boolean converged = true;
        for (int i = 0; i < centroids.size(); i++) {
            List<Point> cluster = clusters.get(i);
            if (cluster.isEmpty()) {
                continue; // no members: a mean is undefined, keep the old centroid
            }
            double sumX = 0;
            double sumY = 0;
            for (Point point : cluster) {
                sumX += point.getX();
                sumY += point.getY();
            }
            Point newCentroid = new Point(sumX / cluster.size(), sumY / cluster.size());
            // Coordinates are compared directly so the check does not depend on how Point
            // implements equals.
            if (!sameLocation(newCentroid, centroids.get(i))) {
                centroids.set(i, newCentroid);
                converged = false;
            }
        }
        return converged;
    }

    /**
     * Runs K-Means: picks random initial centroids, then alternates assignment and centroid
     * update until no centroid moves or {@code maxIteration} passes have been made.
     *
     * <p>All state from a previous run is discarded first, so this method can be called
     * repeatedly on the same instance.
     *
     * @throws IllegalArgumentException if the data set is empty
     */
    public void cluster() {
        if (points.isEmpty()) {
            throw new IllegalArgumentException("cannot cluster an empty data set");
        }
        // Reset the state so a second call does not accumulate centroids or clusters.
        centroids.clear();
        clusters.clear();
        for (int i = 0; i < clusterNum; i++) {
            clusters.add(new ArrayList<>());
        }
        iteration = 0;

        initCentroids();
        while (iteration < maxIteration) {
            assignPointsToClusters();
            iteration++;
            if (recalculateCentroids()) {
                break;
            }
        }
    }

    /**
     * Returns the data points of each cluster.
     *
     * @return the internal list of clusters (not a copy); empty until {@link #cluster()} is called
     */
    public List<List<Point>> getClusters() {
        return clusters;
    }
}
```
其中,Point类表示一个二维空间中的数据点,其代码如下:
```java
public class Point {
private double x; //横坐标
private double y; //纵坐标
public Point(double x, double y) {
this.x = x;
this.y = y;
}
public double getX() {
return x;
}
public double getY() {
return y;
}
public boolean equals(Point point) {
return x == point.getX() && y == point.getY();
}
}
```
使用该算法的示例代码如下:
```java
/**
 * Demo entry point: clusters seven sample points into three clusters (at most 100 iterations)
 * and prints the members of each cluster on its own line.
 */
public static void main(String[] args) {
    // Sample data set as (x, y) pairs.
    double[][] coordinates = {
        {1, 1}, {1.5, 2}, {3, 4}, {5, 7}, {3.5, 5}, {4.5, 5}, {3.5, 4.5}
    };
    List<Point> points = new ArrayList<>();
    for (double[] xy : coordinates) {
        points.add(new Point(xy[0], xy[1]));
    }

    KMeansClustering kMeans = new KMeansClustering(points, 3, 100);
    kMeans.cluster();

    // Clusters are numbered from 1 in the output.
    int label = 1;
    for (List<Point> members : kMeans.getClusters()) {
        System.out.println("Cluster " + label + ": " + members);
        label++;
    }
}
```
执行结果(初始聚类中心是随机选取的,每次运行的结果可能不同;以下为其中一次运行的输出示例,输出格式假定Point类提供了相应的toString()方法):
```
Cluster 1: [Point{x=1.0, y=1.0}, Point{x=1.5, y=2.0}]
Cluster 2: [Point{x=3.0, y=4.0}, Point{x=5.0, y=7.0}, Point{x=3.5, y=5.0}, Point{x=4.5, y=5.0}, Point{x=3.5, y=4.5}]
Cluster 3: []
```
在这次运行中,数据集包含7个数据点,K-Means算法将其聚成了3个簇:第一个簇包含前两个点,第二个簇包含其余5个点,第三个簇没有包含任何点。出现空簇说明K-Means并不保证每个簇都非空,聚类结果依赖于初始聚类中心的选取。
阅读全文