k-means聚类算法java实现
时间: 2023-09-29 20:10:09 浏览: 102
k-means 聚类算法是一种基于中心点（质心）的聚类算法，通常用于数据分析、数据挖掘等领域。它先随机选取 k 个初始质心，然后反复将每个数据点分配给距离最近的质心，并用各簇内数据点的均值更新质心。以下是一个 k-means 聚类算法的 Java 实现示例：
```java
import java.util.*;
/**
 * Lloyd-style k-means clustering over points represented as {@code double[]} vectors.
 *
 * <p>Initial centroids are drawn at random from the data points. Each round assigns every
 * point to its nearest centroid (Euclidean distance) and then moves each centroid to the
 * mean of the points assigned to it. The loop stops after {@code iterations} rounds, or
 * earlier once a round leaves every centroid unchanged.
 *
 * <p>The list of data points is stored by reference, not copied; it is read each time
 * {@link #cluster()} is called.
 */
public class KMeans {
    private final int k; // number of clusters
    private final int iterations; // maximum number of assign/update rounds
    private final List<double[]> dataPoints; // data points to be clustered (held by reference)
    private final Random random; // randomness source used to seed the centroids

    /**
     * Creates a clusterer that seeds its centroids from a fresh, unseeded {@link Random}.
     *
     * @param k number of clusters; must be positive
     * @param iterations maximum number of assign/update rounds
     * @param dataPoints points to cluster; all must have the same length
     * @throws IllegalArgumentException if {@code k <= 0}
     * @throws NullPointerException if {@code dataPoints} is null
     */
    public KMeans(int k, int iterations, List<double[]> dataPoints) {
        this(k, iterations, dataPoints, new Random());
    }

    /**
     * Creates a clusterer that seeds its centroids from the given randomness source, which
     * makes runs reproducible when a seeded {@link Random} is supplied.
     *
     * @param k number of clusters; must be positive
     * @param iterations maximum number of assign/update rounds
     * @param dataPoints points to cluster; all must have the same length
     * @param random randomness source used to pick the initial centroids
     * @throws IllegalArgumentException if {@code k <= 0}
     * @throws NullPointerException if {@code dataPoints} or {@code random} is null
     */
    public KMeans(int k, int iterations, List<double[]> dataPoints, Random random) {
        if (k <= 0) {
            throw new IllegalArgumentException("k must be positive, got " + k);
        }
        this.k = k;
        this.iterations = iterations;
        this.dataPoints = Objects.requireNonNull(dataPoints, "dataPoints");
        this.random = Objects.requireNonNull(random, "random");
    }

    /**
     * Runs the clustering and returns the final assignment.
     *
     * @return a list of {@code k} clusters, each holding the data points (same array
     *     instances as in the input list, in input order) assigned to it; a cluster may be
     *     empty. If {@code iterations} is not positive, no round runs and the returned list
     *     is empty.
     * @throws IllegalArgumentException if there are no data points, a point is null, or the
     *     points do not all have the same length
     */
    public List<List<double[]>> cluster() {
        int dimension = validateDataPoints();
        List<double[]> centroids = initCentroids();
        List<List<double[]>> clusters = new ArrayList<>();
        for (int i = 0; i < iterations; i++) {
            // assign each data point to the nearest centroid
            clusters = assignDataPointsToCentroids(centroids);
            // move each centroid to the mean of the points assigned to it
            List<double[]> updated = updateCentroids(clusters, centroids, dimension);
            if (sameCentroids(centroids, updated)) {
                // Fixed point: another round would reproduce exactly this assignment.
                break;
            }
            centroids = updated;
        }
        return clusters;
    }

    /** Checks that the data is non-empty and rectangular; returns the common point length. */
    private int validateDataPoints() {
        if (dataPoints.isEmpty()) {
            throw new IllegalArgumentException("dataPoints must not be empty");
        }
        double[] first = dataPoints.get(0);
        if (first == null) {
            throw new IllegalArgumentException("data point 0 is null");
        }
        int dimension = first.length;
        for (int i = 1; i < dataPoints.size(); i++) {
            double[] point = dataPoints.get(i);
            if (point == null) {
                throw new IllegalArgumentException("data point " + i + " is null");
            }
            if (point.length != dimension) {
                throw new IllegalArgumentException(
                        "data point " + i + " has length " + point.length
                                + ", expected " + dimension);
            }
        }
        return dimension;
    }

    /**
     * Picks {@code k} initial centroids from the data points. Indices are shuffled and taken
     * without replacement so the same point is not chosen twice while unused points remain;
     * only when {@code k} exceeds the number of points does the selection wrap around.
     */
    private List<double[]> initCentroids() {
        int n = dataPoints.size();
        List<Integer> order = new ArrayList<>(n);
        for (int i = 0; i < n; i++) {
            order.add(i);
        }
        Collections.shuffle(order, random);
        List<double[]> centroids = new ArrayList<>(k);
        for (int i = 0; i < k; i++) {
            // clone so a centroid never aliases one of the caller's arrays
            centroids.add(dataPoints.get(order.get(i % n)).clone());
        }
        return centroids;
    }

    /** Builds {@code k} clusters by placing every data point with its nearest centroid. */
    private List<List<double[]>> assignDataPointsToCentroids(List<double[]> centroids) {
        List<List<double[]>> clusters = new ArrayList<>(k);
        for (int i = 0; i < k; i++) {
            clusters.add(new ArrayList<>());
        }
        for (double[] dataPoint : dataPoints) {
            clusters.get(getClosestCentroidIndex(dataPoint, centroids)).add(dataPoint);
        }
        return clusters;
    }

    /**
     * Computes the mean of each cluster. An empty cluster keeps its previous centroid
     * instead of being reset to the zero vector, which would drag it to the origin
     * regardless of where the data lies.
     */
    private List<double[]> updateCentroids(
            List<List<double[]>> clusters, List<double[]> previous, int dimension) {
        List<double[]> centroids = new ArrayList<>(clusters.size());
        for (int c = 0; c < clusters.size(); c++) {
            List<double[]> cluster = clusters.get(c);
            if (cluster.isEmpty()) {
                centroids.add(previous.get(c));
                continue;
            }
            double[] centroid = new double[dimension];
            for (double[] dataPoint : cluster) {
                for (int d = 0; d < dimension; d++) {
                    centroid[d] += dataPoint[d];
                }
            }
            for (int d = 0; d < dimension; d++) {
                centroid[d] /= cluster.size();
            }
            centroids.add(centroid);
        }
        return centroids;
    }

    /** Returns true when both lists hold element-wise equal centroid vectors. */
    private static boolean sameCentroids(List<double[]> a, List<double[]> b) {
        if (a.size() != b.size()) {
            return false;
        }
        for (int i = 0; i < a.size(); i++) {
            if (!Arrays.equals(a.get(i), b.get(i))) {
                return false;
            }
        }
        return true;
    }

    /** Returns the index of the centroid nearest to the point; ties go to the lowest index. */
    private int getClosestCentroidIndex(double[] dataPoint, List<double[]> centroids) {
        int closestIndex = 0;
        double closestDistance = Double.MAX_VALUE;
        for (int i = 0; i < centroids.size(); i++) {
            double distance = getSquaredDistance(dataPoint, centroids.get(i));
            if (distance < closestDistance) {
                closestIndex = i;
                closestDistance = distance;
            }
        }
        return closestIndex;
    }

    /**
     * Squared Euclidean distance between two vectors. The square root is omitted because
     * it does not change which centroid is nearest.
     */
    private static double getSquaredDistance(double[] a, double[] b) {
        double sum = 0.0;
        for (int i = 0; i < a.length; i++) {
            double diff = a[i] - b[i];
            sum += diff * diff;
        }
        return sum;
    }
}
```
以上是一个基于 Java 的 k-means 聚类算法实现：通过 `new KMeans(k, iterations, dataPoints).cluster()` 即可将数据点划分为 k 个簇。由于初始质心是随机选取的，多次运行的结果可能不同。
阅读全文