用Java实现K-means聚类分析的示例代码
时间: 2024-02-13 16:06:02 浏览: 153
好的,以下是一段使用Java实现K-means聚类分析的示例代码:
```java
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * K-means clustering over a list of {@link Instance} objects.
 *
 * <p>Typical use: construct with the desired number of clusters and the data set, call
 * {@link #cluster()}, then read the result with {@link #getClusters()}.
 *
 * <p>This class relies on {@code Instance} and {@code Cluster}, which are declared elsewhere.
 * Only the members actually called here are assumed to exist.
 */
public class KMeansClustering {
    /** Upper bound on refinement passes; the loop stops earlier once the assignment is stable. */
    private static final int MAX_ITERATIONS = 100;

    private final int k; // number of clusters
    private final List<Instance> instances; // data set
    private final List<Cluster> clusters; // resulting clusters

    /**
     * Creates a clusterer. No work is done until {@link #cluster()} is called.
     *
     * @param k number of clusters to produce
     * @param instances data set to partition
     */
    public KMeansClustering(int k, List<Instance> instances) {
        this.k = k;
        this.instances = instances;
        this.clusters = new ArrayList<>();
    }

    /**
     * Seeds the clusters with {@code k} distinct, randomly chosen instances.
     *
     * <p>Any clusters left over from a previous run are discarded first, so calling
     * {@link #cluster()} repeatedly never accumulates more than {@code k} clusters.
     *
     * @throws IllegalArgumentException if {@code k} is not in {@code [1, instances.size()]}
     */
    private void initClusters() {
        int n = instances.size();
        if (k < 1 || k > n) {
            throw new IllegalArgumentException(
                    "k must be between 1 and the number of instances (" + n + "), but was " + k);
        }
        clusters.clear();

        // Partial Fisher-Yates shuffle over the index range: the first k slots end up holding
        // k distinct indices, so no two clusters start from the same instance.
        int[] order = new int[n];
        for (int i = 0; i < n; i++) {
            order[i] = i;
        }
        Random random = new Random();
        for (int i = 0; i < k; i++) {
            int j = i + random.nextInt(n - i);
            int picked = order[j];
            order[j] = order[i];
            order[i] = picked;
            // NOTE(review): the data instance itself is handed over as the initial center; if
            // Cluster updates its center in place this would modify the data point. Confirm
            // against the Cluster implementation.
            clusters.add(new Cluster(i, instances.get(picked)));
        }
    }

    /**
     * Computes the Euclidean distance between an instance and a cluster's center.
     *
     * @param instance the data point
     * @param cluster the cluster whose center is compared against
     * @return the square root of the summed squared per-feature differences
     */
    private double distance(Instance instance, Cluster cluster) {
        double sum = 0;
        for (int i = 0; i < instance.getFeatures().size(); i++) {
            double diff = instance.getFeatures().get(i) - cluster.getCenter().getFeatures().get(i);
            sum += diff * diff;
        }
        return Math.sqrt(sum);
    }

    /** Records, on every cluster, the distance from every instance to that cluster's center. */
    private void calculateDistances() {
        for (Instance instance : instances) {
            for (Cluster cluster : clusters) {
                cluster.addDistance(instance, distance(instance, cluster));
            }
        }
    }

    /** Asks every cluster to recompute its center. */
    private void calculateCenters() {
        for (Cluster cluster : clusters) {
            cluster.calculateCenter();
        }
    }

    /** Removes the instances currently held by every cluster. */
    private void clearClusters() {
        for (Cluster cluster : clusters) {
            cluster.clearInstances();
        }
    }

    /**
     * Adds each instance to the cluster with the smallest recorded distance.
     *
     * <p>Ties go to the cluster that appears first in the list. A NaN distance is treated as
     * infinitely far, and the first cluster is the fallback, so every instance is always
     * assigned somewhere instead of failing with a null cluster.
     *
     * @param assignment per-instance cluster index from the previous pass, in data-set
     *     iteration order; overwritten with the new assignment
     * @return {@code true} if at least one instance moved to a different cluster
     */
    private boolean assignInstances(int[] assignment) {
        boolean changed = false;
        int row = 0;
        for (Instance instance : instances) {
            int nearestIndex = -1;
            double minDist = Double.POSITIVE_INFINITY;
            for (int c = 0; c < clusters.size(); c++) {
                double dist = clusters.get(c).getDistance(instance);
                if (Double.isNaN(dist)) {
                    dist = Double.POSITIVE_INFINITY;
                }
                if (nearestIndex < 0 || dist < minDist) {
                    nearestIndex = c;
                    minDist = dist;
                }
            }
            clusters.get(nearestIndex).addInstance(instance);
            if (assignment[row] != nearestIndex) {
                assignment[row] = nearestIndex;
                changed = true;
            }
            row++;
        }
        return changed;
    }

    /**
     * Runs K-means: seeds the clusters, then alternates assignment and center updates until
     * the assignment stops changing or {@code MAX_ITERATIONS} passes have run.
     *
     * @throws IllegalArgumentException if {@code k} is not in {@code [1, instances.size()]}
     */
    public void cluster() {
        initClusters();

        int[] assignment = new int[instances.size()];
        for (int i = 0; i < assignment.length; i++) {
            assignment[i] = -1; // "not assigned yet", so the first pass always counts as a change
        }

        for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
            clearClusters();
            calculateDistances();
            boolean changed = assignInstances(assignment);
            calculateCenters();
            if (!changed) {
                // Same membership as the previous pass. Assuming Cluster.calculateCenter()
                // depends only on the assigned instances, further passes would repeat this
                // state exactly, so stop here.
                break;
            }
        }
    }

    /**
     * Returns the clusters produced by the most recent {@link #cluster()} call.
     *
     * @return the internal cluster list (empty before {@link #cluster()} has run)
     */
    public List<Cluster> getClusters() {
        return clusters;
    }
}
```
上述代码实现了K-means聚类分析的核心流程:随机初始化聚类中心、计算各实例到聚类中心的距离、将实例分配到最近的聚类、重新计算聚类中心,并重复迭代。代码依赖Instance和Cluster两个类,分别表示数据集中的实例和聚类(包含聚类中心及其所属实例);这两个类的实现未在此处给出,需要另行定义。
使用该代码进行聚类分析的示例代码如下:
```java
import java.util.ArrayList;
import java.util.List;
/**
 * Demo program: clusters six two-dimensional sample points into two groups and prints the
 * members of each resulting cluster.
 */
public class Main {
    /**
     * Builds the sample data, runs the clustering and prints the outcome.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        List<Instance> points = sampleData();

        KMeansClustering kMeans = new KMeansClustering(2, points);
        kMeans.cluster();

        printClusters(kMeans.getClusters());
    }

    /** Creates the six named sample points used by the demo. */
    private static List<Instance> sampleData() {
        List<Instance> points = new ArrayList<>();
        points.add(new Instance("A", 1, 2));
        points.add(new Instance("B", 2, 1));
        points.add(new Instance("C", 3, 4));
        points.add(new Instance("D", 4, 3));
        points.add(new Instance("E", 5, 6));
        points.add(new Instance("F", 6, 5));
        return points;
    }

    /** Prints a header line per cluster followed by one line per member with its first two features. */
    private static void printClusters(List<Cluster> groups) {
        for (Cluster group : groups) {
            System.out.println("Cluster " + group.getId() + ":");
            for (Instance member : group.getInstances()) {
                System.out.println(
                        member.getName()
                                + " ("
                                + member.getFeatures().get(0)
                                + ", "
                                + member.getFeatures().get(1)
                                + ")");
            }
        }
    }
}
```
以上示例代码演示了如何使用KMeansClustering类进行聚类分析,并输出了聚类结果。在实际应用中,可以将数据集从文件中读取,并根据需要设置聚类数等参数。
阅读全文