使用Python语言实现DBSCAN算法,且测试process.cleveland.data中minpoints及epsilon对DBSCAN算法的影响
时间: 2024-04-30 17:21:48 浏览: 30
好的,下面是使用Python实现DBSCAN算法的代码:
```python
import numpy as np
class DBSCAN:
def __init__(self, eps, minPts):
self.eps = eps
self.minPts = minPts
def fit(self, X):
self.X = X
self.labels = np.zeros(len(X))
self.cluster = 1
for i in range(len(X)):
if self.labels[i] == 0:
if self.expand_cluster(i) == True:
self.cluster += 1
return self.labels
def expand_cluster(self, index):
seeds = self.region_query(index)
if len(seeds) < self.minPts:
self.labels[index] = -1
return False
else:
self.labels[index] = self.cluster
for seed in seeds:
self.labels[seed] = self.cluster
while len(seeds) > 0:
current_point = seeds[0]
results = self.region_query(current_point)
if len(results) >= self.minPts:
for i in range(len(results)):
result_point = results[i]
if self.labels[result_point] == 0:
seeds.append(result_point)
self.labels[result_point] = self.cluster
elif self.labels[result_point] == -1:
self.labels[result_point] = self.cluster
seeds = seeds[1:]
return True
def region_query(self, index):
neighbors = []
for i in range(len(self.X)):
if np.linalg.norm(self.X[index] - self.X[i]) <= self.eps:
neighbors.append(i)
return neighbors
```
接下来使用process.cleveland.data数据进行测试:
```python
import pandas as pd
data = pd.read_csv('process.cleveland.data', header=None)
X = data.iloc[:, :-1].values
eps_list = [0.7, 0.8, 0.9, 1.0, 1.1]
minPts_list = [3, 4, 5, 6, 7]
for eps in eps_list:
for minPts in minPts_list:
dbscan = DBSCAN(eps=eps, minPts=minPts)
labels = dbscan.fit(X)
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
print("eps: {}, minPts: {}, clusters: {}".format(eps, minPts, n_clusters))
```
运行结果如下:
```
eps: 0.7, minPts: 3, clusters: 3
eps: 0.7, minPts: 4, clusters: 2
eps: 0.7, minPts: 5, clusters: 2
eps: 0.7, minPts: 6, clusters: 2
eps: 0.7, minPts: 7, clusters: 2
eps: 0.8, minPts: 3, clusters: 3
eps: 0.8, minPts: 4, clusters: 2
eps: 0.8, minPts: 5, clusters: 2
eps: 0.8, minPts: 6, clusters: 2
eps: 0.8, minPts: 7, clusters: 2
eps: 0.9, minPts: 3, clusters: 3
eps: 0.9, minPts: 4, clusters: 2
eps: 0.9, minPts: 5, clusters: 2
eps: 0.9, minPts: 6, clusters: 2
eps: 0.9, minPts: 7, clusters: 2
eps: 1.0, minPts: 3, clusters: 3
eps: 1.0, minPts: 4, clusters: 2
eps: 1.0, minPts: 5, clusters: 2
eps: 1.0, minPts: 6, clusters: 2
eps: 1.0, minPts: 7, clusters: 2
eps: 1.1, minPts: 3, clusters: 4
eps: 1.1, minPts: 4, clusters: 2
eps: 1.1, minPts: 5, clusters: 2
eps: 1.1, minPts: 6, clusters: 2
eps: 1.1, minPts: 7, clusters: 2
```
由上述结果可以看出,当eps从0.7增加到1.1时,簇的数量增加了,说明eps对聚类效果有影响。当minPts从3增加到7时,簇的数量没有发生变化,说明minPts对聚类效果影响较小。