不调用现成的聚类库,用Python(仅借助NumPy做数值计算)实现DBSCAN,对鸢尾花数据集进行聚类
时间: 2024-01-28 21:02:48 浏览: 139
好的,下面是不依赖现成聚类库、仅借助NumPy手写的DBSCAN算法,用于对鸢尾花数据集进行聚类(DBSCAN是无监督聚类算法,而非分类算法):
```python
import numpy as np
# 计算欧氏距离
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    The operands are subtracted directly, so they must support ``-``
    (for example NumPy arrays of matching shape, or plain numbers).
    """
    delta = x1 - x2
    squared_total = np.sum(np.square(delta))
    return np.sqrt(squared_total)
# DBSCAN算法
def dbscan(X, eps, min_samples):
    """Cluster the rows of ``X`` with the DBSCAN algorithm.

    Args:
        X: Sequence of points (array-like, one point per row). A 1-D
            input is treated as a set of one-dimensional points.
        eps: Neighborhood radius; two points are neighbors when their
            Euclidean distance is ``<= eps``.
        min_samples: Minimum neighborhood size (the point itself is
            counted, since its distance to itself is 0) for a point to
            be a core point.

    Returns:
        An integer NumPy array with one label per point: clusters are
        numbered ``1, 2, ...`` in order of discovery and noise is ``-1``.
    """
    from collections import deque

    points = np.asarray(X, dtype=float)
    n_points = len(points)
    if n_points == 0:
        return np.zeros(0, dtype=int)
    if points.ndim != 2:
        # Flatten every sample to a feature vector (scalars become 1-D points).
        points = points.reshape(n_points, -1)

    # Neighbor indices of every point, computed one vectorized row at a time
    # so that no full n x n distance matrix has to be kept around.
    neighborhoods = []
    for point in points:
        distances = np.sqrt(np.sum((points - point) ** 2, axis=1))
        neighborhoods.append(np.flatnonzero(distances <= eps))
    is_core = [len(neighbors) >= min_samples for neighbors in neighborhoods]

    # 0 means "not assigned yet". Noise is decided only after every cluster
    # has been grown: marking a non-core point as noise up front would stop a
    # later cluster from claiming it as a border point.
    labels = np.zeros(n_points, dtype=int)
    cluster_id = 0
    for seed in range(n_points):
        if labels[seed] != 0 or not is_core[seed]:
            continue
        cluster_id += 1
        labels[seed] = cluster_id
        # Breadth-first expansion through density-reachable core points.
        queue = deque([seed])
        while queue:
            current = queue.popleft()
            for neighbor in neighborhoods[current]:
                if labels[neighbor] != 0:
                    continue
                labels[neighbor] = cluster_id
                # Only core points propagate the cluster further; border
                # points are absorbed but not expanded.
                if is_core[neighbor]:
                    queue.append(neighbor)

    # Anything no cluster reached is noise.
    labels[labels == 0] = -1
    return labels
```
使用鸢尾花数据集进行测试:
```python
from sklearn.datasets import load_iris

# Load the iris measurements; the ground-truth species in ``y`` are not
# used by the clustering itself.
iris = load_iris()
X, y = iris.data, iris.target

# Run DBSCAN on the four-dimensional feature vectors.
labels = dbscan(X, eps=0.6, min_samples=5)

# Report the cluster label assigned to every sample.
for sample, cluster_label in zip(X, labels):
    print("样本点", sample, "的类别是:", cluster_label)
```
输出结果形如(仅为节选示意,-1 表示噪声点,实际标签取决于 eps 和 min_samples 的取值):
```
样本点 [5.1 3.5 1.4 0.2] 的类别是: 1
样本点 [4.9 3. 1.4 0.2] 的类别是: 1
样本点 [4.7 3.2 1.3 0.2] 的类别是: 1
样本点 [4.6 3.1 1.5 0.2] 的类别是: 1
样本点 [5. 3.6 1.4 0.2] 的类别是: 1
样本点 [5.4 3.9 1.7 0.4] 的类别是: 1
...
样本点 [6.7 3.1 4.7 1.5] 的类别是: 2
样本点 [6.3 2.3 4.4 1.3] 的类别是: 2
...
样本点 [6.3 3.3 6. 2.5] 的类别是: -1
样本点 [5.8 2.7 5.1 1.9] 的类别是: -1
样本点 [7.1 3. 5.9 2.1] 的类别是: -1
样本点 [6.3 2.9 5.6 1.8] 的类别是: -1
样本点 [6.5 3. 5.8 2.2] 的类别是: -1
样本点 [7.6 3. 6.6 2.1] 的类别是: -1
样本点 [4.9 2.5 4.5 1.7] 的类别是: -1
```
阅读全文