导入 `from sklearn.datasets import load_iris`,并在不调用现成 DBSCAN 库的前提下,用 Python 实现 DBSCAN 聚类算法
时间: 2023-12-06 22:02:35 浏览: 114
基于Python的DBSCAN的聚类算法
以下是用 Python 实现 DBSCAN 聚类算法的代码:
```python
from collections import deque

import numpy as np
def dbscan(X, eps, min_pts):
    """
    Cluster the samples of ``X`` with the DBSCAN algorithm.

    :param X: array-like, dataset to be clustered (indexed by sample along axis 0)
    :param eps: float, neighborhood radius handed to ``_region_query``
    :param min_pts: int, minimum neighborhood size (as returned by
        ``_region_query``) for a point to seed or extend a cluster
    :return: numpy int array with one label per sample: ``-1`` for noise,
        ``1, 2, ...`` for clusters in order of discovery
    """
    # Accept lists / nested sequences as well as arrays.
    X = np.asarray(X)
    n_samples = X.shape[0]
    visited = np.zeros(n_samples, dtype=bool)
    # 0 means "not assigned yet"; noise is marked explicitly with -1 below.
    labels = np.zeros(n_samples, dtype=int)
    cluster_label = 0
    for i in range(n_samples):
        if visited[i]:
            continue
        visited[i] = True
        neighbors = _region_query(X, i, eps)
        if len(neighbors) < min_pts:
            # Neighborhood too sparse to seed a cluster.
            labels[i] = -1
            continue
        # Point i seeds a new cluster; grow it through its neighbors.
        cluster_label += 1
        labels[i] = cluster_label
        _expand_cluster(X, visited, labels, i, neighbors, cluster_label, eps, min_pts)
    return labels
def _region_query(X, i, eps):
    """
    Find all points in the eps-neighborhood of point i.

    The neighborhood is inclusive: a point at distance exactly ``eps`` counts
    as a neighbor, matching the documented meaning of ``eps`` as the maximum
    neighbor distance. Point ``i`` itself is part of the result whenever
    ``eps >= 0``.

    :param X: array-like, dataset (indexed by sample along axis 0)
    :param i: int, index of the query point
    :param eps: float, maximum Euclidean distance for two samples to be neighbors
    :return: list of int, indices of all points within ``eps`` of point i,
        in ascending order
    """
    X = np.asarray(X)
    # Flatten every sample to a vector so the row-wise 2-norm equals the
    # norm of ``X[i] - X[j]`` whatever the per-sample shape is.
    diffs = (X - X[i]).reshape(X.shape[0], -1)
    distances = np.linalg.norm(diffs, axis=1)
    # Return a plain list: callers extend/concatenate it as a Python list.
    return np.flatnonzero(distances <= eps).tolist()
def _expand_cluster(X, visited, labels, i, neighbors, cluster_label, eps, min_pts):
    """
    Expand the cluster around point i, updating ``visited`` and ``labels`` in place.

    :param X: numpy array, dataset
    :param visited: numpy bool array, visited status of all points (mutated)
    :param labels: numpy int array, cluster labels for each sample (mutated);
        ``0`` means unassigned and ``-1`` means noise
    :param i: int, index of the seed point (kept for interface compatibility;
        it is not read here)
    :param neighbors: list, indices of all points in the neighborhood of point i;
        this list is not modified
    :param cluster_label: int, label of the cluster being grown
    :param eps: float, maximum distance between two samples to be considered as neighbors
    :param min_pts: int, minimum number of samples in a neighborhood to form a dense region
    """
    # Work on a private queue so the caller's list is left untouched, and
    # remember what was queued so each index is enqueued at most once more.
    pending = deque(neighbors)
    queued = set(pending)
    while pending:
        j = pending.popleft()
        if not visited[j]:
            visited[j] = True
            neighbors_j = _region_query(X, j, eps)
            # Only dense (core) points propagate the cluster further.
            if len(neighbors_j) >= min_pts:
                for q in neighbors_j:
                    if q not in queued:
                        queued.add(q)
                        pending.append(q)
        # Claim unassigned points (0) and also points previously marked as
        # noise (-1): being reachable from a core point makes them border
        # points of this cluster. Points owned by another cluster are kept.
        if labels[j] <= 0:
            labels[j] = cluster_label
```
使用示例:
```python
from sklearn.datasets import make_blobs

# Build a reproducible toy dataset (50 samples, 3 centers, fixed seed);
# only the first return value of make_blobs is kept.
X = make_blobs(n_samples=50, centers=3, random_state=42)[0]

# Cluster it with a neighborhood radius of 1.5 and a density threshold of 5.
labels = dbscan(X, eps=1.5, min_pts=5)

# Show the label assigned to every sample.
print(labels)
```
输出结果(仅为示意;实际运行时会为 50 个样本各输出一个标签,具体取值取决于数据和参数):
```
[1 2 2 2 1 1 1 2 2 2 1 1 1 2 2 2 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 2 1 2 1 1
1 1 2 2 2 2 1 1 1 1 1 2]
```
其中,标签为 -1 的样本是噪声点,正整数标签(1、2、……)表示样本所属簇的编号。
阅读全文