-
-
Notifications
You must be signed in to change notification settings - Fork 24
Expand file tree
/
Copy pathdbscan.py
More file actions
75 lines (63 loc) · 2.58 KB
/
dbscan.py
File metadata and controls
75 lines (63 loc) · 2.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from typing import Union
import numpy as np
class DBSCAN:
    """Density-based spatial clustering of applications with noise (DBSCAN).

    Parameters:
    -----------
    eps: float = 0.3
        The maximum distance between two samples for one to be considered as in the neighborhood of the other.
        The comparison is strict: a sample is a neighbour only if its Euclidean distance is ``< eps``.
    min_points: int = 5
        The number of neighbours (the point itself is not counted) required for a point to be
        considered as a core point.

    Attributes:
    -----------
    labels: list
        Result of the last ``fit_predict`` call: ``-1`` marks an outlier, positive integers
        (starting at 1) are cluster ids. ``0`` is used internally for "not visited yet".
    c: int
        Id that will be given to the next cluster that is discovered. After ``fit_predict``
        it equals the number of clusters found plus one.
    """

    def __init__(self, eps: float = 0.3, min_points: int = 5) -> None:
        self.eps = eps
        self.min_points = min_points
        self.labels = []
        self.c = 1  # id of the next cluster to create

    def fit_predict(self, data: Union[list, np.ndarray]) -> list:
        """Cluster ``data`` and return one label per sample.

        Parameters:
        -----------
        data: list or np.ndarray
            Sequence of samples; each sample is a point (nested lists are accepted).

        Returns:
        --------
        list
            ``self.labels``: ``-1`` for outliers, ``1, 2, ...`` for clusters, in sample order.
        """
        # Convert once so plain (nested) lists support the vector arithmetic used below.
        points = np.asarray(data)
        n_samples = len(points)

        # Start from a clean state so repeated calls on the same instance are independent.
        self.labels = [0] * n_samples
        self.c = 1

        for i in range(n_samples):
            if self.labels[i] != 0:
                # Already assigned to a cluster or marked as an outlier.
                continue
            neighbours = self.find_neighbours(points, i)
            if len(neighbours) < self.min_points:
                # Not a core point: provisionally an outlier. A later cluster may
                # still claim it as a border point in grow_cluster.
                self.labels[i] = -1
            else:
                self.grow_cluster(points, i, neighbours)
                self.c += 1
        return self.labels

    def find_neighbours(self, data: Union[list, np.ndarray], index: int) -> list:
        """Return the indices of all samples closer than ``eps`` to sample ``index``.

        The sample itself is never part of the result. Indices are returned in
        ascending order as plain Python ints.
        """
        points = np.asarray(data)
        n_samples = len(points)
        if n_samples == 0:
            return []
        # Flatten every sample to a vector so a single row-wise norm yields all distances.
        offsets = (points - points[index]).reshape(n_samples, -1)
        distances = np.linalg.norm(offsets, axis=1)
        is_close = distances < self.eps
        is_close[index] = False  # a point is not its own neighbour
        return np.flatnonzero(is_close).tolist()

    def grow_cluster(self, data: Union[list, np.ndarray], index: int, neighbours: list) -> None:
        """Expand cluster ``self.c`` outwards from the core point ``index``.

        Parameters:
        -----------
        data: list or np.ndarray
            The samples being clustered.
        index: int
            Index of the seed (core) point.
        neighbours: list
            Indices of the seed's neighbours; this list is not modified.
        """
        # Assign seed point to cluster
        self.labels[index] = self.c

        # Work on a copy so the caller's list is left untouched, and remember what
        # has been queued so every index is enqueued at most once.
        queue = list(neighbours)
        queued = set(queue)
        queued.add(index)

        position = 0
        while position < len(queue):
            p = queue[position]
            position += 1
            if self.labels[p] == -1:
                # Former outlier reachable from a core point: becomes a border point.
                self.labels[p] = self.c
            elif self.labels[p] == 0:
                self.labels[p] = self.c
                neighbours_new = self.find_neighbours(data, p)
                # Only core points propagate the cluster to their own neighbours.
                if len(neighbours_new) >= self.min_points:
                    for q in neighbours_new:
                        if q not in queued:
                            queued.add(q)
                            queue.append(q)
if __name__ == '__main__':
    # Demo: cluster a small synthetic 2-D data set and plot the labelled points.
    import matplotlib.pyplot as plt
    from sklearn.datasets import make_blobs
    from sklearn.preprocessing import MinMaxScaler

    X, y = make_blobs(n_samples=30, centers=3, n_features=2)
    # Rescale every feature into [0, 1] before clustering.
    X = MinMaxScaler(feature_range=(0, 1)).fit_transform(X)

    model = DBSCAN()
    predictions = model.fit_predict(X)

    # Cluster ids start at 1 and are unbounded, so cycle through the palette
    # instead of indexing it directly; outliers (-1) get their own colour.
    cluster_colors = ['r', 'g', 'b', 'c', 'm', 'y']
    noise_color = 'k'
    for classification, x in zip(predictions, X):
        if classification == -1:
            color = noise_color
        else:
            color = cluster_colors[(classification - 1) % len(cluster_colors)]
        plt.scatter(x[0], x[1], color=color, s=150, linewidths=5, zorder=10)
    plt.show()