@@ -109,3 +109,73 @@ def transform(self, X):
109109 for i , centroid in enumerate (self .cluster_centers_ ):
110110 distances [:, i ] = np .sum ((X - centroid ) ** 2 , axis = 1 )
111111 return distances
112+
113+
class DBSCAN:
    """Density-based clustering (DBSCAN) restricted to the Euclidean metric.

    A sample is a *core* sample when at least ``min_samples`` samples
    (itself included) lie within distance ``eps`` of it. Clusters are grown
    outward from core samples; samples that no cluster reaches keep the
    label ``-1`` (noise).

    Attributes set by ``fit``:
        labels_: integer cluster label per sample, ``-1`` for noise.
        core_sample_indices_: indices of the core samples.
        components_: the rows of ``X`` that are core samples.
        n_clusters_: number of clusters found.
    """

    def __init__(self, eps=0.5, min_samples=5, metric='euclidean'):
        """Store the hyper-parameters; fitted attributes start out unset.

        Args:
            eps: neighborhood radius (inclusive).
            min_samples: neighborhood size (the sample itself included)
                required for a sample to count as a core sample.
            metric: distance metric name; only ``'euclidean'`` is accepted
                by ``_get_neighbors``.
        """
        self.eps = eps
        self.min_samples = min_samples
        self.metric = metric
        self.labels_ = None
        self.core_sample_indices_ = None
        self.components_ = None
        self.n_clusters_ = 0

    def _get_neighbors(self, X, sample_idx):
        """Return indices of all rows of ``X`` within ``eps`` of one row.

        The queried row is always part of the result (its distance is 0).

        Args:
            X: 2-D array of samples, one row per sample.
            sample_idx: index of the row whose neighborhood is wanted.

        Returns:
            1-D array of row indices, in increasing order.

        Raises:
            ValueError: if ``self.metric`` is not ``'euclidean'``.
        """
        if self.metric != 'euclidean':
            raise ValueError("Only euclidean metric is supported")
        # Compare squared distances against eps**2 to avoid a square root.
        sq_distances = np.sum((X - X[sample_idx]) ** 2, axis=1)
        return np.nonzero(sq_distances <= self.eps ** 2)[0]

    def fit(self, X):
        """Cluster the samples in ``X`` and populate the fitted attributes.

        Args:
            X: array-like of samples, one row per sample. Converted with
                ``np.asarray`` so nested Python sequences are accepted.

        Returns:
            self

        Raises:
            ValueError: if ``self.metric`` is not ``'euclidean'`` (raised
                by the first neighborhood query).
        """
        X = np.asarray(X)
        n_samples = X.shape[0]
        self.labels_ = np.full(n_samples, -1)

        visited = np.zeros(n_samples, dtype=bool)
        core_samples = np.zeros(n_samples, dtype=bool)
        # Marks samples already placed on the current cluster's work queue,
        # so each sample is enqueued at most once per cluster without
        # rebuilding a set of the whole queue for every core point.
        queued = np.zeros(n_samples, dtype=bool)

        cluster_label = 0

        for i in range(n_samples):
            if visited[i]:
                continue

            visited[i] = True
            neighbors = self._get_neighbors(X, i)

            if len(neighbors) < self.min_samples:
                # Not a core sample: stays noise (-1) unless a later
                # cluster reaches it as a border sample.
                continue

            core_samples[i] = True
            self.labels_[i] = cluster_label

            queue = neighbors.tolist()
            queued[neighbors] = True
            pos = 0
            # The queue grows while it is being consumed, hence the
            # explicit index instead of a for-loop.
            while pos < len(queue):
                neighbor = queue[pos]
                pos += 1

                if not visited[neighbor]:
                    visited[neighbor] = True
                    new_neighbors = self._get_neighbors(X, neighbor)

                    if len(new_neighbors) >= self.min_samples:
                        core_samples[neighbor] = True
                        fresh = new_neighbors[~queued[new_neighbors]]
                        queued[fresh] = True
                        queue.extend(fresh.tolist())

                # Claims unlabeled samples, including ones visited earlier
                # and left as noise; samples owned by an earlier cluster
                # keep their label.
                if self.labels_[neighbor] == -1:
                    self.labels_[neighbor] = cluster_label

            # Reset only the entries touched by this cluster.
            queued[queue] = False
            cluster_label += 1

        self.core_sample_indices_ = np.nonzero(core_samples)[0]
        self.components_ = X[core_samples]
        self.n_clusters_ = cluster_label

        return self

    def fit_predict(self, X):
        """Fit on ``X`` and return the resulting ``labels_`` array."""
        self.fit(X)
        return self.labels_
0 commit comments