Skip to content

Commit 415d18d

Browse files
committed
feat(cluster): add DBSCAN
1 parent ec838e2 commit 415d18d

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
## 📝 Description
44

5-
This is a handmade deep learning framework library, made in python, **using numpy as its only external dependency**.
5+
This is a handmade machine learning and deep learning library, written in Python, **using numpy as its only external dependency**.
66

77
I made it to challenge myself and to learn more about deep neural networks, how they work _in depth_.
88

@@ -23,6 +23,7 @@ This project will be maintained as long as I have ideas to improve it, and as lo
2323
- Many optimizers (sgd, momentum, rmsprop, adam) 📊
2424
- Supports binary classification, multiclass classification, regression and text generation 📚
2525
- Preprocessing tools (tokenizer, pca, ngram, standardscaler, pad_sequences, one_hot_encode and more) 🛠
26+
- Machine learning tools (isolation forest, k-means, dbscan, pca, t-sne) 🧮
2627
- Callbacks and regularizers (early stopping, l1/l2 regularization) 📉
2728
- Save and load models 📁
2829
- Simple to use 📚

neuralnetlib/cluster.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,73 @@ def transform(self, X):
109109
for i, centroid in enumerate(self.cluster_centers_):
110110
distances[:, i] = np.sum((X - centroid) ** 2, axis=1)
111111
return distances
112+
113+
114+
class DBSCAN:
    """Density-based spatial clustering of applications with noise (DBSCAN).

    A sample is a *core sample* when at least ``min_samples`` samples
    (itself included) lie within distance ``eps`` of it. Clusters are grown
    outwards from core samples; samples that end up in no cluster keep the
    label ``-1`` (noise).

    Parameters
    ----------
    eps : float, default=0.5
        Neighbourhood radius. A sample at distance exactly ``eps`` counts as
        a neighbour.
    min_samples : int, default=5
        Minimum neighbourhood size (the sample itself included) for a sample
        to be a core sample.
    metric : str, default='euclidean'
        Distance metric. Only ``'euclidean'`` is supported; any other value
        raises ``ValueError`` as soon as a neighbourhood is computed.

    Attributes
    ----------
    labels_ : ndarray of shape (n_samples,) or None
        Cluster label of each sample after ``fit``; ``-1`` marks noise.
    core_sample_indices_ : ndarray or None
        Indices of the core samples found by ``fit``.
    components_ : ndarray or None
        Rows of the fitted data that are core samples.
    n_clusters_ : int
        Number of clusters found by ``fit``.
    """

    def __init__(self, eps=0.5, min_samples=5, metric='euclidean'):
        self.eps = eps
        self.min_samples = min_samples
        self.metric = metric
        self.labels_ = None
        self.core_sample_indices_ = None
        self.components_ = None
        self.n_clusters_ = 0

    def _get_neighbors(self, X, sample_idx):
        """Return the indices of all rows of ``X`` within ``eps`` of row ``sample_idx``.

        The row itself is always part of the result (its distance is zero).

        Raises
        ------
        ValueError
            If ``self.metric`` is not ``'euclidean'``.
        """
        if self.metric != 'euclidean':
            raise ValueError("Only euclidean metric is supported")
        # Compare squared distances against eps ** 2 to avoid a square root.
        distances = np.sum((X - X[sample_idx]) ** 2, axis=1)
        return np.nonzero(distances <= self.eps ** 2)[0]

    def fit(self, X):
        """Cluster the rows of ``X`` and store the result on the estimator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to cluster. Anything ``np.asarray`` accepts (for example
            a list of lists) is allowed.

        Returns
        -------
        DBSCAN
            ``self``, with ``labels_``, ``core_sample_indices_``,
            ``components_`` and ``n_clusters_`` populated.
        """
        # Accept plain Python sequences as well as ndarrays.
        X = np.asarray(X)
        n_samples = X.shape[0]
        labels = self.labels_ = np.full(n_samples, -1)

        visited = np.zeros(n_samples, dtype=bool)
        core_samples = np.zeros(n_samples, dtype=bool)
        # Marks samples that have been placed in some expansion queue. Every
        # queued sample is visited and labelled by the time its cluster is
        # finished, so queueing it again later would be a no-op; one mask
        # shared by all clusters is therefore enough, and it replaces the
        # repeated set() rebuilds of the whole queue.
        queued = np.zeros(n_samples, dtype=bool)

        cluster_label = 0

        for i in range(n_samples):
            if visited[i]:
                continue

            visited[i] = True
            neighbors = self._get_neighbors(X, i)

            if len(neighbors) < self.min_samples:
                # Provisionally noise (label stays -1); may still be claimed
                # as a border sample while a later cluster is expanded.
                continue

            core_samples[i] = True
            labels[i] = cluster_label

            # Breadth-first expansion of the cluster seeded at sample i.
            queued[neighbors] = True
            queue = neighbors.tolist()
            position = 0
            while position < len(queue):
                neighbor = queue[position]
                position += 1

                if not visited[neighbor]:
                    visited[neighbor] = True
                    new_neighbors = self._get_neighbors(X, neighbor)

                    # Only core samples propagate the cluster further.
                    if len(new_neighbors) >= self.min_samples:
                        core_samples[neighbor] = True
                        fresh = new_neighbors[~queued[new_neighbors]]
                        queued[fresh] = True
                        queue.extend(fresh.tolist())

                # Claim unlabelled samples, including ones earlier marked as
                # noise; samples owned by an earlier cluster keep their label.
                if labels[neighbor] == -1:
                    labels[neighbor] = cluster_label

            cluster_label += 1

        self.core_sample_indices_ = np.nonzero(core_samples)[0]
        self.components_ = X[core_samples]
        self.n_clusters_ = cluster_label

        return self

    def fit_predict(self, X):
        """Fit the model on ``X`` and return the resulting ``labels_`` array."""
        self.fit(X)
        return self.labels_

0 commit comments

Comments
 (0)