From c460cce337da187bdce98b40a70cb409f31f6c0f Mon Sep 17 00:00:00 2001 From: Ashot Vardanian <1983160+ashvardanian@users.noreply.github.com> Date: Sat, 27 May 2023 14:01:18 +0200 Subject: [PATCH 1/3] Add: USearch implementation --- .github/workflows/benchmarks.yml | 1 + algos.yaml | 84 ++++++++++++++++++++++++++++ ann_benchmarks/algorithms/usearch.py | 54 ++++++++++++++++++ install/Dockerfile.usearch | 8 +++ 4 files changed, 147 insertions(+) create mode 100644 ann_benchmarks/algorithms/usearch.py create mode 100644 install/Dockerfile.usearch diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 4f345e7ea..565e57d63 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -54,6 +54,7 @@ jobs: - sklearn - sptag - tinyknn + - usearch - vald - vearch - vespa diff --git a/algos.yaml b/algos.yaml index 7de437df2..5d8515ca6 100644 --- a/algos.yaml +++ b/algos.yaml @@ -142,6 +142,90 @@ float: arg-groups: - {"M": 96, "efConstruction": 500} query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + usearch-f32: + docker-tag: ann-benchmarks-usearch + module: ann_benchmarks.algorithms.usearch + constructor: USearch + base-args: ["@metric", "f32"] + run-groups: + M-4: + arg-groups: + - {"M": 4, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-8: + arg-groups: + - {"M": 8, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-12: + arg-groups: + - {"M": 12, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-16: + arg-groups: + - {"M": 16, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-24: + arg-groups: + - {"M": 24, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-36: + arg-groups: + - {"M": 36, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-48: + arg-groups: + - {"M": 48, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-64: + arg-groups: + - {"M": 64, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-96: + arg-groups: + - {"M": 96, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + usearch-f8: + docker-tag: ann-benchmarks-usearch + module: ann_benchmarks.algorithms.usearch + constructor: USearch + base-args: ["@metric", "f8"] + run-groups: + M-4: + arg-groups: + - {"M": 4, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-8: + arg-groups: + - {"M": 8, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-12: + arg-groups: + - {"M": 12, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-16: + arg-groups: + - {"M": 16, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-24: + arg-groups: + - {"M": 24, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-36: + arg-groups: + - {"M": 36, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-48: + arg-groups: + - {"M": 48, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-64: + arg-groups: + - {"M": 64, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] + M-96: + arg-groups: + - {"M": 96, "efConstruction": 500} + query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] hnsw(faiss): docker-tag: ann-benchmarks-faiss diff --git a/ann_benchmarks/algorithms/usearch.py b/ann_benchmarks/algorithms/usearch.py new file mode 100644 index 000000000..6613e8191 --- /dev/null +++ b/ann_benchmarks/algorithms/usearch.py @@ -0,0 +1,54 @@ +import usearch +import numpy as np + +from .base import BaseANN + + +class USearch(BaseANN): + + def __init__(self, metric: str, accuracy: str, method_param: dict): + assert accuracy in ['f64', 'f32', 'f16', 'f8'] + assert metric in ['angular', 'euclidean'] + assert 'M' in method_param + assert 'efConstruction' in method_param + + self._metric = {'angular': 'cos', 'euclidean': 'l2sq'}[metric] + self._method_param = method_param + self._accuracy = accuracy + + def __str__(self): + connectivity = self._method_param['M'] + expansion_add = self._method_param['efConstruction'] + return f'USearch(connecitivity={connectivity}, expansion_add={expansion_add})' + + def fit(self, X): + connectivity = self._method_param['M'] + expansion_add = self._method_param['efConstruction'] + self._index = usearch.Index( + ndim=len(X[0]), + capacity=len(X), + metric=self._metric, + accuracy=self._accuracy, + connectivity=connectivity, + expansion_add=expansion_add, + ) + labels = np.arange(len(X), dtype=np.longlong) + self._index.add(labels, np.asarray(X)) + + def get_memory_usage(self) -> int: + return self._index.memory_usage / 1024 + + def set_query_arguments(self, ef: int): + self._index.expansion_search = ef + + def freeIndex(self): + del self._index + + def query(self, v, n): + return self._index.search(np.expand_dims(v, axis=0), k=n)[0][0] + + def batch_query(self, X, n): + self._batch_results = self._index.search(np.asarray(X), n) + + def get_batch_results(self): + return self._batch_results diff --git a/install/Dockerfile.usearch b/install/Dockerfile.usearch new file mode 100644 index 000000000..298730723 --- /dev/null +++ b/install/Dockerfile.usearch @@ -0,0 +1,8 @@ +FROM ann-benchmarks + +RUN apt-get install -y python-setuptools python-pip +RUN pip3 install pybind11 numpy setuptools numba +RUN git clone https://github.com/unum-cloud/usearch.git && cd usearch && git submodule update --init --recursive +RUN cd usearch && python3 setup.py install + +RUN python3 -c 'import usearch' From 19637a6142d2fc707ffa65bd90867b77d221eb39 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Mon, 3 Jul 2023 16:33:13 +0400 Subject: [PATCH 2/3] Fix: using new API --- ann_benchmarks/algorithms/usearch.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ann_benchmarks/algorithms/usearch.py b/ann_benchmarks/algorithms/usearch.py index 6613e8191..b0d12bb9f 100644 --- a/ann_benchmarks/algorithms/usearch.py +++ b/ann_benchmarks/algorithms/usearch.py @@ -1,9 +1,8 @@ -import usearch +from usearch.index import Index, MetricKind import numpy as np from .base import BaseANN - class USearch(BaseANN): def __init__(self, metric: str, accuracy: str, method_param: dict): @@ -12,7 +11,7 @@ def __init__(self, metric: str, accuracy: str, method_param: dict): assert 'M' in method_param assert 'efConstruction' in method_param - self._metric = {'angular': 'cos', 'euclidean': 'l2sq'}[metric] + self._metric = {'angular': MetricKind.Cos, 'euclidean': MetricKind.L2sq}[metric] self._method_param = method_param self._accuracy = accuracy @@ -24,18 +23,22 @@ def __str__(self): def fit(self, X): connectivity = self._method_param['M'] expansion_add = self._method_param['efConstruction'] - self._index = usearch.Index( + + self._index = Index( ndim=len(X[0]), - capacity=len(X), metric=self._metric, - accuracy=self._accuracy, + dtype=self._accuracy, connectivity=connectivity, expansion_add=expansion_add, + jit=True ) labels = np.arange(len(X), dtype=np.longlong) self._index.add(labels, np.asarray(X)) def get_memory_usage(self) -> int: + if not hasattr(self, '_index'): + return 0 + return self._index.memory_usage / 1024 def set_query_arguments(self, ef: int): From f4f3bd09aa2b28ab18a17509d6285a654bd23370 Mon Sep 17 00:00:00 2001 From: Mikhail Date: Wed, 12 Jul 2023 14:58:45 +0000 Subject: [PATCH 3/3] Fix: jit metrics --- ann_benchmarks/algorithms/usearch.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ann_benchmarks/algorithms/usearch.py b/ann_benchmarks/algorithms/usearch.py index b0d12bb9f..036b0b1d6 100644 --- a/ann_benchmarks/algorithms/usearch.py +++ b/ann_benchmarks/algorithms/usearch.py @@ -1,4 +1,5 @@ -from usearch.index import Index, MetricKind +from usearch.index import Index, MetricKind, ScalarKind +from usearch.numba import jit import numpy as np from .base import BaseANN @@ -11,9 +12,9 @@ def __init__(self, metric: str, accuracy: str, method_param: dict): assert 'M' in method_param assert 'efConstruction' in method_param - self._metric = {'angular': MetricKind.Cos, 'euclidean': MetricKind.L2sq}[metric] self._method_param = method_param - self._accuracy = accuracy + self._accuracy = {'f64': ScalarKind.F64, 'f32': ScalarKind.F32, 'f8': ScalarKind.F8}[accuracy] + self._metric = {'angular': MetricKind.Cos, 'euclidean': MetricKind.L2sq}[metric] def __str__(self): connectivity = self._method_param['M'] @@ -23,15 +24,20 @@ def __str__(self): def fit(self, X): connectivity = self._method_param['M'] expansion_add = self._method_param['efConstruction'] + metric = jit( + X.shape[1], + self._metric, + self._accuracy + ) self._index = Index( ndim=len(X[0]), - metric=self._metric, + metric=metric, dtype=self._accuracy, connectivity=connectivity, - expansion_add=expansion_add, - jit=True + expansion_add=expansion_add ) + labels = np.arange(len(X), dtype=np.longlong) self._index.add(labels, np.asarray(X))