criteo · rom1504 · Aug 9, 2025 · Aug 9, 2025 · Aug 9, 2025 · Aug 9, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -10,13 +10,13 @@ on:
 
 jobs:
   lint:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python 3.8
-        uses: actions/setup-python@v2
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v4
         with:
-          python-version: 3.8
+          python-version: '3.10'
       - name: Install
         run: |
           python3 -m venv .env
@@ -28,15 +28,21 @@ jobs:
           source .env/bin/activate
           make lint
   tests:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
+      fail-fast: false
       matrix:
-        python-version: [3.8, 3.9, '3.10', 3.11]
+        python-version: ['3.10', '3.11', '3.12']
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
+    - name: Set up Java
+      uses: actions/setup-java@v4
+      with:
+        distribution: 'temurin'
+        java-version: '17'
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install

diff --git a/.github/workflows/doc.yml b/.github/workflows/doc.yml
@@ -11,15 +11,15 @@ on:
 jobs:
   build:
 
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: [3.9]
+        python-version: ['3.10']
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install pandoc

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -9,14 +9,19 @@ on:
     - master
 jobs:
   build-pex:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     strategy:
       matrix:
-        python-version: [3.8]
+        python-version: ['3.10']
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
+    - name: Set up Java
+      uses: actions/setup-java@v4
+      with:
+        distribution: 'temurin'
+        java-version: '17'
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
     - name: Install dependencies
@@ -27,28 +32,33 @@ jobs:
       run: |
         make build-pex
         mv autofaiss.pex autofaiss-${{ matrix.python-version }}.pex
-    - uses: actions/upload-artifact@v2
+    - uses: actions/upload-artifact@v4
       with:
         name: autofaiss_pex
         path: autofaiss-${{ matrix.python-version }}.pex
   deploy:
     needs: build-pex
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v4
+    - name: Set up Java
+      uses: actions/setup-java@v4
+      with:
+        distribution: 'temurin'
+        java-version: '17'
     - uses: actions-ecosystem/action-regex-match@v2
       id: regex-match
       with:
         text: ${{ github.event.head_commit.message }}
         regex: '^Release ([^ ]+)'
     - name: Download pex
-      uses: actions/download-artifact@v2
+      uses: actions/download-artifact@v4
       with:
         name: autofaiss_pex
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
-        python-version: '3.8'
+        python-version: '3.10'
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
@@ -58,7 +68,7 @@ jobs:
       uses: softprops/action-gh-release@v1
       with:
         files: |
-          autofaiss-3.8.pex
+          autofaiss-3.10.pex
         tag_name: ${{ steps.regex-match.outputs.group1 }}
     - name: Build and publish
       if: ${{ steps.regex-match.outputs.match != '' && github.event_name != 'pull_request' }}

diff --git a/Makefile b/Makefile
@@ -30,7 +30,7 @@ build-dist: ## [Continuous integration] Build package for pypi
 build-pex:
 	python3 -m venv .pexing
 	. .pexing/bin/activate && python -m pip install -U pip && python -m pip install pex
-	. .pexing/bin/activate && python -m pex setuptools pyspark==3.2.1 s3fs>=2022.1.0 . -o autofaiss.pex -v
+	. .pexing/bin/activate && python -m pex setuptools "pyspark>=4.0.0,<5.0.0" "s3fs>=2022.1.0" "numpy>=1.19.5,<2" . -o autofaiss.pex -v
 	rm -rf .pexing
 
 .PHONY: help

diff --git a/autofaiss/external/optimize.py b/autofaiss/external/optimize.py
@@ -10,6 +10,7 @@
 import faiss
 import fsspec
 import numpy as np
+from autofaiss.utils.json_encoder import NumpyEncoder
 from autofaiss.external.metadata import IndexMetadata, compute_memory_necessary_for_training_wrapper
 from autofaiss.external.scores import compute_fast_metrics
 from autofaiss.indices.index_utils import set_search_hyperparameters, speed_test_ms_per_query
@@ -569,6 +570,6 @@ def optimize_and_measure_index(
             with fsspec.open(index_path, "wb").open() as f:
                 faiss.write_index(index, faiss.PyCallbackIOWriter(f.write))
             with fsspec.open(index_infos_path, "w").open() as f:
-                json.dump(metric_infos, f)
+                json.dump(metric_infos, f, cls=NumpyEncoder)
 
     return metric_infos
diff --git a/autofaiss/external/quantize.py b/autofaiss/external/quantize.py
@@ -14,6 +14,7 @@
 import fire
 import fsspec
 import numpy as np
+from autofaiss.utils.json_encoder import NumpyEncoder
 from autofaiss.indices.build import get_write_ids_df_to_parquet_fn, get_optimize_index_fn
 from autofaiss.external.build import (
     create_index,
@@ -596,7 +597,7 @@ def score_index(
 
     if save_on_disk:
         with fsspec.open(output_index_info_path, "w").open() as f:
-            json.dump(infos, f)
+            json.dump(infos, f, cls=NumpyEncoder)
 
     return infos
 

diff --git a/autofaiss/external/scores.py b/autofaiss/external/scores.py
@@ -99,10 +99,10 @@ def compute_medium_metrics(
 
     infos: Dict[str, float] = {}
 
-    infos["1-recall@20"] = one_recall[20 - 1]
-    infos["1-recall@40"] = one_recall[40 - 1]
-    infos["20-recall@20"] = intersection_recall[20 - 1]
-    infos["40-recall@40"] = intersection_recall[40 - 1]
+    infos["1-recall@20"] = float(one_recall[20 - 1])
+    infos["1-recall@40"] = float(one_recall[40 - 1])
+    infos["20-recall@20"] = float(intersection_recall[20 - 1])
+    infos["40-recall@40"] = float(intersection_recall[40 - 1])
 
     return infos
 

diff --git a/autofaiss/indices/index_utils.py b/autofaiss/indices/index_utils.py
@@ -89,8 +89,8 @@ def search_speed_test(
     # avg2 = 1000 * (time.perf_counter() - test_start_time_s) / len(speed_list_ms)
 
     speed_infos = {
-        "avg_search_speed_ms": np.average(speed_list_ms2),
-        "99p_search_speed_ms": np.quantile(speed_list_ms2, 0.99),
+        "avg_search_speed_ms": float(np.average(speed_list_ms2)),
+        "99p_search_speed_ms": float(np.quantile(speed_list_ms2, 0.99)),
     }
 
     return speed_infos

diff --git a/autofaiss/metrics/reconstruction.py b/autofaiss/metrics/reconstruction.py
@@ -8,9 +8,11 @@
 
 def reconstruction_error(before, after, avg_norm_before: Optional[float] = None) -> float:
     """Computes the average reconstruction error"""
-    diff = np.mean(np.linalg.norm(after - before, axis=1))
+    diff = float(np.mean(np.linalg.norm(after - before, axis=1)))
     if avg_norm_before is None:
-        avg_norm_before = np.mean(np.linalg.norm(before, axis=1))
+        avg_norm_before = float(np.mean(np.linalg.norm(before, axis=1)))
-    return diff / avg_norm_before
+    # Divide with numpy so a zero norm yields inf/nan (as before the float casts) instead of
+    # raising ZeroDivisionError, and cast so callers always get a JSON-serializable Python float.
+    return float(np.divide(diff, avg_norm_before))
 
 

diff --git a/autofaiss/utils/json_encoder.py b/autofaiss/utils/json_encoder.py
@@ -0,0 +1,28 @@
+"""JSON encoder for numpy types"""
+import json
+import numpy as np
+
+
+class NumpyEncoder(json.JSONEncoder):
+    """JSON encoder that converts numpy scalars and arrays to native Python types.
+
+    Pass it as ``cls=NumpyEncoder`` to ``json.dump`` / ``json.dumps``.
+    """
+
+    def default(self, o):
+        """Return a JSON-serializable equivalent of ``o``.
+
+        Only called by the base encoder for objects it cannot serialize itself.
+        Falls back to the base implementation (which raises TypeError) for
+        anything that is not a numpy integer, float, bool or array.
+        """
+        if isinstance(o, np.integer):
+            return int(o)
+        if isinstance(o, np.floating):
+            return float(o)
+        if isinstance(o, np.bool_):
+            return bool(o)
+        if isinstance(o, np.ndarray):
+            # tolist() converts the elements to native Python scalars as well.
+            return o.tolist()
+        return super().default(o)
diff --git a/requirements-test.txt b/requirements-test.txt
@@ -4,5 +4,4 @@ pylint==3.0.3
 pytest-cov==4.1.0
 pytest-xdist==3.5.0
 pytest==8.0.1
-pyspark==3.2.2; python_version < "3.11"
-pyspark<3.6.0; python_version >= "3.11"
+pyspark>=4.0.0,<5.0.0
diff --git a/requirements.txt b/requirements.txt
@@ -1,10 +1,8 @@
-dataclasses>=0.6,<1.0.0; python_version < "3.7"
-fire>=0.4.0,<0.6.0
+fire>=0.4.0,<0.7.0
 numpy>=1.19.5,<2
 pandas>=1.1.5,<3
-pyarrow>=6.0.1,<16
+pyarrow>=6.0.1,<30
 tqdm>=4.62.3,<5
-faiss-cpu<1.7.3; python_version < "3.7"
-faiss-cpu>=1,<2; python_version >= "3.7"
+faiss-cpu>=1,<2
 fsspec>=2022.1.0
-embedding_reader>=1.5.1,<2
+embedding_reader>=1.8.0,<2
diff --git a/setup.py b/setup.py
@@ -33,14 +33,12 @@ def _read_reqs(relpath):
             "License :: OSI Approved :: Apache Software License",
             "Operating System :: OS Independent",
             "Programming Language :: Python :: 3",
-            "Programming Language :: Python :: 3.6",
-            "Programming Language :: Python :: 3.7",
-            "Programming Language :: Python :: 3.8",
-            "Programming Language :: Python :: 3.9",
             "Programming Language :: Python :: 3.10",
             "Programming Language :: Python :: 3.11",
+            "Programming Language :: Python :: 3.12",
             "Intended Audience :: Developers",
         ],
+        python_requires=">=3.10",
         long_description=long_description,
         long_description_content_type="text/markdown",
         description=long_description.split("\n")[0],