1.0.0 (#154)

usamoi · web-flow · commit 2b919743f045 · 2025-11-12T12:29:28.000+08:00
Signed-off-by: usamoi <usamoi@outlook.com>
diff --git a/.vitepress/config.mts b/.vitepress/config.mts
@@ -199,7 +199,7 @@ export default defineConfig({
             { text: 'Prewarm', link: '/vectorchord/usage/prewarm' },
             { text: 'Prefilter', link: '/vectorchord/usage/prefilter' },
             { text: 'Prefetch', link: '/vectorchord/usage/prefetch' },
-            { text: 'Rerank In Table', link: '/vectorchord/usage/rerank-in-table' },
+            { text: 'Rerank in Table', link: '/vectorchord/usage/rerank-in-table' },
             { text: 'External Build', link: '/vectorchord/usage/external-index-precomputation' },
           ]
         },
diff --git a/src/vectorchord/admin/kubernetes.md b/src/vectorchord/admin/kubernetes.md
@@ -151,7 +151,7 @@ tensorchord=> \dx
   Name   | Version |   Schema   |                                         Description                                         
 ---------+---------+------------+---------------------------------------------------------------------------------------------
  plpgsql | 1.0     | pg_catalog | PL/pgSQL procedural language
- vchord  | 0.5.3   | public     | vchord: Vector database plugin for Postgres, written in Rust, specifically designed for LLM
+ vchord  | 1.0.0   | public     | vchord: Vector database plugin for Postgres, written in Rust, specifically designed for LLM
  vector  | 0.8.1   | public     | vector data type and ivfflat and hnsw access methods
 (3 rows)
 ```
@@ -216,7 +216,7 @@ spec:
     extensions:
       - name: vchord
         image:
-          reference: ghcr.io/tensorchord/vchord-scratch:pg18-v0.5.3
+          reference: ghcr.io/tensorchord/vchord-scratch:pg18-v1.0.0
         dynamic_library_path:
           - /usr/lib/postgresql/18/lib/
         extension_control_path:
@@ -243,7 +243,7 @@ tensorchord=> \dx
   Name   | Version |   Schema   |                                         Description                                         
 ---------+---------+------------+---------------------------------------------------------------------------------------------
  plpgsql | 1.0     | pg_catalog | PL/pgSQL procedural language
- vchord  | 0.5.3   | public     | vchord: Vector database plugin for Postgres, written in Rust, specifically designed for LLM
+ vchord  | 1.0.0   | public     | vchord: Vector database plugin for Postgres, written in Rust, specifically designed for LLM
  vector  | 0.8.1   | public     | vector data type and ivfflat and hnsw access methods
 (3 rows)
 
diff --git a/src/vectorchord/getting-started/installation.md b/src/vectorchord/getting-started/installation.md
@@ -28,7 +28,7 @@ docker run \
   --name vchord-demo \
   -e POSTGRES_PASSWORD=mysecretpassword \
   -p 5432:5432 \
-  -d tensorchord/vchord-postgres:pg18-v0.5.3
+  -d tensorchord/vchord-postgres:pg18-v1.0.0
 ```
 
 2. Connect to the database using the `psql` command line tool. The default username is `postgres`.
@@ -81,7 +81,7 @@ Other sections may align with the above.
 We provide a scratch image that contains only the files of VectorChord. You can install VectorChord in any image using Docker's `COPY`, like
 
 ```dockerfile
-FROM tensorchord/vchord-scratch:pg18-v0.5.3 AS vchord_scratch
+FROM tensorchord/vchord-scratch:pg18-v1.0.0 AS vchord_scratch
 FROM postgres:18-bookworm
 RUN apt-get update && apt-get install -y postgresql-18-pgvector
 COPY --from=vchord_scratch / /
@@ -118,8 +118,8 @@ If you have not installed PostgreSQL yet, please install PostgreSQL. If you have
 1. Download the source code, build and install it with `make`.
 
 ```sh
-curl -fsSL https://github.com/tensorchord/VectorChord/archive/refs/tags/0.5.3.tar.gz | tar -xz
-cd VectorChord-0.5.3
+curl -fsSL https://github.com/tensorchord/VectorChord/archive/refs/tags/1.0.0.tar.gz | tar -xz
+cd VectorChord-1.0.0
 make build
 make install # or `sudo make install`
 ```
@@ -161,7 +161,7 @@ Please note that binaries generated with a specific `target-cpu` are not compati
 You can also do it by using Cargo's configuration.
 
 ```sh
-cd VectorChord-0.5.3
+cd VectorChord-1.0.0
 mkdir -p .cargo
 touch .cargo/config.toml
 echo 'build.rustflags = ["-Ctarget-cpu=icelake"]' >> ./.cargo/config.toml
@@ -174,8 +174,8 @@ If you have not installed PostgreSQL yet, please install PostgreSQL following ht
 1. Download Debian packages in [the release page](https://github.com/tensorchord/VectorChord/releases/latest), and install them by `apt`.
 
 ```sh
-wget https://github.com/tensorchord/VectorChord/releases/download/0.5.3/postgresql-18-vchord_0.5.3-1_$(dpkg --print-architecture).deb
-sudo apt install ./postgresql-18-vchord_0.5.3-1_$(dpkg --print-architecture).deb
+wget https://github.com/tensorchord/VectorChord/releases/download/1.0.0/postgresql-18-vchord_1.0.0-1_$(dpkg --print-architecture).deb
+sudo apt install ./postgresql-18-vchord_1.0.0-1_$(dpkg --print-architecture).deb
 ```
 
 2. Configure your PostgreSQL by modifying the `shared_preload_libraries` to include the extension. And then restart the PostgreSQL cluster.
@@ -198,8 +198,8 @@ If you have not installed PostgreSQL yet, please install PostgreSQL following ht
 1. Download Debian packages in [the release page](https://github.com/tensorchord/VectorChord/releases/latest), and install them by `apt`.
 
 ```sh
-wget https://github.com/tensorchord/VectorChord/releases/download/0.5.3/postgresql-18-vchord_0.5.3-1_$(dpkg --print-architecture).deb
-sudo apt install ./postgresql-18-vchord_0.5.3-1_$(dpkg --print-architecture).deb
+wget https://github.com/tensorchord/VectorChord/releases/download/1.0.0/postgresql-18-vchord_1.0.0-1_$(dpkg --print-architecture).deb
+sudo apt install ./postgresql-18-vchord_1.0.0-1_$(dpkg --print-architecture).deb
 ```
 
 2. Configure your PostgreSQL by modifying the `shared_preload_libraries` to include the extension. And then restart the PostgreSQL cluster.
diff --git a/src/vectorchord/getting-started/overview.md b/src/vectorchord/getting-started/overview.md
@@ -37,7 +37,7 @@ docker run \
   --name vectorchord-demo \
   -e POSTGRES_PASSWORD=mysecretpassword \
   -p 5432:5432 \
-  -d tensorchord/vchord-postgres:pg18-v0.5.3
+  -d tensorchord/vchord-postgres:pg18-v1.0.0
 ```
 > In addition to the base image with the VectorChord extension, we provide an all-in-one image, `tensorchord/vchord-suite:pg18-latest`. This comprehensive image includes all official TensorChord extensions. Developers should select an image tag that is compatible with their extension's version, as indicated in [the support matrix](https://github.com/tensorchord/VectorChord-images?tab=readme-ov-file#support-matrix).
 
@@ -86,7 +86,7 @@ For more usage, please read:
 - [Prewarm](/vectorchord/usage/prewarm)
 - [Prefilter](/vectorchord/usage/prefilter)
 - [Prefetch](/vectorchord/usage/prefetch)
-- [Rerank In Table](/vectorchord/usage/rerank-in-table)
+- [Rerank in Table](/vectorchord/usage/rerank-in-table)
 - [External Build](/vectorchord/usage/external-index-precomputation)
 
 ## License
diff --git a/src/vectorchord/getting-started/vectorchord-suite.md b/src/vectorchord/getting-started/vectorchord-suite.md
@@ -57,7 +57,7 @@ CREATE EXTENSION IF NOT EXISTS pg_tokenizer CASCADE;
 CREATE EXTENSION IF NOT EXISTS vchord_bm25 CASCADE;
 \dx
 pg_tokenizer | 0.1.1   | tokenizer_catalog | pg_tokenizer
-vchord       | 0.5.3   | public            | vchord: Vector database plugin for Postgres, written in Rust, specifically designed for LLM
+vchord       | 1.0.0   | public            | vchord: Vector database plugin for Postgres, written in Rust, specifically designed for LLM
 vchord_bm25  | 0.2.2   | bm25_catalog      | vchord_bm25: A postgresql extension for bm25 ranking algorithm
 vector       | 0.8.1   | public            | vector data type and ivfflat and hnsw access methods
 ```
diff --git a/src/vectorchord/index.md b/src/vectorchord/index.md
@@ -18,7 +18,7 @@
 - [Prewarm](/vectorchord/usage/prewarm)
 - [Prefilter](/vectorchord/usage/prefilter)
 - [Prefetch](/vectorchord/usage/prefetch)
-- [Rerank In Table](/vectorchord/usage/rerank-in-table)
+- [Rerank in Table](/vectorchord/usage/rerank-in-table)
 - [External Build](/vectorchord/usage/external-index-precomputation)
 
 ## Use Cases
diff --git a/src/vectorchord/usage/indexing.md b/src/vectorchord/usage/indexing.md
@@ -88,21 +88,23 @@ SET vchordrq.probes TO '10';
 SELECT * FROM items ORDER BY embedding <=> '[3,1,2]' LIMIT 10;
 ```
 
-For large tables, the `build.internal` process costs huge time and memory. You can refer to [External Build](external-index-precomputation) to have a better experience.
-
-For large tables, you may opt to use more shared memory to accelerate the process by setting `build.pin` to `true`.
+For large tables, you may opt to use more shared memory to accelerate the process by setting `build.pin` to `2`.
 
 ```sql
 CREATE INDEX ON items USING vchordrq (embedding vector_l2_ops) WITH (options = $$
 residual_quantization = true
-build.pin = true
+build.pin = 2
 [build.internal]
 lists = [1000]
 spherical_centroids = true
 build_threads = 8
 $$);
 ```
 
+For large tables, the `build.internal` process costs significant time and memory. Let $D$ be `build.internal.kmeans_dimension` (or the vector dimension if that option is not set), $C$ be `build.internal.lists[-1]`, $F$ be `build.internal.sampling_factor`, and $T$ be `build.internal.build_threads`. The memory consumption is approximately $4CD(F + T + 1)$ bytes. You can moderately reduce these options to lower memory usage.
+
+You can also refer to [External Build](external-index-precomputation) to offload the indexing workload to other machines.
+
 ## Reference
 
 ### Operator Classes <badge type="info" text="vchordrq" /> {#operator-classes}
@@ -140,21 +142,40 @@ The operator classes for `MaxSim` are available since version `0.3.0`.
 
 #### `residual_quantization`
 
-- Description: This index parameter determines whether residual quantization is used. If you not familiar with residual quantization, you can read this [blog](https://drscotthawley.github.io/blog/posts/2023-06-12-RVQ.html) for more information. In short, residual quantization is a technique that improves the accuracy of vector search by quantizing the residuals of the vectors.
+- Description: This option determines whether residual quantization is used. If you are not familiar with residual quantization, you can read this [blog](https://drscotthawley.github.io/blog/posts/2023-06-12-RVQ.html) for more information. In short, residual quantization is a technique that improves the accuracy of vector search by quantizing the residuals of the vectors.
 - Type: boolean
 - Default: `false`
 - Example:
     - `residual_quantization = false` means that residual quantization is not used.
     - `residual_quantization = true` means that residual quantization is used.
 
+#### `degree_of_parallelism` <badge type="tip" text="since v1.0.0" />
+
+- Description: This option is a hint that specifies the degree of parallelism. In most cases, you do not need to change it. If you are using a CPU with more than `32` threads and wish to utilize more threads for PostgreSQL, you may set it to the number of threads for better performance.
+- Type: integer
+- Default: `32`
+- Domain: `[1, 256]`
+- Example:
+    - `degree_of_parallelism = 32` hints to the index that `32` or fewer processes may access the index concurrently.
+    - `degree_of_parallelism = 64` hints to the index that `64` or fewer processes may access the index concurrently.
+
 #### `build.pin` <badge type="tip" text="since v0.2.1" />
 
-- Description: This index parameter determines whether shared memory is used for indexing. For large tables, you can choose to enable this option to speed up the build process.
-- Type: boolean
-- Default: `false`
+- Description: This option determines whether shared memory is used for indexing. For large tables, you can choose to enable this option to speed up the build process.
+- Type: union of integer and boolean
+- Default:
+    - `-1` <badge type="tip" text="since v1.0.0" />
+    - `false` <badge type="tip" text="until v0.5.3" />
+- Domain:
+    - `{-1, 0, 1, 2, false, true}` <badge type="tip" text="since v1.0.0" />
+    - `{false, true}` <badge type="tip" text="until v0.5.3" />
 - Example:
-    - `build.pin = false` means that shared memory is not used.
-    - `build.pin = true` means that shared memory is used.
+    - `build.pin = 2` means the hot portion of the index is cached in memory. 
+    - `build.pin = 1` means a subset of the hot portion of the index is cached in memory, consuming less memory.
+    - `build.pin = 0` means that this feature is enabled but nothing is actually cached. This option is for debugging purposes only.
+    - `build.pin = -1` means that this feature is disabled.
+    - `build.pin = false` is the legacy form of `build.pin = -1`.
+    - `build.pin = true` is the legacy form of `build.pin = 1`.
 
 ### Default Build Options <badge type="tip" text="since v0.5.3" />
 
@@ -164,7 +185,7 @@ This is the default value of index building. The index will not be partitioned.
 
 #### `build.internal.lists`
 
-- Description: This index parameter determines the hierarchical structure of the vector space partitioning.
+- Description: This option determines the hierarchical structure of the vector space partitioning.
 - Type: list of integers
 - Default:
     - `[]` <badge type="tip" text="since v0.3.0" />
@@ -173,11 +194,11 @@ This is the default value of index building. The index will not be partitioned.
     - `build.internal.lists = []` means that the vector space is not partitioned.
     - `build.internal.lists = [4096]` means the vector space is divided into $4096$ cells.
     - `build.internal.lists = [4096, 262144]` means the vector space is divided into $4096$ cells, and those cells are further divided into $262144$ smaller cells.
-- Note: The index partitions the vector space into multiple Voronoi cells based on centroids, iteratively creating a hierarchical space partition tree. Each leaf node in this tree represents a region with an associated list storing vectors in that region. During insertion, vectors are placed in lists corresponding to their appropriate leaf nodes. For queries, the index optimizes search by excluding lists whose leaf nodes are distant from the query vector, effectively pruning the search space. If the length of `lists` is $1$, the `lists` option should be no less than $4 * \sqrt{N}$, where $N$ is the number of vectors in the table.
+- Note: The index partitions the vector space into multiple Voronoi cells based on centroids, iteratively creating a hierarchical space partition tree. Each leaf node in this tree represents a region with an associated list storing vectors in that region. During insertion, vectors are placed in lists corresponding to their appropriate leaf nodes. For queries, the index optimizes search by excluding lists whose leaf nodes are distant from the query vector, effectively pruning the search space. If the length of `lists` is $1$, the `lists` option should be no less than $4\sqrt{N}$, where $N$ is the number of vectors in the table.
 
 #### `build.internal.spherical_centroids`
 
-- Description: This index parameter determines whether to perform spherical K-means -- the centroids are L2 normalized after each iteration, you can refer to option `spherical` in [here](https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization#additional-options).
+- Description: This option determines whether to perform spherical K-means, in which the centroids are L2-normalized after each iteration. For details, refer to the `spherical` option [here](https://github.com/facebookresearch/faiss/wiki/Faiss-building-blocks:-clustering,-PCA,-quantization#additional-options).
 - Type: boolean
 - Default: `false`
 - Example:
@@ -187,17 +208,17 @@ This is the default value of index building. The index will not be partitioned.
 
 #### `build.internal.sampling_factor` <badge type="tip" text="since v0.2.0" />
 
-- Description: This index parameter determines the number of vectors the K-means algorithm samples per cluster. The higher this value, the slower the build, the greater the memory consumption in building, and the better search performance.
+- Description: This option determines the number of vectors the K-means algorithm samples per cluster. The higher this value, the slower the build, the greater the memory consumption during the build, and the better the search performance.
 - Type: integer
 - Domain: `[0, 1024]`
 - Default: `256`
 - Example:
     - `build.internal.sampling_factor = 256` means that the K-means algorithm samples $256 C$ vectors, where $C$ is the maximum value in `build.internal.lists`.
-    - `build.internal.sampling_factor = 1024` means that the K-means algorithm samples $1024 C$ vectors, where $C$ is the maximum value in `build.internal.lists`.
+    - `build.internal.sampling_factor = 32` means that the K-means algorithm samples $32 C$ vectors, where $C$ is the maximum value in `build.internal.lists`. This reduces K-means' time and memory usage to approximately $\frac{1}{8}$ of what it would be with the default value of `256`.
 
 #### `build.internal.kmeans_iterations` <badge type="tip" text="since v0.2.2" />
 
-- Description: This index parameter determines the number of iterations for K-means algorithm. The higher this value, the slower the build.
+- Description: This option determines the number of iterations for the K-means algorithm. The higher this value, the slower the build.
 - Type: integer
 - Domain: `[0, 1024]`
 - Default: `10`
@@ -207,14 +228,31 @@ This is the default value of index building. The index will not be partitioned.
 
 #### `build.internal.build_threads`
 
-- Description: This index parameter determines the number of threads used by K-means algorithm. The higher this value, the faster the build, and greater load on the server in building.
+- Description: This option determines the number of threads used by the K-means algorithm. The higher this value, the faster the build, and the greater the load on the server during the build.
 - Type: integer
 - Domain: `[1, 255]`
 - Default: `1`
 - Example:
     - `build.internal.build_threads = 1` means that the K-means algorithm uses $1$ thread.
     - `build.internal.build_threads = 4` means that the K-means algorithm uses $4$ threads.
 
+#### `build.internal.kmeans_algorithm` <badge type="tip" text="since v1.0.0" />
+
+- Description: This option determines the K-means algorithm to be used.
+- Type: object
+- Example:
+    - `build.internal.kmeans_algorithm.lloyd = {}`. This uses Lloyd's algorithm. This is the default value.
+    - `build.internal.kmeans_algorithm.hierarchical = {}`. This uses hierarchical clustering. Compared to Lloyd's algorithm, this approach is much faster, but it may cause a loss of accuracy.
+
+#### `build.internal.kmeans_dimension` <badge type="tip" text="since v1.0.0" />
+
+- Description: This option determines the dimension to use for K-means input and output. This feature employs dimensionality reduction and expansion via resampling, effectively reducing K-means' time and memory consumption, but it may cause a loss of accuracy.
+- Type: union of integer and null
+- Default: null
+- Example:
+    - If this option is not set, this feature is disabled.
+    - `build.internal.kmeans_dimension = 100` means that K-means will process vectors with $100$ dimensions. For original vectors of $900$ dimensions, this reduces K-means' time and memory usage to approximately $\frac{1}{9}$ of what it would be without this feature.
+
 ### Search Parameters <badge type="info" text="vchordrq" /> {#search-parameters}
 
 #### `vchordrq.enable_scan` <badge type="tip" text="since v0.5.0" />
@@ -250,7 +288,7 @@ This is the default value of index building. The index will not be partitioned.
     - `SET vchordrq.epsilon = 0.1` indicates you are using a very optimistic lower bound estimation. You set it this way because your dataset is not sensitive to the lower bound estimation, for the precision you need.
     - `SET vchordrq.epsilon = 4.0` indicates you are using a very pessimistic lower bound estimation. You set it this way because your dataset is very sensitive to the lower bound estimation, for the precision you need.
 
-#### `vchordrq.prewarm_dim` <badge type="danger" text="deprecated in v0.4.0" />	
+#### `vchordrq.prewarm_dim` <badge type="danger" text="removed in v0.4.0" />	
 
 - Description: The `vchordrq.prewarm_dim` GUC parameter is used to precompute the RaBitQ projection matrix for the specified dimensions. This can help to reduce the latency of the first query after the PostgreSQL cluster is started.
 - Type: list of integers
diff --git a/src/vectorchord/usage/measure-recall.md b/src/vectorchord/usage/measure-recall.md
@@ -1,4 +1,4 @@
-# Measure Recall
+# Measure Recall <badge type="tip" text="since v0.5.0" />
 
 In the context of vector search, recall is the ratio of true nearest neighbors to approximate nearest neighbors returned by the index. For example, if the index retrieves $100$ approximate nearest neighbors and $97$ of them are true nearest neighbors, then the recall is $\frac{97}{100} = 0.97$.
 
diff --git a/src/vectorchord/usage/rerank-in-table.md b/src/vectorchord/usage/rerank-in-table.md

Original file line number	Diff line number	Diff line change
`@@ -199,7 +199,7 @@ export default defineConfig({`
`199`	`199`	`{ text: 'Prewarm', link: '/vectorchord/usage/prewarm' },`
`200`	`200`	`{ text: 'Prefilter', link: '/vectorchord/usage/prefilter' },`
`201`	`201`	`{ text: 'Prefetch', link: '/vectorchord/usage/prefetch' },`
`202`		`- { text: 'Rerank In Table', link: '/vectorchord/usage/rerank-in-table' },`
	`202`	`+ { text: 'Rerank in Table', link: '/vectorchord/usage/rerank-in-table' },`
`203`	`203`	`{ text: 'External Build', link: '/vectorchord/usage/external-index-precomputation' },`
`204`	`204`	`]`
`205`	`205`	`},`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-# Measure Recall`
	`1`	`+# Measure Recall <badge type="tip" text="since v0.5.0" />`
`2`	`2`
`3`	`3`	`In the context of vector search, recall is the ratio of true nearest neighbors to approximate nearest neighbors returned by the index. For example, if the index retrieves $100$ approximate nearest neighbors and $97$ of them are true nearest neighbors, then the recall is $\frac{97}{100} = 0.97$.`
`4`	`4`