tensorchord
diff --git a/‎Cargo.lock‎
Lines changed: 343 additions & 360 deletions b/‎Cargo.lock‎
Lines changed: 343 additions & 360 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 19 additions & 11 deletions b/‎Cargo.toml‎
Lines changed: 19 additions & 11 deletions
diff --git a/‎README.md‎
Lines changed: 7 additions & 7 deletions b/‎README.md‎
Lines changed: 7 additions & 7 deletions
diff --git a/‎crates/bm25/Cargo.toml‎
Lines changed: 6 additions & 3 deletions b/‎crates/bm25/Cargo.toml‎
Lines changed: 6 additions & 3 deletions
@@ -22,20 +22,18 @@ pg18 = ["pgrx/pg18"]
 [dependencies]
 bm25 = { path = "./crates/bm25" }
 
-arrayvec = "0.7.6"
-bitflags = "2.10.0"
-bitpacking = { version = "0.9.3", default-features = false, features = [
-    "bitpacker4x",
-] }
-bytemuck = "1.25.0"
-generator = "0.8.8"
-lending-iterator = "0.1.7"
+always_equal.workspace = true
+bumpalo = "3.20.2"
+index.workspace = true
+ordered-float.workspace = true
 pgrx = "=0.17.0"
+pgrx-catalog = "0.3.2"
 serde.workspace = true
-thiserror = "2.0.18"
+toml = "1.0.6"
+validator.workspace = true
 
-[dev-dependencies]
-rand.workspace = true
+[target.'cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), any(target_os = "linux", target_os = "macos")))'.dependencies]
+mimalloc = { version = "0.1.48", features = ["local_dynamic_tls"] }
 
 [lints]
 workspace = true
@@ -49,12 +47,22 @@ version = "0.0.0"
 edition = "2024"
 
 [workspace.dependencies]
+always_equal = { git = "https://github.com/usamoi/VectorChord.git" }
+index = { git = "https://github.com/usamoi/VectorChord.git" }
+ordered-float = { version = "5.1.0", default-features = false }
 rand = "0.10.0"
 serde = { version = "1.0.228", features = ["derive"] }
+validator = { version = "0.20.0", features = ["derive"] }
+zerocopy = { version = "0.8.42", features = ["derive"] }
 
 [workspace.lints]
 # complexity
+clippy.manual_is_multiple_of = "allow"
 clippy.too_many_arguments = "allow"
+clippy.type_complexity = "allow"
+# style
+clippy.collapsible_if = "allow"
+clippy.needless_range_loop = "allow"
 # unsafe
 rust.unsafe_code = "deny"
 rust.unsafe_op_in_unsafe_fn = "deny"
 
@@ -94,14 +94,14 @@ CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_op
 Now we can calculate the BM25 score between the query and the vectors. Note that the BM25 score in VectorChord-BM25 is negative, which means the more negative the score, the more relevant the document is. We intentionally make it negative so that you can use the default order by to get the most relevant documents first.
 
 ```sql
--- to_bm25query(index_name, query, tokenizer_name)
+-- bm25query(index_name, tokenize(query, tokenizer_name))
 -- <&> is the operator to compute the bm25 score
-SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS bm25_score FROM documents;
+SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS bm25_score FROM documents;
 ```
 
 And you can use the order by to utilize the index to get the most relevant documents first and faster.
 ```sql
-SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS rank
+SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS rank
 FROM documents
 ORDER BY rank
 LIMIT 10;
@@ -163,7 +163,7 @@ INSERT INTO documents (passage) VALUES
 
 CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_ops);
 
-SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'tokenizer1')) AS rank
+SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'tokenizer1')) AS rank
 FROM documents
 ORDER BY rank
 LIMIT 10;
@@ -217,7 +217,7 @@ INSERT INTO documents (passage) VALUES
 
 CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_ops);
 
-SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('人', 'tokenizer1')) AS rank
+SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('人', 'tokenizer1')) AS rank
 FROM documents
 ORDER BY rank
 LIMIT 10;
@@ -335,7 +335,7 @@ UPDATE documents SET embedding = tokenize(passage, 'lindera_ipadic');
 
 CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_ops);
 
-SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('書生', 'lindera_ipadic')) AS rank
+SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('書生', 'lindera_ipadic')) AS rank
 FROM documents
 ORDER BY rank
 LIMIT 10;
@@ -447,7 +447,7 @@ In contrast, Vectorchord-bm25 focuses exclusively on BM25 ranking within Postgre
 
 ### Functions
 
-- `to_bm25query(index_name regclass, query_vector bm25vector) RETURNS bm25query`: Convert the input text into a BM25 query.
+- `bm25query(regclass, bm25vector) RETURNS bm25query`: Build a BM25 query from the target index (`regclass`) and an already-tokenized query (`bm25vector`).
 
 ### Operators
 
 
@@ -5,12 +5,15 @@ edition.workspace = true
 publish = false
 
 [dependencies]
+always_equal.workspace = true
 bitpacking = { version = "0.9.3", default-features = false, features = [
     "bitpacker4x",
 ] }
-
-[dev-dependencies]
-rand.workspace = true
+index.workspace = true
+ordered-float.workspace = true
+serde.workspace = true
+validator.workspace = true
+zerocopy.workspace = true
 
 [lints]
 workspace = true