Skip to content

Commit b1ca778

Browse files
committed
refactor: rewrite, version 1
Signed-off-by: usamoi <usamoi@outlook.com>
1 parent 3348361 commit b1ca778

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+5017
-7733
lines changed

Cargo.lock

Lines changed: 343 additions & 360 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,20 +22,18 @@ pg18 = ["pgrx/pg18"]
2222
[dependencies]
2323
bm25 = { path = "./crates/bm25" }
2424

25-
arrayvec = "0.7.6"
26-
bitflags = "2.10.0"
27-
bitpacking = { version = "0.9.3", default-features = false, features = [
28-
"bitpacker4x",
29-
] }
30-
bytemuck = "1.25.0"
31-
generator = "0.8.8"
32-
lending-iterator = "0.1.7"
25+
always_equal.workspace = true
26+
bumpalo = "3.20.2"
27+
index.workspace = true
28+
ordered-float.workspace = true
3329
pgrx = "=0.17.0"
30+
pgrx-catalog = "0.3.2"
3431
serde.workspace = true
35-
thiserror = "2.0.18"
32+
toml = "1.0.6"
33+
validator.workspace = true
3634

37-
[dev-dependencies]
38-
rand.workspace = true
35+
[target.'cfg(all(any(target_arch = "x86_64", target_arch = "aarch64"), any(target_os = "linux", target_os = "macos")))'.dependencies]
36+
mimalloc = { version = "0.1.48", features = ["local_dynamic_tls"] }
3937

4038
[lints]
4139
workspace = true
@@ -49,12 +47,22 @@ version = "0.0.0"
4947
edition = "2024"
5048

5149
[workspace.dependencies]
50+
always_equal = { git = "https://github.com/usamoi/VectorChord.git" }
51+
index = { git = "https://github.com/usamoi/VectorChord.git" }
52+
ordered-float = { version = "5.1.0", default-features = false }
5253
rand = "0.10.0"
5354
serde = { version = "1.0.228", features = ["derive"] }
55+
validator = { version = "0.20.0", features = ["derive"] }
56+
zerocopy = { version = "0.8.42", features = ["derive"] }
5457

5558
[workspace.lints]
5659
# complexity
60+
clippy.manual_is_multiple_of = "allow"
5761
clippy.too_many_arguments = "allow"
62+
clippy.type_complexity = "allow"
63+
# style
64+
clippy.collapsible_if = "allow"
65+
clippy.needless_range_loop = "allow"
5866
# unsafe
5967
rust.unsafe_code = "deny"
6068
rust.unsafe_op_in_unsafe_fn = "deny"

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,14 @@ CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_op
9494
Now we can calculate the BM25 score between the query and the vectors. Note that the BM25 score in VectorChord-BM25 is negative: the more negative the score, the more relevant the document. We intentionally make it negative so that the default (ascending) `ORDER BY` returns the most relevant documents first.
9595

9696
```sql
97-
-- to_bm25query(index_name, query, tokenizer_name)
97+
-- bm25query(index_name, query_vector), where query_vector = tokenize(text, tokenizer_name)
9898
-- <&> is the operator to compute the bm25 score
99-
SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS bm25_score FROM documents;
99+
SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS bm25_score FROM documents;
100100
```
101101

102102
You can also use `ORDER BY` so that the index is used to return the most relevant documents first, and faster.
103103
```sql
104-
SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS rank
104+
SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'bert')) AS rank
105105
FROM documents
106106
ORDER BY rank
107107
LIMIT 10;
@@ -163,7 +163,7 @@ INSERT INTO documents (passage) VALUES
163163

164164
CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_ops);
165165

166-
SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'tokenizer1')) AS rank
166+
SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('PostgreSQL', 'tokenizer1')) AS rank
167167
FROM documents
168168
ORDER BY rank
169169
LIMIT 10;
@@ -217,7 +217,7 @@ INSERT INTO documents (passage) VALUES
217217

218218
CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_ops);
219219

220-
SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('', 'tokenizer1')) AS rank
220+
SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('', 'tokenizer1')) AS rank
221221
FROM documents
222222
ORDER BY rank
223223
LIMIT 10;
@@ -335,7 +335,7 @@ UPDATE documents SET embedding = tokenize(passage, 'lindera_ipadic');
335335

336336
CREATE INDEX documents_embedding_bm25 ON documents USING bm25 (embedding bm25_ops);
337337

338-
SELECT id, passage, embedding <&> to_bm25query('documents_embedding_bm25', tokenize('書生', 'lindera_ipadic')) AS rank
338+
SELECT id, passage, embedding <&> bm25query('documents_embedding_bm25', tokenize('書生', 'lindera_ipadic')) AS rank
339339
FROM documents
340340
ORDER BY rank
341341
LIMIT 10;
@@ -447,7 +447,7 @@ In contrast, Vectorchord-bm25 focuses exclusively on BM25 ranking within Postgre
447447

448448
### Functions
449449

450-
- `to_bm25query(index_name regclass, query_vector bm25vector) RETURNS bm25query`: Convert the input text into a BM25 query.
450+
- `bm25query(regclass, bm25vector) RETURNS bm25query`: Build a BM25 query from an index and a tokenized query vector.
451451

452452
### Operators
453453

crates/bm25/Cargo.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,15 @@ edition.workspace = true
55
publish = false
66

77
[dependencies]
8+
always_equal.workspace = true
89
bitpacking = { version = "0.9.3", default-features = false, features = [
910
"bitpacker4x",
1011
] }
11-
12-
[dev-dependencies]
13-
rand.workspace = true
12+
index.workspace = true
13+
ordered-float.workspace = true
14+
serde.workspace = true
15+
validator.workspace = true
16+
zerocopy.workspace = true
1417

1518
[lints]
1619
workspace = true

0 commit comments

Comments
 (0)