Skip to content

Commit 3e08af3

Browse files
authored
feat: safe sql (#8)
* remove legacy code; support jsonb for pg; fix datetime to timestamptz; use psycopg string composition to make it safe; fix some type errors. Signed-off-by: Keming <kemingyang@tensorchord.ai> * fix typo. Signed-off-by: Keming <kemingyang@tensorchord.ai> --------- Signed-off-by: Keming <kemingyang@tensorchord.ai>
1 parent 4af7006 commit 3e08af3

22 files changed

+224
-847
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ cython_debug/
173173

174174
# test data
175175
data/
176+
datasets/
176177
*.txt
177178
*.jpeg
178179

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ PY_SOURCE=.
33
lint:
44
@uv run ruff check ${PY_SOURCE}
55

6+
typecheck:
7+
@uv run -- mypy --non-interactive --install-types ${PY_SOURCE}
8+
69
format:
710
@uv run ruff check --fix ${PY_SOURCE}
811
@uv run ruff format ${PY_SOURCE}

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ timeline
3535
## Examples
3636

3737
- [beir.py](./examples/beir.py): the most flexible way to use the library (loading, indexing, querying and evaluation)
38+
- [web.py](./examples/web.py): build a web application with from the defined tables and pipeline
39+
- [essay.py](./examples/essay.py): extract the content from Paul Graham's essays and evaluate the search results from LLM generated queries
40+
- [contextual.py](./examples/contextual.py): contextual retrieval example
3841

3942
## Development
4043

4144
```bash
42-
docker run --rm -d -e POSTGRES_PASSWORD=postgres -p 5432:5432 tensorchord/vchord-postgres:pg17-v0.2.0
45+
docker run --rm -d --name vechord -e POSTGRES_PASSWORD=postgres -p 5432:5432 tensorchord/vchord-postgres:pg17-v0.2.1
4346
envd up
4447
# inside the envd env, sync all the dependencies
4548
make sync

build.envd

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,5 @@
33

44
def build():
55
base(dev=True, image="ubuntu:22.04")
6-
install.conda(use_mamba=True)
7-
install.python()
8-
install.python_packages(name=["uv"])
6+
install.uv()
97
shell("fish")

examples/basic.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

(filename header missing in extraction — next diff follows)
Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,16 @@ def context_embedding(uid: int, text: str) -> list[ContextChunk]:
8484
for (context, origin) in zip(
8585
augmentor.augment_context([c.text for c in chunks]),
8686
[c.text for c in chunks],
87+
strict=False,
8788
)
8889
]
8990
return [
9091
ContextChunk(
9192
chunk_uid=chunk_uid, text=augmented, vector=dense.vectorize_chunk(augmented)
9293
)
93-
for (chunk_uid, augmented) in zip([c.uid for c in chunks], context_chunks)
94+
for (chunk_uid, augmented) in zip(
95+
[c.uid for c in chunks], context_chunks, strict=False
96+
)
9497
]
9598

9699

@@ -139,4 +142,7 @@ def evaluate(uid: int, doc_uid: int, text: str):
139142
scores = evaluate()
140143
print(sum(scores) / len(scores))
141144

145+
chunks = query_context_chunk("vector search")
146+
print(chunks)
147+
142148
vr.clear_storage()

examples/essay.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def create_query(uid: int, text: str) -> Query:
8383

8484

8585
@vr.inject(input=Query)
86-
def evalute(cid: int, vector: Vector[768]) -> Evaluation:
86+
def evaluate(cid: int, vector: Vector[768]) -> Evaluation:
8787
chunks: list[Chunk] = vr.search(Chunk, vector, topk=TOP_K)
8888
score = evaluator.evaluate_one(cid, [chunk.uid for chunk in chunks])
8989
return Evaluation(
@@ -95,6 +95,6 @@ def evalute(cid: int, vector: Vector[768]) -> Evaluation:
9595
segment_essay()
9696
create_query()
9797

98-
res: list[Evaluation] = evalute()
98+
res: list[Evaluation] = evaluate()
9999
print("ndcg", sum(r.ndcg for r in res) / len(res))
100100
print(f"recall@{TOP_K}", sum(r.recall for r in res) / len(res))

examples/gemini.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

pyproject.toml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "vechord"
33
dynamic = ["version"]
44
description = "VectorChord Python SDK"
55
readme = "README.md"
6-
requires-python = ">=3.9"
6+
requires-python = ">=3.10"
77
dependencies = [
88
"falcon>=4.0.2",
99
"httpx>=0.28.1",
@@ -41,13 +41,14 @@ build-backend = "pdm.backend"
4141

4242
[dependency-groups]
4343
dev = [
44+
"mypy>=1.15.0",
4445
"pdm-backend>=2.4.3",
4546
"pytest>=8.3.5",
4647
"ruff>=0.9.1",
4748
]
4849

4950
[tool.ruff]
50-
target-version = "py39"
51+
target-version = "py310"
5152
[tool.ruff.lint]
5253
select = ["E", "F", "G", "B", "I", "SIM", "TID", "PL", "RUF"]
5354
ignore = ["E501"]
@@ -56,6 +57,21 @@ known-first-party = ["vechord"]
5657
[tool.ruff.lint.pylint]
5758
max-args = 5
5859

60+
[tool.mypy]
61+
python_version = "3.10"
62+
warn_redundant_casts = true
63+
warn_unreachable = true
64+
pretty = true
65+
66+
[[tool.mypy.overrides]]
67+
module = [
68+
"pgvector.psycopg",
69+
"pytrec_eval",
70+
"pypdfium2",
71+
"wordllama",
72+
]
73+
ignore_missing_imports = true
74+
5975
[tool.pdm]
6076
distribution = true
6177
[tool.pdm.version]
@@ -64,5 +80,8 @@ fallback_version = "0.0.0"
6480
write_to = "vechord/__version__.py"
6581
write_template = "__version__ = '{}'"
6682

83+
[tool.typos.default.extend-words]
84+
typ = "typ"
85+
6786
[tool.uv.sources]
6887
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }

0 commit comments

Comments (0)