Skip to content

Commit 3e08af3

Browse files
authored
feat: safe sql (#8)
* remove legacy code; support jsonb for pg; fix datetime to timestamptz; use psycopg string composition to make it safe; fix some type errors. Signed-off-by: Keming <kemingyang@tensorchord.ai> * fix typo. Signed-off-by: Keming <kemingyang@tensorchord.ai> --------- Signed-off-by: Keming <kemingyang@tensorchord.ai>
1 parent 4af7006 commit 3e08af3

22 files changed

+224
-847
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,7 @@ cython_debug/
173173

174174
# test data
175175
data/
176+
datasets/
176177
*.txt
177178
*.jpeg
178179

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ PY_SOURCE=.
33
lint:
44
@uv run ruff check ${PY_SOURCE}
55

6+
typecheck:
7+
@uv run -- mypy --non-interactive --install-types ${PY_SOURCE}
8+
69
format:
710
@uv run ruff check --fix ${PY_SOURCE}
811
@uv run ruff format ${PY_SOURCE}

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,14 @@ timeline
3535
## Examples
3636

3737
- [beir.py](./examples/beir.py): the most flexible way to use the library (loading, indexing, querying and evaluation)
38+
- [web.py](./examples/web.py): build a web application with from the defined tables and pipeline
39+
- [essay.py](./examples/essay.py): extract the content from Paul Graham's essays and evaluate the search results from LLM generated queries
40+
- [contextual.py](./examples/contextual.py): contextual retrieval example
3841

3942
## Development
4043

4144
```bash
42-
docker run --rm -d -e POSTGRES_PASSWORD=postgres -p 5432:5432 tensorchord/vchord-postgres:pg17-v0.2.0
45+
docker run --rm -d --name vechord -e POSTGRES_PASSWORD=postgres -p 5432:5432 tensorchord/vchord-postgres:pg17-v0.2.1
4346
envd up
4447
# inside the envd env, sync all the dependencies
4548
make sync

build.envd

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,5 @@
33

44
def build():
55
base(dev=True, image="ubuntu:22.04")
6-
install.conda(use_mamba=True)
7-
install.python()
8-
install.python_packages(name=["uv"])
6+
install.uv()
97
shell("fish")

examples/basic.py

Lines changed: 0 additions & 24 deletions
This file was deleted.

(filename header missing in extraction — next diff follows)
Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,16 @@ def context_embedding(uid: int, text: str) -> list[ContextChunk]:
8484
for (context, origin) in zip(
8585
augmentor.augment_context([c.text for c in chunks]),
8686
[c.text for c in chunks],
87+
strict=False,
8788
)
8889
]
8990
return [
9091
ContextChunk(
9192
chunk_uid=chunk_uid, text=augmented, vector=dense.vectorize_chunk(augmented)
9293
)
93-
for (chunk_uid, augmented) in zip([c.uid for c in chunks], context_chunks)
94+
for (chunk_uid, augmented) in zip(
95+
[c.uid for c in chunks], context_chunks, strict=False
96+
)
9497
]
9598

9699

@@ -139,4 +142,7 @@ def evaluate(uid: int, doc_uid: int, text: str):
139142
scores = evaluate()
140143
print(sum(scores) / len(scores))
141144

145+
chunks = query_context_chunk("vector search")
146+
print(chunks)
147+
142148
vr.clear_storage()

examples/essay.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def create_query(uid: int, text: str) -> Query:
8383

8484

8585
@vr.inject(input=Query)
86-
def evalute(cid: int, vector: Vector[768]) -> Evaluation:
86+
def evaluate(cid: int, vector: Vector[768]) -> Evaluation:
8787
chunks: list[Chunk] = vr.search(Chunk, vector, topk=TOP_K)
8888
score = evaluator.evaluate_one(cid, [chunk.uid for chunk in chunks])
8989
return Evaluation(
@@ -95,6 +95,6 @@ def evalute(cid: int, vector: Vector[768]) -> Evaluation:
9595
segment_essay()
9696
create_query()
9797

98-
res: list[Evaluation] = evalute()
98+
res: list[Evaluation] = evaluate()
9999
print("ndcg", sum(r.ndcg for r in res) / len(res))
100100
print(f"recall@{TOP_K}", sum(r.recall for r in res) / len(res))

examples/gemini.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

pyproject.toml

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ name = "vechord"
33
dynamic = ["version"]
44
description = "VectorChord Python SDK"
55
readme = "README.md"
6-
requires-python = ">=3.9"
6+
requires-python = ">=3.10"
77
dependencies = [
88
"falcon>=4.0.2",
99
"httpx>=0.28.1",
@@ -41,13 +41,14 @@ build-backend = "pdm.backend"
4141

4242
[dependency-groups]
4343
dev = [
44+
"mypy>=1.15.0",
4445
"pdm-backend>=2.4.3",
4546
"pytest>=8.3.5",
4647
"ruff>=0.9.1",
4748
]
4849

4950
[tool.ruff]
50-
target-version = "py39"
51+
target-version = "py310"
5152
[tool.ruff.lint]
5253
select = ["E", "F", "G", "B", "I", "SIM", "TID", "PL", "RUF"]
5354
ignore = ["E501"]
@@ -56,6 +57,21 @@ known-first-party = ["vechord"]
5657
[tool.ruff.lint.pylint]
5758
max-args = 5
5859

60+
[tool.mypy]
61+
python_version = "3.10"
62+
warn_redundant_casts = true
63+
warn_unreachable = true
64+
pretty = true
65+
66+
[[tool.mypy.overrides]]
67+
module = [
68+
"pgvector.psycopg",
69+
"pytrec_eval",
70+
"pypdfium2",
71+
"wordllama",
72+
]
73+
ignore_missing_imports = true
74+
5975
[tool.pdm]
6076
distribution = true
6177
[tool.pdm.version]
@@ -64,5 +80,8 @@ fallback_version = "0.0.0"
6480
write_to = "vechord/__version__.py"
6581
write_template = "__version__ = '{}'"
6682

83+
[tool.typos.default.extend-words]
84+
typ = "typ"
85+
6786
[tool.uv.sources]
6887
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }

0 commit comments

Comments (0)