Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ cython_debug/

# test data
data/
datasets/
*.txt
*.jpeg

Expand Down
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@ PY_SOURCE=.
lint:
@uv run ruff check ${PY_SOURCE}

typecheck:
@uv run -- mypy --non-interactive --install-types ${PY_SOURCE}

format:
@uv run ruff check --fix ${PY_SOURCE}
@uv run ruff format ${PY_SOURCE}
Expand Down
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,14 @@ timeline
## Examples

- [beir.py](./examples/beir.py): the most flexible way to use the library (loading, indexing, querying and evaluation)
- [web.py](./examples/web.py): build a web application from the defined tables and pipeline
- [essay.py](./examples/essay.py): extract the content from Paul Graham's essays and evaluate the search results from LLM generated queries
- [contextual.py](./examples/contextual.py): contextual retrieval example

## Development

```bash
docker run --rm -d -e POSTGRES_PASSWORD=postgres -p 5432:5432 tensorchord/vchord-postgres:pg17-v0.2.0
docker run --rm -d --name vechord -e POSTGRES_PASSWORD=postgres -p 5432:5432 tensorchord/vchord-postgres:pg17-v0.2.1
envd up
# inside the envd env, sync all the dependencies
make sync
Expand Down
4 changes: 1 addition & 3 deletions build.envd
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,5 @@

def build():
base(dev=True, image="ubuntu:22.04")
install.conda(use_mamba=True)
install.python()
install.python_packages(name=["uv"])
install.uv()
shell("fish")
24 changes: 0 additions & 24 deletions examples/basic.py

This file was deleted.

8 changes: 7 additions & 1 deletion examples/decorator.py → examples/contextual.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,16 @@ def context_embedding(uid: int, text: str) -> list[ContextChunk]:
for (context, origin) in zip(
augmentor.augment_context([c.text for c in chunks]),
[c.text for c in chunks],
strict=False,
)
]
return [
ContextChunk(
chunk_uid=chunk_uid, text=augmented, vector=dense.vectorize_chunk(augmented)
)
for (chunk_uid, augmented) in zip([c.uid for c in chunks], context_chunks)
for (chunk_uid, augmented) in zip(
[c.uid for c in chunks], context_chunks, strict=False
)
]


Expand Down Expand Up @@ -139,4 +142,7 @@ def evaluate(uid: int, doc_uid: int, text: str):
scores = evaluate()
print(sum(scores) / len(scores))

chunks = query_context_chunk("vector search")
print(chunks)

vr.clear_storage()
4 changes: 2 additions & 2 deletions examples/essay.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def create_query(uid: int, text: str) -> Query:


@vr.inject(input=Query)
def evalute(cid: int, vector: Vector[768]) -> Evaluation:
def evaluate(cid: int, vector: Vector[768]) -> Evaluation:
chunks: list[Chunk] = vr.search(Chunk, vector, topk=TOP_K)
score = evaluator.evaluate_one(cid, [chunk.uid for chunk in chunks])
return Evaluation(
Expand All @@ -95,6 +95,6 @@ def evalute(cid: int, vector: Vector[768]) -> Evaluation:
segment_essay()
create_query()

res: list[Evaluation] = evalute()
res: list[Evaluation] = evaluate()
print("ndcg", sum(r.ndcg for r in res) / len(res))
print(f"recall@{TOP_K}", sum(r.recall for r in res) / len(res))
30 changes: 0 additions & 30 deletions examples/gemini.py

This file was deleted.

23 changes: 21 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "vechord"
dynamic = ["version"]
description = "VectorChord Python SDK"
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
dependencies = [
"falcon>=4.0.2",
"httpx>=0.28.1",
Expand Down Expand Up @@ -41,13 +41,14 @@ build-backend = "pdm.backend"

[dependency-groups]
dev = [
"mypy>=1.15.0",
"pdm-backend>=2.4.3",
"pytest>=8.3.5",
"ruff>=0.9.1",
]

[tool.ruff]
target-version = "py39"
target-version = "py310"
[tool.ruff.lint]
select = ["E", "F", "G", "B", "I", "SIM", "TID", "PL", "RUF"]
ignore = ["E501"]
Expand All @@ -56,6 +57,21 @@ known-first-party = ["vechord"]
[tool.ruff.lint.pylint]
max-args = 5

[tool.mypy]
python_version = "3.10"
warn_redundant_casts = true
warn_unreachable = true
pretty = true

[[tool.mypy.overrides]]
module = [
"pgvector.psycopg",
"pytrec_eval",
"pypdfium2",
"wordllama",
]
ignore_missing_imports = true

[tool.pdm]
distribution = true
[tool.pdm.version]
Expand All @@ -64,5 +80,8 @@ fallback_version = "0.0.0"
write_to = "vechord/__version__.py"
write_template = "__version__ = '{}'"

[tool.typos.default.extend-words]
typ = "typ"

[tool.uv.sources]
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
Loading