Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,20 @@ jobs:
- name: Lint
run: make lint
- name: Test
run: make test
env:
PYTEST_ADDOPTS: -s
run: |
  # Start the PostgreSQL image in the background with a Docker health check
  # (pg_isready) so readiness can be polled below.
  docker run --rm -d -p 5432:5432 --name vdb -e POSTGRES_PASSWORD=postgres --health-cmd="pg_isready -U postgres" --health-interval=1s --health-timeout=1s --health-retries=5 ghcr.io/tensorchord/vchord_bm25-postgres:pg17-v0.1.1

  # Wait for the container to be healthy (at most 10 polls, 1 second apart).
  healthy=false
  for i in {1..10}; do
    if [ "$(docker inspect --format='{{.State.Health.Status}}' vdb)" == "healthy" ]; then
      echo "Database container is healthy."
      healthy=true
      break
    fi
    echo "Waiting for database container to become healthy... ($i/10)"
    sleep 1
  done

  # Do not run the tests against a database that never came up: fail here
  # with the container logs instead of with unrelated connection errors.
  if [ "$healthy" != "true" ]; then
    echo "::error::Database container did not become healthy in time."
    # The container may already be gone (--rm), so these are best-effort.
    docker logs vdb || true
    docker stop vdb || true
    exit 1
  fi

  make test
  docker stop vdb
66 changes: 66 additions & 0 deletions .github/workflows/pages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
name: Pages

on:
pull_request:
paths:
- 'vechord/**'
- 'docs/**'
- '.github/workflows/pages.yml'
- 'examples/**'
- '**.md'
push:
branches: [ main ]
paths:
- 'vechord/**'
- 'docs/**'
- '.github/workflows/pages.yml'
- 'examples/**'
- '**.md'
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

concurrency:
group: ${{ github.ref }}-${{ github.workflow }}
cancel-in-progress: true

jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Setup Pages
uses: actions/configure-pages@v5
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
python-version: "3.12"
- name: Set up Rust
uses: dtolnay/rust-toolchain@stable
- name: Install dependencies
run: |
make sync
- name: Generate docs
run: |
cd docs && make html
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
# Upload only the generated HTML documentation
path: 'docs/build/html'

deploy:
runs-on: ubuntu-latest
needs: build
if: ${{ github.event_name == 'push' }}
# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
pages: write
id-token: write
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4
14 changes: 10 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
PY_SOURCE=.

lint:
@uv run ruff check ${PY_SOURCE}
@uv run -- ruff check ${PY_SOURCE}

# Type-check the code under PY_SOURCE by running mypy through uv.
# NOTE(review): --install-types together with --non-interactive presumably lets
# mypy install missing stub packages without prompting; confirm against the mypy docs.
typecheck:
@uv run -- mypy --non-interactive --install-types ${PY_SOURCE}

format:
@uv run ruff check --fix ${PY_SOURCE}
@uv run ruff format ${PY_SOURCE}
@uv run -- ruff check --fix ${PY_SOURCE}
@uv run -- ruff format ${PY_SOURCE}

clean:
@-rm -rf dist build */__pycache__ *.egg-info vechord/__version__.py
Expand All @@ -20,7 +20,13 @@ publish: build
@uv publish

test:
@uv run pytest -v tests
@uv run -- pytest -v tests

# Sync the uv-managed environment, requesting every optional extra and every
# dependency group.
sync:
@uv sync --all-extras --all-groups

# Build the Sphinx HTML documentation, then serve docs/build/html locally on
# 127.0.0.1:8000.
doc:
	@# Use $(MAKE) -C so -j/-n and the jobserver propagate to the sub-make;
	@# a trailing `cd ..` is unnecessary because each recipe line gets its own shell.
	@$(MAKE) -C docs html
	@uv run -m http.server -d docs/build/html -b 127.0.0.1 8000

# All of these targets name commands, not files they produce. Declaring them
# phony keeps a same-named file or directory (for example a leftover `build/`,
# which `clean` removes) from making make consider the target up to date.
# `build` is listed because `publish` depends on it.
.PHONY: lint typecheck format clean build publish test sync doc
40 changes: 7 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,43 +1,17 @@
# vechord

Python RAG framework built on top of PostgreSQL and [VectorChord](https://github.com/tensorchord/VectorChord/).

## Diagram
## Installation

```mermaid
timeline
title RAG
section Ingestion
Source: Local
: Google Drive
: Dropbox
: Notion
File: Document
: Image
: Audio
Chunk: Text
: Entities
: Embedding
section Query
Analysis: Expansion
: Keyword
: Embedding
Search: Vector Search
: Full Text Search
: Filter
Rerank: ColBERT
section Evaluation
Metric: MAP
: Recall
: NDCG
```sh
pip install vechord
```

## Examples

- [beir.py](./examples/beir.py): the most flexible way to use the library (loading, indexing, querying and evaluation)
- [web.py](./examples/web.py): build a web application with from the defined tables and pipeline
- [essay.py](./examples/essay.py): extract the content from Paul Graham's essays and evaluate the search results from LLM generated queries
- [contextual.py](./examples/contextual.py): contextual retrieval example
- [beir.py](examples/beir.py): the most flexible way to use the library (loading, indexing, querying and evaluation)
- [web.py](examples/web.py): build a web application from the defined tables and pipeline
- [essay.py](examples/essay.py): extract the content from Paul Graham's essays and evaluate the search results from LLM generated queries
- [contextual.py](examples/contextual.py): contextual retrieval example

## Development

Expand Down
29 changes: 29 additions & 0 deletions design.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
## Diagram

```mermaid
timeline
title RAG
section Ingestion
Source: Local
: Google Drive
: Dropbox
: Notion
File: Document
: Image
: Audio
Chunk: Text
: Entities
: Embedding
section Query
Analysis: Expansion
: Keyword
: Embedding
Search: Vector Search
: Full Text Search
: Filter
Rerank: ColBERT
section Evaluation
Metric: MAP
: Recall
: NDCG
```
20 changes: 20 additions & 0 deletions docs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= uv run sphinx-build
SOURCEDIR = source
BUILDDIR = build

# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
35 changes: 35 additions & 0 deletions docs/make.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
@ECHO OFF

pushd %~dp0

REM Command file for Sphinx documentation

if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build

%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)

if "%1" == "" goto help

%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end

:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%

:end
popd
70 changes: 70 additions & 0 deletions docs/source/api.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Interface

## VechordRegistry

```{eval-rst}
.. automodule:: vechord.registry
:members: VechordRegistry
```

## Types

```{eval-rst}
.. automodule:: vechord.spec
:members: Vector,ForeignKey,PrimaryKeyAutoIncrease,Table
```

## Augment

```{eval-rst}
.. automodule:: vechord.augment
:members:
:show-inheritance:
```

## Chunk

```{eval-rst}
.. automodule:: vechord.chunk
:members:
:show-inheritance:
```

## Embedding

```{eval-rst}
.. automodule:: vechord.embedding
:members:
:show-inheritance:
```

## Evaluate

```{eval-rst}
.. automodule:: vechord.evaluate
:members:
:show-inheritance:
```

## Extract

```{eval-rst}
.. automodule:: vechord.extract
:members:
:show-inheritance:
```

## Load

```{eval-rst}
.. automodule:: vechord.load
:members:
:show-inheritance:
```

## Service

```{eval-rst}
.. automodule:: vechord.service
:members:
```
57 changes: 57 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

project = "vechord"
copyright = "2025, TensorChord"
author = "TensorChord"
release = "latest"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.githubpages",
    "myst_parser",
    "sphinx_autodoc_typehints",
    "sphinxext.opengraph",
    "sphinx_sitemap",
]

templates_path = ["_templates"]
exclude_patterns = []
# Accept both reStructuredText and Markdown sources.
source_suffix = [".rst", ".md"]

# Extensions
myst_heading_anchors = 3
autodoc_member_order = "bysource"
# napoleon
napoleon_attr_annotations = True
napoleon_include_init_with_doc = True
napoleon_use_admonition_for_references = True
# sitemap
# Public URL the built documentation is served from.
html_baseurl = "https://tensorchord.github.io/vechord/"
html_extra_path = ["robots.txt"]
# opengraph
# The Open Graph site URL must be where the docs are hosted, not the source
# repository, so it reuses the base URL defined above.
ogp_site_url = html_baseurl
# NOTE(review): this points at the repository page rather than an image file;
# replace it with the URL of a real preview image once one is available.
ogp_image = "https://github.com/tensorchord/vechord"

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "furo"
html_static_path = ["_static"]
html_theme_options = {
    "sidebar_hide_name": True,
    "navigation_with_keys": True,
    "source_repository": "https://github.com/tensorchord/vechord",
    "source_branch": "main",
    "source_directory": "docs/source",
}
Loading
Loading