Skip to content

Commit e6f84c6

Browse files
feat(rig-841): support HelixDB (#805)
* feat(rig-841): support helixdb * docs: license, update cargo.toml, readme * docs: minor docs update * feat: impl InsertDocuments for HelixDBVectorStore * refactor: update docs * chore: update deps * refactor: fix stuff * docs: update docs, licensing * refactor: add docstrings
1 parent 6942a4f commit e6f84c6

File tree

11 files changed

+467
-62
lines changed

11 files changed

+467
-62
lines changed

Cargo.lock

Lines changed: 122 additions & 61 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ members = [
1616
"rig-bedrock",
1717
"rig-milvus",
1818
"rig-wasm",
19-
"rig-s3vectors",
19+
"rig-s3vectors", "rig-helixdb",
2020
]
2121

2222
[profile.release]

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ Vector stores are available as separate companion-crates:
122122
- Milvus: [`rig-milvus`](https://github.com/0xPlaygrounds/rig/tree/main/rig-milvus)
123123
- ScyllaDB: [`rig-scylladb`](https://github.com/0xPlaygrounds/rig/tree/main/rig-scylladb)
124124
- AWS S3Vectors: [`rig-s3vectors`](https://github.com/0xPlaygrounds/rig/tree/main/rig-s3vectors)
125+
- HelixDB: [`rig-helixdb`](https://github.com/0xPlaygrounds/rig/tree/main/rig-helixdb)
125126

126127
The following providers are available as separate companion-crates:
127128
- Fastembed: [`rig-fastembed`](https://github.com/0xPlaygrounds/rig/tree/main/rig-fastembed)

rig-helixdb/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.helix

rig-helixdb/Cargo.toml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
[package]
2+
name = "rig-helixdb"
3+
version = "0.1.0"
4+
edition.workspace = true
5+
license = "MIT"
6+
readme = "README.md"
7+
description = "Rig vector store index integration for HelixDB."
8+
repository = "https://github.com/0xPlaygrounds/rig"
9+
10+
[dependencies]
11+
helix-rs = "0.1.9"
12+
serde = { workspace = true, features = ["derive"] }
13+
serde_json.workspace = true
14+
rig-core = { path = "../rig-core", version = "0.21.0" }
15+
16+
[dev-dependencies]
17+
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
18+
19+
[[example]]
20+
name = "vector_search_helixdb"
21+
required-features = ["rig-core/derive"]

rig-helixdb/README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Rig HelixDB integration
2+
This crate integrates HelixDB into Rig, allowing you to easily use RAG with this database.
3+
4+
## Installation
5+
To install this crate, run the following command in a Rust project directory. It adds `rig-helixdb` as a dependency (`rig-core` must also be added for intended usage):
6+
```bash
7+
cargo add rig-helixdb
8+
```
9+
10+
There are a few different ways you can run HelixDB:
11+
- Through HelixDB's cloud offering
12+
- Running it locally through their `helix start` command (requires the Helix CLI to be installed).
13+
- For local dev, you will likely want to use `helix push dev` for continuous iteration.
14+
15+
## How to run the example
16+
Before running the example, you will need to ensure that you are running an instance of HelixDB, which you can do with `helix dockerdev run`.
17+
18+
Once done, you will then need to deploy your queries/schema. **The queries/schema in the `examples/helixdb-cfg` folder are the required minimum to be able to use this integration.** `rig-helixdb` also provides a way to get a manual handle on the client yourself so that you can invoke your own queries should you need to.
19+
20+
Assuming `rig-helixdb` is your current working directory, to deploy a minimum viable configuration for HelixDB (with `rig-helixdb`) you will need to `cd` into the `examples/helixdb-cfg` folder and then run the following:
21+
```bash
22+
helix push dev
23+
```
24+
25+
This will then deploy the queries/schema into your instance.
26+
27+
To run the example, add your OpenAI API key as an environment variable:
28+
```bash
29+
export OPENAI_API_KEY=my_key
30+
```
31+
32+
Finally, use the following command to run the example:
33+
```bash
34+
cargo run --example vector_search_helixdb --features rig-core/derive
35+
```
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
QUERY InsertVector (vector: [F64], doc: String, json_payload: String) =>
2+
AddV<Document>(vector, { doc: doc, json_payload: json_payload })
3+
RETURN doc
4+
5+
QUERY VectorSearch(vector: [F64], limit: U64, threshold: F64) =>
6+
vec_docs <- SearchV<Document>(vector, limit)
7+
RETURN vec_docs
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
V::Document {
2+
doc: String,
3+
json_payload: String
4+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
[project]
2+
name = "helixdb-cfg"
3+
queries = "./db/"
4+
5+
[local.dev]
6+
port = 6969
7+
build_mode = "debug"
8+
9+
[local.dev.vector_config]
10+
m = 16
11+
ef_construction = 128
12+
ef_search = 768
13+
db_max_size_gb = 10
14+
15+
[cloud]
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
use helix_rs::{HelixDB, HelixDBClient};
2+
use rig::{
3+
Embed,
4+
client::{EmbeddingsClient, ProviderClient},
5+
embeddings::EmbeddingsBuilder,
6+
vector_store::{InsertDocuments, VectorSearchRequest, VectorStoreIndex},
7+
};
8+
use rig_helixdb::HelixDBVectorStore;
9+
use serde::{Deserialize, Serialize};
10+
11+
// A vector search needs to be performed on the `definitions` field, so we derive the `Embed` trait for `WordDefinition`
12+
// and tag that field with `#[embed]`.
13+
// We are not going to store the definitions on our database so we skip the `Serialize` trait
14+
#[derive(Embed, Serialize, Deserialize, Clone, Debug, Eq, PartialEq, Default)]
15+
struct WordDefinition {
16+
word: String,
17+
#[serde(skip)] // we don't want to serialize this field, we use only to create embeddings
18+
#[embed]
19+
definition: String,
20+
}
21+
22+
impl std::fmt::Display for WordDefinition {
23+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24+
write!(f, "{}", self.word)
25+
}
26+
}
27+
28+
#[tokio::main]
29+
async fn main() {
30+
let openai_model =
31+
rig::providers::openai::Client::from_env().embedding_model("text-embedding-ada-002");
32+
33+
let helixdb_client = HelixDB::new(None, Some(6969), None); // Uses default port 6969
34+
let vector_store = HelixDBVectorStore::new(helixdb_client, openai_model.clone());
35+
36+
let words = vec![
37+
WordDefinition {
38+
word: "flurbo".to_string(),
39+
definition: "1. *flurbo* (name): A fictional digital currency that originated in the animated series Rick and Morty.".to_string()
40+
},
41+
WordDefinition {
42+
word: "glarb-glarb".to_string(),
43+
definition: "1. *glarb-glarb* (noun): A fictional creature found in the distant, swampy marshlands of the planet Glibbo in the Andromeda galaxy.".to_string()
44+
},
45+
WordDefinition {
46+
word: "linglingdong".to_string(),
47+
definition: "1. *linglingdong* (noun): A term used by inhabitants of the far side of the moon to describe humans.".to_string(),
48+
}];
49+
50+
let documents = EmbeddingsBuilder::new(openai_model)
51+
.documents(words)
52+
.unwrap()
53+
.build()
54+
.await
55+
.expect("Failed to create embeddings");
56+
57+
vector_store.insert_documents(documents).await.unwrap();
58+
59+
let query = "What is a flurbo?";
60+
let vector_req = VectorSearchRequest::builder()
61+
.query(query)
62+
.samples(5)
63+
.build()
64+
.unwrap();
65+
66+
let docs = vector_store
67+
.top_n::<WordDefinition>(vector_req)
68+
.await
69+
.unwrap();
70+
71+
for doc in docs {
72+
println!(
73+
"Vector found with id: {id} and score: {score} and word def: {doc}",
74+
id = doc.1,
75+
score = doc.0,
76+
doc = doc.2
77+
)
78+
}
79+
}

0 commit comments

Comments
 (0)