Skip to content

Commit dc989ff

Browse files
committed
feat: add regenerate all embeddings endpoint
1 parent 9e2f790 commit dc989ff

File tree

8 files changed

+315
-139
lines changed

8 files changed

+315
-139
lines changed

init_db.sql

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ CREATE TABLE IF NOT EXISTS issues (
1414
number INT NOT NULL,
1515
html_url VARCHAR NOT NULL,
1616
url VARCHAR NOT NULL,
17-
embedding vector(1024) NOT NULL,
17+
embedding halfvec(2560) NOT NULL,
1818
created_at timestamp with time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC'),
1919
updated_at timestamp with time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
2020
);
@@ -31,14 +31,19 @@ CREATE TABLE IF NOT EXISTS comments (
3131

3232
CREATE INDEX IF NOT EXISTS issues_source_id_idx ON issues (source_id);
3333
CREATE INDEX IF NOT EXISTS comments_source_id_idx ON comments (source_id);
34-
CREATE INDEX IF NOT EXISTS issues_embedding_hnsw_idx ON issues USING hnsw (embedding vector_cosine_ops);
34+
CREATE INDEX IF NOT EXISTS issues_embedding_hnsw_idx ON issues USING hnsw (embedding halfvec_cosine_ops);
35+
36+
-- Enumerates the kinds of background job stored in the jobs table.
-- PostgreSQL has no CREATE TYPE IF NOT EXISTS, so trap duplicate_object to keep
-- this script re-runnable like the IF NOT EXISTS statements around it.
DO $$
BEGIN
    CREATE TYPE job_type AS ENUM ('embeddings_regeneration', 'issue_indexation');
EXCEPTION
    WHEN duplicate_object THEN NULL;
END
$$;
3537

3638
CREATE TABLE IF NOT EXISTS jobs (
3739
id SERIAL PRIMARY KEY,
38-
repository_id VARCHAR NOT NULL UNIQUE,
40+
job_type job_type NOT NULL,
41+
repository_id VARCHAR UNIQUE,
3942
data JSONB NOT NULL,
4043
created_at timestamp with time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC'),
4144
updated_at timestamp with time zone NOT NULL DEFAULT (current_timestamp AT TIME ZONE 'UTC')
4245
);
4346

4447
-- Index for looking up jobs by repository_id.
-- NOTE(review): repository_id is declared UNIQUE in the jobs table, and PostgreSQL
-- backs a UNIQUE constraint with its own index, so this index looks redundant —
-- confirm before dropping.
CREATE INDEX IF NOT EXISTS jobs_repository_id_idx ON jobs (repository_id);
48+
CREATE INDEX IF NOT EXISTS jobs_job_type_idx ON jobs (job_type);
49+
-- Partial unique index: allows at most one row whose job_type is
-- 'embeddings_regeneration'; rows of other job types are not restricted by it.
-- IF NOT EXISTS keeps the script re-runnable, matching the other index statements.
CREATE UNIQUE INDEX IF NOT EXISTS jobs_type_special_idx ON jobs (job_type) WHERE job_type = 'embeddings_regeneration';

issue-bot/Cargo.lock

Lines changed: 90 additions & 48 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

issue-bot/Cargo.toml

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,20 @@ hex = "0.4"
1818
# hf-hub = { version = "0.4", features = ["tokio"] }
1919
hmac = "0.12"
2020
metrics = "0.24"
21-
metrics-exporter-prometheus = "0.16"
21+
metrics-exporter-prometheus = "0.17"
2222
nanoid = "0.4"
2323
once_cell = "1.20"
2424
pgvector = { version = "0.4", features = ["sqlx"] }
2525
reqwest = { version = "0.12", features = ["json"] }
2626
serde = { version = "1.0", features = ["derive"] }
2727
serde_json = { version = "1", features = ["raw_value"] }
2828
sha2 = "0.10"
29-
sqlx = { version = "0.8", features = ["chrono", "postgres", "runtime-tokio"] }
29+
sqlx = { version = "0.8", features = [
30+
"chrono",
31+
"macros",
32+
"postgres",
33+
"runtime-tokio",
34+
] }
3035
thiserror = "2"
3136
# tokenizers = { version = "0.21", default-features = false, features = ["onig"] }
3237
tokio = { version = "1.0", features = ["full"] }

0 commit comments

Comments
 (0)