Skip to content

Commit 8b422e3

Browse files
committed
feat(planner): match vector indexes
Signed-off-by: Alex Chi <iskyzh@gmail.com>
1 parent 1d646b8 commit 8b422e3

File tree

18 files changed

+292
-11
lines changed

18 files changed

+292
-11
lines changed

src/binder/create_index.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,47 @@ use serde::{Deserialize, Serialize};
1010
use super::*;
1111
use crate::catalog::{ColumnId, SchemaId, TableId};
1212

13+
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Serialize, Deserialize)]
14+
pub enum VectorDistance {
15+
Cosine,
16+
L2,
17+
NegativeDotProduct,
18+
}
19+
20+
impl FromStr for VectorDistance {
21+
type Err = String;
22+
23+
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
24+
match s {
25+
"cosine" => Ok(VectorDistance::Cosine),
26+
"<=>" => Ok(VectorDistance::Cosine),
27+
"l2" => Ok(VectorDistance::L2),
28+
"<->" => Ok(VectorDistance::L2),
29+
"dotproduct" => Ok(VectorDistance::NegativeDotProduct),
30+
"<#>" => Ok(VectorDistance::NegativeDotProduct),
31+
_ => Err(format!("invalid vector distance: {}", s)),
32+
}
33+
}
34+
}
35+
36+
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Serialize, Deserialize)]
37+
pub enum IndexType {
38+
Hnsw,
39+
IvfFlat {
40+
distance: VectorDistance,
41+
nlists: usize,
42+
nprobe: usize,
43+
},
44+
Btree,
45+
}
46+
1347
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Serialize, Deserialize)]
1448
pub struct CreateIndex {
1549
pub schema_id: SchemaId,
1650
pub index_name: String,
1751
pub table_id: TableId,
1852
pub columns: Vec<ColumnId>,
53+
pub index_type: IndexType,
1954
}
2055

2156
impl fmt::Display for CreateIndex {
@@ -48,6 +83,79 @@ impl FromStr for Box<CreateIndex> {
4883
}
4984

5085
impl Binder {
86+
fn parse_index_type(&self, using: Option<Ident>, with: Vec<Expr>) -> Result<IndexType> {
87+
let Some(using) = using else {
88+
return Err(ErrorKind::InvalidIndex("using clause is required".to_string()).into());
89+
};
90+
match using.to_string().to_lowercase().as_str() {
91+
"hnsw" => Ok(IndexType::Hnsw),
92+
"ivfflat" => {
93+
let mut distfn = None;
94+
let mut nlists = None;
95+
let mut nprobe = None;
96+
for expr in with {
97+
let Expr::BinaryOp { left, op, right } = expr else {
98+
return Err(
99+
ErrorKind::InvalidIndex("invalid with clause".to_string()).into()
100+
);
101+
};
102+
if op != BinaryOperator::Eq {
103+
return Err(
104+
ErrorKind::InvalidIndex("invalid with clause".to_string()).into()
105+
);
106+
}
107+
let Expr::Identifier(Ident { value: key, .. }) = *left else {
108+
return Err(
109+
ErrorKind::InvalidIndex("invalid with clause".to_string()).into()
110+
);
111+
};
112+
let key = key.to_lowercase();
113+
let Expr::Value(v) = *right else {
114+
return Err(
115+
ErrorKind::InvalidIndex("invalid with clause".to_string()).into()
116+
);
117+
};
118+
let v: DataValue = v.into();
119+
match key.as_str() {
120+
"distfn" => {
121+
let v = v.as_str();
122+
distfn = Some(v.to_lowercase());
123+
}
124+
"nlists" => {
125+
let Some(v) = v.as_usize().unwrap() else {
126+
return Err(ErrorKind::InvalidIndex(
127+
"invalid with clause".to_string(),
128+
)
129+
.into());
130+
};
131+
nlists = Some(v);
132+
}
133+
"nprobe" => {
134+
let Some(v) = v.as_usize().unwrap() else {
135+
return Err(ErrorKind::InvalidIndex(
136+
"invalid with clause".to_string(),
137+
)
138+
.into());
139+
};
140+
nprobe = Some(v);
141+
}
142+
_ => {
143+
return Err(
144+
ErrorKind::InvalidIndex("invalid with clause".to_string()).into()
145+
);
146+
}
147+
}
148+
}
149+
Ok(IndexType::IvfFlat {
150+
distance: VectorDistance::from_str(distfn.unwrap().as_str()).unwrap(),
151+
nlists: nlists.unwrap(),
152+
nprobe: nprobe.unwrap(),
153+
})
154+
}
155+
_ => Err(ErrorKind::InvalidIndex("invalid index type".to_string()).into()),
156+
}
157+
}
158+
51159
pub(super) fn bind_create_index(&mut self, stat: crate::parser::CreateIndex) -> Result {
52160
let Some(ref name) = stat.name else {
53161
return Err(
@@ -57,6 +165,8 @@ impl Binder {
57165
let crate::parser::CreateIndex {
58166
table_name,
59167
columns,
168+
using,
169+
with,
60170
..
61171
} = stat;
62172
let index_name = lower_case_name(name);
@@ -94,6 +204,7 @@ impl Binder {
94204
index_name: index_name.into(),
95205
table_id: table.id(),
96206
columns: column_ids,
207+
index_type: self.parse_index_type(using, with)?,
97208
})));
98209
Ok(create)
99210
}

src/binder/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ mod select;
2929
mod table;
3030

3131
pub use self::create_function::CreateFunction;
32-
pub use self::create_index::CreateIndex;
32+
pub use self::create_index::{CreateIndex, IndexType, VectorDistance};
3333
pub use self::create_table::CreateTable;
3434
pub use self::error::BindError;
3535
use self::error::ErrorKind;

src/catalog/index.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,31 @@
11
// Copyright 2025 RisingLight Project Authors. Licensed under Apache-2.0.
22

33
use super::*;
4+
use crate::binder::IndexType;
45

56
/// The catalog of an index.
67
pub struct IndexCatalog {
78
id: IndexId,
89
name: String,
910
table_id: TableId,
1011
column_idxs: Vec<ColumnId>,
12+
index_type: IndexType,
1113
}
1214

1315
impl IndexCatalog {
14-
pub fn new(id: IndexId, name: String, table_id: TableId, column_idxs: Vec<ColumnId>) -> Self {
16+
pub fn new(
17+
id: IndexId,
18+
name: String,
19+
table_id: TableId,
20+
column_idxs: Vec<ColumnId>,
21+
index_type: IndexType,
22+
) -> Self {
1523
Self {
1624
id,
1725
name,
1826
table_id,
1927
column_idxs,
28+
index_type,
2029
}
2130
}
2231

@@ -35,4 +44,8 @@ impl IndexCatalog {
3544
pub fn name(&self) -> &str {
3645
&self.name
3746
}
47+
48+
/// Returns an owned copy of the index kind stored in this entry.
pub fn index_type(&self) -> IndexType {
    let kind = &self.index_type;
    kind.clone()
}
3851
}

src/catalog/root.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::sync::{Arc, Mutex};
55

66
use super::function::FunctionCatalog;
77
use super::*;
8+
use crate::binder::IndexType;
89
use crate::parser;
910
use crate::planner::RecExpr;
1011

@@ -104,10 +105,11 @@ impl RootCatalog {
104105
index_name: String,
105106
table_id: TableId,
106107
column_idxs: &[ColumnId],
108+
index_type: &IndexType,
107109
) -> Result<IndexId, CatalogError> {
108110
let mut inner = self.inner.lock().unwrap();
109111
let schema = inner.schemas.get_mut(&schema_id).unwrap();
110-
schema.add_index(index_name, table_id, column_idxs.to_vec())
112+
schema.add_index(index_name, table_id, column_idxs.to_vec(), index_type)
111113
}
112114

113115
pub fn get_index_on_table(&self, schema_id: SchemaId, table_id: TableId) -> Vec<IndexId> {

src/catalog/schema.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::sync::Arc;
55

66
use super::function::FunctionCatalog;
77
use super::*;
8+
use crate::binder::IndexType;
89
use crate::planner::RecExpr;
910

1011
/// The catalog of a schema.
@@ -62,13 +63,20 @@ impl SchemaCatalog {
6263
name: String,
6364
table_id: TableId,
6465
columns: Vec<ColumnId>,
66+
index_type: &IndexType,
6567
) -> Result<IndexId, CatalogError> {
6668
if self.indexes_idxs.contains_key(&name) {
6769
return Err(CatalogError::Duplicated("index", name));
6870
}
6971
let index_id = self.next_id;
7072
self.next_id += 1;
71-
let index_catalog = Arc::new(IndexCatalog::new(index_id, name.clone(), table_id, columns));
73+
let index_catalog = Arc::new(IndexCatalog::new(
74+
index_id,
75+
name.clone(),
76+
table_id,
77+
columns,
78+
index_type.clone(),
79+
));
7280
self.indexes_idxs.insert(name, index_id);
7381
self.indexes.insert(index_id, index_catalog);
7482
Ok(index_id)

src/executor/create_index.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ impl<S: Storage> CreateIndexExecutor<S> {
2121
&self.index.index_name,
2222
self.index.table_id,
2323
&self.index.columns,
24+
&self.index.index_type,
2425
)
2526
.await?;
2627

src/planner/cost.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ impl egg::CostFunction<Expr> for CostFn<'_> {
3131

3232
let c = match enode {
3333
// plan nodes
34-
Scan(_) | Values(_) => build(),
34+
Scan(_) | Values(_) | IndexScan(_) => build(),
3535
Order([_, c]) => nlogn(rows(c)) + build() + costs(c),
3636
Filter([exprs, c]) => costs(exprs) * rows(c) + build() + costs(c),
3737
Proj([exprs, c]) | Window([exprs, c]) => costs(exprs) * rows(c) + costs(c),

src/planner/explain.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,17 @@ impl<'a> Explain<'a> {
248248
("filter", self.expr(filter).pretty()),
249249
]),
250250
),
251+
IndexScan([table, columns, filter, op, key, vector]) => Pretty::childless_record(
252+
"IndexScan",
253+
with_meta(vec![
254+
("table", self.expr(table).pretty()),
255+
("columns", self.expr(columns).pretty()),
256+
("filter", self.expr(filter).pretty()),
257+
("op", self.expr(op).pretty()),
258+
("key", self.expr(key).pretty()),
259+
("vector", self.expr(vector).pretty()),
260+
]),
261+
),
251262
Values(values) => Pretty::simple_record(
252263
"Values",
253264
with_meta(vec![("rows", Pretty::display(&values.len()))]),

src/planner/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ define_language! {
9898

9999
// plans
100100
"scan" = Scan([Id; 3]), // (scan table [column..] filter)
101+
"vector_index_scan" = IndexScan([Id; 6]), // (vector_index_scan table [column..] filter <op> key vector)
101102
"values" = Values(Box<[Id]>), // (values [expr..]..)
102103
"proj" = Proj([Id; 2]), // (proj [expr..] child)
103104
"filter" = Filter([Id; 2]), // (filter expr child)

src/planner/optimizer.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,13 +121,14 @@ static STAGE1_RULES: LazyLock<Vec<Rewrite>> = LazyLock::new(|| {
121121
});
122122

123123
/// Stage2 rules in the optimizer.
124-
/// - pushdown predicate and projection
124+
/// - pushdown predicate, projection, and index scan
125125
static STAGE2_RULES: LazyLock<Vec<Rewrite>> = LazyLock::new(|| {
126126
let mut rules = vec![];
127127
rules.append(&mut rules::expr::rules());
128128
rules.append(&mut rules::plan::always_better_rules());
129129
rules.append(&mut rules::plan::predicate_pushdown_rules());
130130
rules.append(&mut rules::plan::projection_pushdown_rules());
131+
rules.append(&mut rules::plan::index_scan_rules());
131132
rules
132133
});
133134

0 commit comments

Comments
 (0)