Skip to content

Commit 9a9b60d

Browse files
authored
fix: panic in relation not found in backfill order (#23573)
Signed-off-by: lyang24 <[email protected]>
1 parent c8378d1 commit 9a9b60d

File tree

2 files changed

+152
-4
lines changed

2 files changed

+152
-4
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
statement ok
2+
SET RW_IMPLICIT_FLUSH TO TRUE;
3+
4+
statement ok
5+
drop table if exists car_sales cascade;
6+
7+
statement ok
8+
drop table if exists car_info cascade;
9+
10+
statement ok
11+
drop table if exists car_regions cascade;
12+
13+
statement ok
14+
drop table if exists t cascade;
15+
16+
statement ok
17+
create table car_sales(id int, car_id int, region_id int, price int);
18+
19+
statement ok
20+
create table car_info(id int, name varchar);
21+
22+
statement ok
23+
create table car_regions(id int, region varchar);
24+
25+
# Create table t that is NOT used in the query
26+
statement ok
27+
create table t(a int, b int, c int);
28+
29+
# Test 1: Should fail because 't' is specified in backfill_order but not used in query
30+
statement error Table or source 't' specified in backfill_order is not used in the query
31+
create materialized view m1
32+
with (backfill_order = FIXED(car_regions -> car_sales, t -> car_sales))
33+
as
34+
with price_ranges as (
35+
select
36+
car_info.name as name,
37+
car_sales.price as price,
38+
round(log10(1 + car_sales.price)::numeric, 1) as price_range
39+
from car_sales join car_info
40+
on car_sales.car_id = car_info.id
41+
join car_regions
42+
on car_sales.region_id = car_regions.id
43+
)
44+
select
45+
name,
46+
price_range,
47+
count(*) as sales_count,
48+
sum(price) as sales_volume,
49+
avg(price) as sales_avg,
50+
min(price) as sales_min,
51+
max(price) as sales_max,
52+
approx_percentile(0.5) WITHIN GROUP (ORDER BY price) as sales_est_median,
53+
approx_percentile(0.01) WITHIN GROUP (ORDER BY price) as sales_est_bottom_1_percent,
54+
approx_percentile(0.99) WITHIN GROUP (ORDER BY price) as sales_est_top_1_percent
55+
FROM
56+
price_ranges
57+
GROUP BY name, price_range;
58+
59+
# Test 2: Should also fail when 't' is on the right side of the arrow
60+
statement error Table or source 't' specified in backfill_order is not used in the query
61+
create materialized view m2
62+
with (backfill_order = FIXED(car_sales -> t))
63+
as
64+
select
65+
car_info.name as name,
66+
car_sales.price as price
67+
from car_sales join car_info
68+
on car_sales.car_id = car_info.id
69+
join car_regions
70+
on car_sales.region_id = car_regions.id;
71+
72+
# Test 3: Should succeed when only actual tables from the query are specified
73+
statement ok
74+
create materialized view m3
75+
with (backfill_order = FIXED(car_regions -> car_sales))
76+
as
77+
select
78+
car_info.name as name,
79+
car_sales.price as price
80+
from car_sales join car_info
81+
on car_sales.car_id = car_info.id
82+
join car_regions
83+
on car_sales.region_id = car_regions.id;
84+
85+
# Cleanup
86+
statement ok
87+
drop materialized view m3;
88+
89+
statement ok
90+
drop table car_sales;
91+
92+
statement ok
93+
drop table car_info;
94+
95+
statement ok
96+
drop table car_regions;
97+
98+
statement ok
99+
drop table t;

src/frontend/src/optimizer/backfill_order_strategy.rs

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ pub mod auto {
232232
}
233233

234234
mod fixed {
235-
use std::collections::HashMap;
235+
use std::collections::{HashMap, HashSet};
236236

237237
use risingwave_common::bail;
238238
use risingwave_common::catalog::ObjectId;
@@ -243,16 +243,65 @@ mod fixed {
243243
use crate::optimizer::backfill_order_strategy::common::{
244244
bind_backfill_relation_id_by_name, has_cycle,
245245
};
246+
use crate::optimizer::plan_node::{StreamPlanNodeType, StreamPlanRef};
246247
use crate::session::SessionImpl;
247248

249+
/// Collect all relation IDs (tables and sources) that are scanned in the plan.
250+
fn collect_scanned_relation_ids(plan: StreamPlanRef) -> HashSet<ObjectId> {
251+
let mut relation_ids = HashSet::new();
252+
253+
fn visit(plan: StreamPlanRef, relation_ids: &mut HashSet<ObjectId>) {
254+
match plan.node_type() {
255+
StreamPlanNodeType::StreamTableScan => {
256+
let table_scan = plan.as_stream_table_scan().expect("table scan");
257+
let relation_id = table_scan.core().table_catalog.id().into();
258+
relation_ids.insert(relation_id);
259+
}
260+
StreamPlanNodeType::StreamSourceScan => {
261+
let source_scan = plan.as_stream_source_scan().expect("source scan");
262+
let relation_id = source_scan.source_catalog().id;
263+
relation_ids.insert(relation_id);
264+
}
265+
_ => {}
266+
}
267+
268+
// Recursively visit all inputs
269+
for child in plan.inputs() {
270+
visit(child, relation_ids);
271+
}
272+
}
273+
274+
visit(plan, &mut relation_ids);
275+
relation_ids
276+
}
277+
248278
pub(super) fn plan_fixed_strategy(
249279
session: &SessionImpl,
250280
orders: Vec<(ObjectName, ObjectName)>,
281+
plan: StreamPlanRef,
251282
) -> Result<HashMap<ObjectId, Uint32Vector>> {
283+
// Collect all scanned relation IDs from the plan
284+
let scanned_relation_ids = collect_scanned_relation_ids(plan);
285+
252286
let mut order: HashMap<ObjectId, Uint32Vector> = HashMap::new();
253287
for (start_name, end_name) in orders {
254-
let start_relation_id = bind_backfill_relation_id_by_name(session, start_name)?;
255-
let end_relation_id = bind_backfill_relation_id_by_name(session, end_name)?;
288+
let start_relation_id = bind_backfill_relation_id_by_name(session, start_name.clone())?;
289+
let end_relation_id = bind_backfill_relation_id_by_name(session, end_name.clone())?;
290+
291+
// Validate that both relations are present in the query plan
292+
if !scanned_relation_ids.contains(&start_relation_id) {
293+
bail!(
294+
"Table or source '{}' specified in backfill_order is not used in the query",
295+
start_name
296+
);
297+
}
298+
if !scanned_relation_ids.contains(&end_relation_id) {
299+
bail!(
300+
"Table or source '{}' specified in backfill_order is not used in the query",
301+
end_name
302+
);
303+
}
304+
256305
order
257306
.entry(start_relation_id)
258307
.or_default()
@@ -423,7 +472,7 @@ pub fn plan_backfill_order(
423472
let order = match backfill_order_strategy {
424473
BackfillOrderStrategy::Default | BackfillOrderStrategy::None => Default::default(),
425474
BackfillOrderStrategy::Auto => plan_auto_strategy(session, plan),
426-
BackfillOrderStrategy::Fixed(orders) => plan_fixed_strategy(session, orders)?,
475+
BackfillOrderStrategy::Fixed(orders) => plan_fixed_strategy(session, orders, plan)?,
427476
};
428477
Ok(BackfillOrder { order })
429478
}

0 commit comments

Comments
 (0)