From 653c95d015453f36ece16a6f36d4791be9f3fb24 Mon Sep 17 00:00:00 2001 From: Zhang Mingli Date: Mon, 28 Jul 2025 22:23:07 +0800 Subject: [PATCH] Implement parallel processing for window functions. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PostgreSQL's parallel processing cannot handle window functions. In contrast, our distributed environment enables parallel execution of window functions across multiple processes on multiple segments. For example: sum(a) over(partition by b order by c) The window function can be processed by redistributing data based on column b to ensure all rows with the same b value are processed by the same worker, significantly improving efficiency. Even without PARTITION BY clauses, we can still enable parallelism by allowing partial_path for window functions and subpaths, with parallel scanning of underlying tables for data filtering. Exclude CASE WHEN expressions in window functions (as they complicate parallelization and make it difficult to guarantee correct data ordering) Example non-parallel execution plan: SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); QUERY PLAN ---------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> WindowAgg Partition By: depname Order By: salary -> Sort Sort Key: depname, salary DESC -> Seq Scan on empsalary Parallel execution plan (4-parallel): SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); QUERY PLAN --------------------------------------------------------------------- Gather Motion 12:1 (slice1; segments: 12) -> WindowAgg Partition By: depname Order By: salary -> Sort Sort Key: depname, salary DESC -> Redistribute Motion 12:12 (slice2; segments: 12) Hash Key: depname Hash Module: 3 -> Parallel Seq Scan on empsalary In complex queries containing window functions, parallel processing may sometimes be inhibited due to cost considerations or other constraints. However, our approach still provides valuable parallelization opportunities for window function subpaths, delivering measurable query efficiency improvements. We have observed significant performance gains in TPC-DS benchmarks through this partial parallelization capability. TPC-DS queries via parallel execution plans (50G AOCS, 4 workers): | Query | Before(ms) | After(ms) | Saved(ms) | Gain | Plan Change | |-------|-----------:|----------:|----------:|------:|-----------------| | q12 | 10,439.08 | 4,613.52 | 5,825.56 | 55.8% | serial→parallel | | q20 | 21,487.08 | 8,723.74 | 12,763.34 | 59.4% | serial→parallel | | q44 | 33,816.75 | 22,515.03 | 11,301.72 | 33.4% | better parallel | | q49 | 60,039.45 | 28,603.51 | 31,435.95 | 52.4% | serial→parallel | | q98 | 40,114.21 | 17,052.78 | 23,061.43 | 57.5% | serial→parallel | changes: - Enabled parallel plans for q12/q20/q49/q98 (prev. serial) - Optimized parallel plan for q44 - Avg gain: 52% (best: q20 59.4%, saved 12.7s) Authored-by: Zhang Mingli avamingli@gmail.com --- src/backend/cdb/cdbgroupingpaths.c | 5 +- src/backend/cdb/cdbpath.c | 2 +- src/backend/optimizer/plan/planner.c | 165 ++++ src/backend/optimizer/util/clauses.c | 8 +- src/test/regress/expected/cluster.out | 6 +- src/test/regress/expected/select_parallel.out | 18 +- .../expected/statement_mem_for_windowagg.out | 2 + src/test/regress/expected/window.out | 308 +++---- .../regress/expected/window_optimizer.out | 698 +++++++------- src/test/regress/expected/window_parallel.out | 869 ++++++++++++++++++ src/test/regress/greenplum_schedule | 3 + src/test/regress/sql/cluster.sql | 6 +- src/test/regress/sql/select_parallel.sql | 1 + .../sql/statement_mem_for_windowagg.sql | 2 + src/test/regress/sql/window.sql | 32 +- src/test/regress/sql/window_parallel.sql | 223 +++++ 16 files changed, 1813 insertions(+), 535 deletions(-) create mode 100644 src/test/regress/expected/window_parallel.out create mode 100644 src/test/regress/sql/window_parallel.sql diff --git a/src/backend/cdb/cdbgroupingpaths.c b/src/backend/cdb/cdbgroupingpaths.c index 2462cd5e9b0..7ed1fec34b9 100644 --- a/src/backend/cdb/cdbgroupingpaths.c +++ b/src/backend/cdb/cdbgroupingpaths.c @@ -2651,7 +2651,7 @@ cdb_prepare_path_for_sorted_agg(PlannerInfo *root, return subpath; } - if (is_sorted && group_pathkeys) + if (is_sorted && group_pathkeys && (subpath->locus.parallel_workers <= 1)) { /* * The input is already conveniently sorted. We could redistribute @@ -2665,7 +2665,8 @@ cdb_prepare_path_for_sorted_agg(PlannerInfo *root, group_pathkeys, false, locus); } - else if (!is_sorted && group_pathkeys) + else if ((is_sorted && group_pathkeys && (subpath->locus.parallel_workers > 1)) || + (!is_sorted && group_pathkeys)) { /* * If we need to redistribute, it's usually best to redistribute diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index d19bb141a4b..89d15437661 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -3661,7 +3661,7 @@ cdbpath_motion_for_parallel_join(PlannerInfo *root, int sp; /* small rel parallel workers */ /* Consider locus when parallel_ware. */ - if(parallel_aware) + if (parallel_aware) { /* can't parallel join if both are Hashed, it should be in non-parallel path */ if (CdbPathLocus_IsHashed(outer.locus) && diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 463b24ba903..9728f4f34ef 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -300,6 +300,20 @@ static split_rollup_data *make_new_rollups_for_hash_grouping_set(PlannerInfo *ro Path *path, grouping_sets_data *gd); +static bool +contain_case_expr(Node *clause); + +static bool +contain_case_expr_walker(Node *node, void *context); + +static void create_partial_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + PathTarget *input_target, + PathTarget *output_target, + WindowFuncLists *wflists, + List *activeWindows); + /***************************************************************************** * @@ -4876,6 +4890,27 @@ create_window_paths(PlannerInfo *root, activeWindows); } + /* + * Unlike Upstream, we could make window function parallel by redistributing + * the tuples according to the PARTITION BY clause which is similar to Group By. + * Even there is no PARTITION BY, window function could be parallel from + * sub partial paths. + */ + if (window_rel->consider_parallel && + input_rel->partial_pathlist) + { + /* For partial, only the best one if enough. */ + Path *path = (Path *) linitial(input_rel->partial_pathlist); + + create_partial_window_path(root, + window_rel, + path, + input_target, + output_target, + wflists, + activeWindows); + } + /* * If there is an FDW that's responsible for all baserels of the query, * let it consider adding ForeignPaths. @@ -9103,3 +9138,133 @@ make_new_rollups_for_hash_grouping_set(PlannerInfo *root, return srd; } + +static bool +contain_case_expr(Node *clause) +{ + return contain_case_expr_walker(clause, NULL); +} + +static bool +contain_case_expr_walker(Node *node, void *context) +{ + if (node == NULL) + return false; + + if (IsA(node, CaseExpr)) + return true; + + return expression_tree_walker(node, contain_case_expr_walker, + context); +} + +/* + * Parallel processing of window functions. + * + * NB: it may produce non-deterministic results if the window function + * lacks ORDER BY and PARTITION BY clause. + * SQL:2011 has clarified this behavior. + */ +static void +create_partial_window_path(PlannerInfo *root, + RelOptInfo *window_rel, + Path *path, + PathTarget *input_target, + PathTarget *output_target, + WindowFuncLists *wflists, + List *activeWindows) +{ + PathTarget *window_target; + ListCell *l; + Bitmapset *sgrefs; + + window_target = input_target; + + sgrefs = NULL; + + foreach(l, activeWindows) + { + WindowClause *wc = lfirst_node(WindowClause, l); + ListCell *lc2; + + foreach(lc2, wc->partitionClause) + { + SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2); + + sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef); + } + foreach(lc2, wc->orderClause) + { + SortGroupClause *sortcl = lfirst_node(SortGroupClause, lc2); + + sgrefs = bms_add_member(sgrefs, sortcl->tleSortGroupRef); + } + } + + int x = -1; + while ((x = bms_next_member(sgrefs, x)) >= 0) + { + Index sgref = get_pathtarget_sortgroupref(input_target, x); + if (sgref != 0) + { + ListCell *lc; + foreach(lc, input_target->exprs) + { + Expr *expr = (Expr *) lfirst(lc); + if (contain_case_expr((Node*)expr)) + return; + } + } + } + + foreach(l, activeWindows) + { + WindowClause *wc = lfirst_node(WindowClause, l); + List *window_pathkeys; + int presorted_keys; + bool is_sorted; + + window_pathkeys = make_pathkeys_for_window(root, + wc, + root->processed_tlist); + + is_sorted = pathkeys_count_contained_in(window_pathkeys, + path->pathkeys, + &presorted_keys); + + path = cdb_prepare_path_for_sorted_agg(root, + is_sorted, + presorted_keys, + window_rel, + path, + path->pathtarget, + window_pathkeys, + -1.0, + wc->partitionClause, + NIL); + if (lnext(activeWindows, l)) + { + ListCell *lc2; + + window_target = copy_pathtarget(window_target); + foreach(lc2, wflists->windowFuncs[wc->winref]) + { + WindowFunc *wfunc = lfirst_node(WindowFunc, lc2); + + add_column_to_pathtarget(window_target, (Expr *) wfunc, 0); + window_target->width += get_typavgwidth(wfunc->wintype, -1); + } + } + else + { + window_target = output_target; + } + + path = (Path *) + create_windowagg_path(root, window_rel, path, window_target, + wflists->windowFuncs[wc->winref], + wc); + } + + add_partial_path(window_rel, path); +} diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index a48f2ed8433..1fd03a05e87 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -807,7 +807,13 @@ max_parallel_hazard_walker(Node *node, max_parallel_hazard_context *context) */ else if (IsA(node, WindowFunc)) { - if (max_parallel_hazard_test(PROPARALLEL_RESTRICTED, context)) + /* + * In Cloudberry, we proess window fuctions by redistributeing the tuples + * if there is Partition By clause. + * Each partition is processed individually, whether in a single process + * or distributed parallel workers setup. + */ + if (max_parallel_hazard_test(PROPARALLEL_SAFE, context)) return true; } diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out index f62380f80f4..f68bb39a620 100644 --- a/src/test/regress/expected/cluster.out +++ b/src/test/regress/expected/cluster.out @@ -467,9 +467,9 @@ set enable_indexscan = off; set maintenance_work_mem = '1MB'; cluster clstr_4 using cluster_sort; select * from -(select hundred, lag(hundred) over () as lhundred, - thousand, lag(thousand) over () as lthousand, - tenthous, lag(tenthous) over () as ltenthous from clstr_4) ss +(select hundred, lag(hundred) over (order by hundred) as lhundred, + thousand, lag(thousand) over (order by hundred) as lthousand, + tenthous, lag(tenthous) over (order by hundred) as ltenthous from clstr_4) ss where row(hundred, thousand, tenthous) <= row(lhundred, lthousand, ltenthous); hundred | lhundred | thousand | lthousand | tenthous | ltenthous ---------+----------+----------+-----------+----------+----------- diff --git a/src/test/regress/expected/select_parallel.out b/src/test/regress/expected/select_parallel.out index eb36bb11f88..4de01f4f632 100644 --- a/src/test/regress/expected/select_parallel.out +++ b/src/test/regress/expected/select_parallel.out @@ -1056,6 +1056,7 @@ select count(*) from tenk1; reset force_parallel_mode; reset role; -- Window function calculation can't be pushed to workers. +-- CBDB_PARALLEL: window function's subpath could be parallel. explain (costs off, verbose) select count(*) from tenk1 a where (unique1, two) in (select unique1, row_number() over() from tenk1 b); @@ -1063,28 +1064,29 @@ explain (costs off, verbose) --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- Finalize Aggregate Output: count(*) - -> Gather Motion 3:1 (slice1; segments: 3) + -> Gather Motion 6:1 (slice1; segments: 6) Output: (PARTIAL count(*)) -> Partial Aggregate Output: PARTIAL count(*) - -> Hash Semi Join + -> Parallel Hash Semi Join Hash Cond: ((a.unique1 = b.unique1) AND (a.two = (row_number() OVER (?)))) - -> Seq Scan on public.tenk1 a + -> Parallel Seq Scan on public.tenk1 a Output: a.unique1, a.unique2, a.two, a.four, a.ten, a.twenty, a.hundred, a.thousand, a.twothousand, a.fivethous, a.tenthous, a.odd, a.even, a.stringu1, a.stringu2, a.string4 - -> Hash + -> Parallel Hash Output: b.unique1, (row_number() OVER (?)) - -> Redistribute Motion 1:3 (slice2; segments: 1) + -> Redistribute Motion 1:6 (slice2; segments: 1) Output: b.unique1, (row_number() OVER (?)) Hash Key: b.unique1 + Hash Module: 3 -> WindowAgg Output: b.unique1, row_number() OVER (?) - -> Gather Motion 3:1 (slice3; segments: 3) + -> Gather Motion 6:1 (slice3; segments: 6) Output: b.unique1 - -> Seq Scan on public.tenk1 b + -> Parallel Seq Scan on public.tenk1 b Output: b.unique1 Settings: enable_parallel = 'on', min_parallel_table_scan_size = '0', optimizer = 'off', parallel_setup_cost = '0', parallel_tuple_cost = '0' Optimizer: Postgres query optimizer -(23 rows) +(24 rows) -- LIMIT/OFFSET within sub-selects can't be pushed to workers. explain (costs off) diff --git a/src/test/regress/expected/statement_mem_for_windowagg.out b/src/test/regress/expected/statement_mem_for_windowagg.out index d41610f6a30..fd1d066b7bd 100644 --- a/src/test/regress/expected/statement_mem_for_windowagg.out +++ b/src/test/regress/expected/statement_mem_for_windowagg.out @@ -2,6 +2,7 @@ CREATE TABLE dummy_table(x int, y int) DISTRIBUTED BY (y); INSERT INTO dummy_table SELECT generate_series(0, 20000), 0; INSERT INTO dummy_table SELECT generate_series(0, 20000), 3; INSERT INTO dummy_table SELECT generate_series(0, 20000), 10; +set enable_parallel = off; -- 1. Test that if we set statement_mem to a larger value, the tuplestore -- for caching the tuples in partition used in WindowAgg is able to be fitted -- in memory. @@ -172,5 +173,6 @@ SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid) (3 rows) -- Do some clean-ups. +reset enable_parallel; DROP TABLE dummy_table; RESET statement_mem; diff --git a/src/test/regress/expected/window.out b/src/test/regress/expected/window.out index 27b2a1aaf80..36ea794ff8a 100644 --- a/src/test/regress/expected/window.out +++ b/src/test/regress/expected/window.out @@ -1276,259 +1276,259 @@ FROM tenk1 WHERE unique1 < 10; (10 rows) set search_path=singleseg, public; -SELECT sum(unique1) over (rows between current row and unbounded following), +SELECT sum(unique1) over (order by unique1 rows between current row and unbounded following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 45 | 4 | 0 - 41 | 2 | 2 - 39 | 1 | 1 - 38 | 6 | 2 - 32 | 9 | 1 - 23 | 8 | 0 - 15 | 5 | 1 - 10 | 3 | 3 - 7 | 7 | 3 - 0 | 0 | 0 + 45 | 0 | 0 + 45 | 1 | 1 + 44 | 2 | 2 + 42 | 3 | 3 + 39 | 4 | 0 + 35 | 5 | 1 + 30 | 6 | 2 + 24 | 7 | 3 + 17 | 8 | 0 + 9 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 7 | 4 | 0 - 13 | 2 | 2 - 22 | 1 | 1 - 26 | 6 | 2 - 29 | 9 | 1 - 31 | 8 | 0 - 32 | 5 | 1 - 23 | 3 | 3 - 15 | 7 | 3 - 10 | 0 | 0 + 3 | 0 | 0 + 6 | 1 | 1 + 10 | 2 | 2 + 15 | 3 | 3 + 20 | 4 | 0 + 25 | 5 | 1 + 30 | 6 | 2 + 35 | 7 | 3 + 30 | 8 | 0 + 24 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude no others), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 7 | 4 | 0 - 13 | 2 | 2 - 22 | 1 | 1 - 26 | 6 | 2 - 29 | 9 | 1 - 31 | 8 | 0 - 32 | 5 | 1 - 23 | 3 | 3 - 15 | 7 | 3 - 10 | 0 | 0 + 3 | 0 | 0 + 6 | 1 | 1 + 10 | 2 | 2 + 15 | 3 | 3 + 20 | 4 | 0 + 25 | 5 | 1 + 30 | 6 | 2 + 35 | 7 | 3 + 30 | 8 | 0 + 24 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude current row), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 3 | 4 | 0 - 11 | 2 | 2 - 21 | 1 | 1 - 20 | 6 | 2 - 20 | 9 | 1 - 23 | 8 | 0 - 27 | 5 | 1 - 20 | 3 | 3 - 8 | 7 | 3 - 10 | 0 | 0 + 3 | 0 | 0 + 5 | 1 | 1 + 8 | 2 | 2 + 12 | 3 | 3 + 16 | 4 | 0 + 20 | 5 | 1 + 24 | 6 | 2 + 28 | 7 | 3 + 22 | 8 | 0 + 15 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude group), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 4 | 0 - | 2 | 2 - | 1 | 1 - | 6 | 2 - | 9 | 1 - | 8 | 0 - | 5 | 1 - | 3 | 3 - | 7 | 3 - | 0 | 0 + 3 | 0 | 0 + 5 | 1 | 1 + 8 | 2 | 2 + 12 | 3 | 3 + 16 | 4 | 0 + 20 | 5 | 1 + 24 | 6 | 2 + 28 | 7 | 3 + 22 | 8 | 0 + 15 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude ties), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 4 | 4 | 0 - 2 | 2 | 2 - 1 | 1 | 1 - 6 | 6 | 2 - 9 | 9 | 1 - 8 | 8 | 0 - 5 | 5 | 1 - 3 | 3 | 3 - 7 | 7 | 3 - 0 | 0 | 0 + 3 | 0 | 0 + 6 | 1 | 1 + 10 | 2 | 2 + 15 | 3 | 3 + 20 | 4 | 0 + 25 | 5 | 1 + 30 | 6 | 2 + 35 | 7 | 3 + 30 | 8 | 0 + 24 | 9 | 1 (10 rows) -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; first_value | unique1 | four -------------+---------+------ - 8 | 0 | 0 - 4 | 8 | 0 - 5 | 4 | 0 + 4 | 0 | 0 + 8 | 4 | 0 + 1 | 8 | 0 + 5 | 1 | 1 9 | 5 | 1 - 1 | 9 | 1 - 6 | 1 | 1 - 2 | 6 | 2 - 3 | 2 | 2 + 2 | 9 | 1 + 6 | 2 | 2 + 3 | 6 | 2 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; first_value | unique1 | four -------------+---------+------ - | 0 | 0 - 5 | 8 | 0 - 5 | 4 | 0 - | 5 | 1 - 6 | 9 | 1 - 6 | 1 | 1 + 4 | 0 | 0 + 8 | 4 | 0 + 1 | 8 | 0 + 5 | 1 | 1 + 9 | 5 | 1 + 2 | 9 | 1 + 6 | 2 | 2 3 | 6 | 2 - 3 | 2 | 2 - | 3 | 3 + 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude ties), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; first_value | unique1 | four -------------+---------+------ 0 | 0 | 0 - 8 | 8 | 0 4 | 4 | 0 + 8 | 8 | 0 + 1 | 1 | 1 5 | 5 | 1 9 | 9 | 1 - 1 | 1 | 1 - 6 | 6 | 2 2 | 2 | 2 + 6 | 6 | 2 3 | 3 | 3 7 | 7 | 3 (10 rows) -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; last_value | unique1 | four ------------+---------+------ - 4 | 0 | 0 + 8 | 0 | 0 + 1 | 4 | 0 5 | 8 | 0 - 9 | 4 | 0 - 1 | 5 | 1 + 9 | 1 | 1 + 2 | 5 | 1 6 | 9 | 1 - 2 | 1 | 1 - 3 | 6 | 2 - 7 | 2 | 2 + 3 | 2 | 2 + 7 | 6 | 2 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; last_value | unique1 | four ------------+---------+------ - | 0 | 0 + 8 | 0 | 0 + 1 | 4 | 0 5 | 8 | 0 - 9 | 4 | 0 - | 5 | 1 + 9 | 1 | 1 + 2 | 5 | 1 6 | 9 | 1 - 2 | 1 | 1 - 3 | 6 | 2 - 7 | 2 | 2 - | 3 | 3 + 3 | 2 | 2 + 7 | 6 | 2 + 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude ties), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; last_value | unique1 | four ------------+---------+------ - 0 | 0 | 0 + 8 | 0 | 0 + 1 | 4 | 0 5 | 8 | 0 - 9 | 4 | 0 - 5 | 5 | 1 + 9 | 1 | 1 + 2 | 5 | 1 6 | 9 | 1 - 2 | 1 | 1 - 3 | 6 | 2 - 7 | 2 | 2 - 3 | 3 | 3 + 3 | 2 | 2 + 7 | 6 | 2 + 7 | 3 | 3 7 | 7 | 3 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 1 preceding), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 1 preceding), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 4 | 0 - 4 | 2 | 2 - 6 | 1 | 1 - 3 | 6 | 2 - 7 | 9 | 1 - 15 | 8 | 0 - 17 | 5 | 1 - 13 | 3 | 3 - 8 | 7 | 3 - 10 | 0 | 0 + | 0 | 0 + 0 | 1 | 1 + 1 | 2 | 2 + 3 | 3 | 3 + 5 | 4 | 0 + 7 | 5 | 1 + 9 | 6 | 2 + 11 | 7 | 3 + 13 | 8 | 0 + 15 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 1 following and 3 following), +SELECT sum(unique1) over (order by unique1 rows between 1 following and 3 following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 9 | 4 | 0 - 16 | 2 | 2 - 23 | 1 | 1 - 22 | 6 | 2 - 16 | 9 | 1 - 15 | 8 | 0 - 10 | 5 | 1 - 7 | 3 | 3 - 0 | 7 | 3 - | 0 | 0 + 6 | 0 | 0 + 9 | 1 | 1 + 12 | 2 | 2 + 15 | 3 | 3 + 18 | 4 | 0 + 21 | 5 | 1 + 24 | 6 | 2 + 17 | 7 | 3 + 9 | 8 | 0 + | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between unbounded preceding and 1 following), +SELECT sum(unique1) over (order by unique1 rows between unbounded preceding and 1 following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 6 | 4 | 0 - 7 | 2 | 2 - 13 | 1 | 1 - 22 | 6 | 2 - 30 | 9 | 1 - 35 | 8 | 0 - 38 | 5 | 1 - 45 | 3 | 3 - 45 | 7 | 3 - 45 | 0 | 0 + 1 | 0 | 0 + 3 | 1 | 1 + 6 | 2 | 2 + 10 | 3 | 3 + 15 | 4 | 0 + 21 | 5 | 1 + 28 | 6 | 2 + 36 | 7 | 3 + 45 | 8 | 0 + 45 | 9 | 1 (10 rows) SELECT sum(unique1) over (w range between current row and unbounded following), @@ -1603,19 +1603,19 @@ SELECT first_value(unique1) over w, nth_value(unique1, 2) over w AS nth_2, last_value(unique1) over w, unique1, four FROM tenk1 WHERE unique1 < 10 -WINDOW w AS (order by four range between current row and unbounded following); +WINDOW w AS (order by unique1, four range between current row and unbounded following); first_value | nth_2 | last_value | unique1 | four -------------+-------+------------+---------+------ - 0 | 8 | 7 | 0 | 0 - 0 | 8 | 7 | 8 | 0 - 0 | 8 | 7 | 4 | 0 - 5 | 9 | 7 | 5 | 1 - 5 | 9 | 7 | 9 | 1 - 5 | 9 | 7 | 1 | 1 - 6 | 2 | 7 | 6 | 2 - 6 | 2 | 7 | 2 | 2 - 3 | 7 | 7 | 3 | 3 - 3 | 7 | 7 | 7 | 3 + 0 | 1 | 9 | 0 | 0 + 1 | 2 | 9 | 1 | 1 + 2 | 3 | 9 | 2 | 2 + 3 | 4 | 9 | 3 | 3 + 4 | 5 | 9 | 4 | 0 + 5 | 6 | 9 | 5 | 1 + 6 | 7 | 9 | 6 | 2 + 7 | 8 | 9 | 7 | 3 + 8 | 9 | 9 | 8 | 0 + 9 | | 9 | 9 | 1 (10 rows) SELECT sum(unique1) over diff --git a/src/test/regress/expected/window_optimizer.out b/src/test/regress/expected/window_optimizer.out index f6bdcc3b6c7..8576209f725 100644 --- a/src/test/regress/expected/window_optimizer.out +++ b/src/test/regress/expected/window_optimizer.out @@ -16,6 +16,8 @@ CREATE TEMPORARY TABLE empsalary ( salary int, enroll_date date ); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'depname' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. INSERT INTO empsalary VALUES ('develop', 10, 5200, '2007-08-01'), ('sales', 1, 5000, '2006-10-01'), @@ -87,30 +89,30 @@ GROUP BY four, ten ORDER BY four, ten; SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); depname | empno | salary | sum -----------+-------+--------+------- + develop | 11 | 5200 | 25100 + develop | 7 | 4200 | 25100 + develop | 9 | 4500 | 25100 + develop | 8 | 6000 | 25100 + develop | 10 | 5200 | 25100 personnel | 5 | 3500 | 7400 personnel | 2 | 3900 | 7400 + sales | 3 | 4800 | 14600 sales | 1 | 5000 | 14600 sales | 4 | 4800 | 14600 - sales | 3 | 4800 | 14600 - develop | 10 | 5200 | 25100 - develop | 7 | 4200 | 25100 - develop | 9 | 4500 | 25100 - develop | 8 | 6000 | 25100 - develop | 11 | 5200 | 25100 (10 rows) SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; depname | empno | salary | rank -----------+-------+--------+------ develop | 7 | 4200 | 1 - sales | 4 | 4800 | 1 - sales | 3 | 4800 | 1 personnel | 5 | 3500 | 1 + sales | 3 | 4800 | 1 + sales | 4 | 4800 | 1 personnel | 2 | 3900 | 2 develop | 9 | 4500 | 2 - develop | 10 | 5200 | 3 - develop | 11 | 5200 | 3 sales | 1 | 5000 | 3 + develop | 11 | 5200 | 3 + develop | 10 | 5200 | 3 develop | 8 | 6000 | 5 (10 rows) @@ -155,16 +157,16 @@ SELECT four FROM tenk1 WHERE FALSE WINDOW w AS (PARTITION BY ten); SELECT sum(four) OVER (PARTITION BY ten ORDER BY unique2) AS sum_1, ten, four FROM tenk1 WHERE unique2 < 10; sum_1 | ten | four -------+-----+------ - 1 | 9 | 1 + 3 | 3 | 3 + 0 | 4 | 0 + 1 | 7 | 1 0 | 0 | 0 0 | 0 | 0 2 | 0 | 2 3 | 1 | 3 4 | 1 | 1 5 | 1 | 1 - 3 | 3 | 3 - 0 | 4 | 0 - 1 | 7 | 1 + 1 | 9 | 1 (10 rows) SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10; @@ -185,6 +187,9 @@ SELECT row_number() OVER (ORDER BY unique2) FROM tenk1 WHERE unique2 < 10; SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10; rank_1 | ten | four --------+-----+------ + 1 | 0 | 2 + 1 | 1 | 3 + 2 | 3 | 3 1 | 0 | 0 1 | 0 | 0 3 | 4 | 0 @@ -192,14 +197,12 @@ SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM te 1 | 1 | 1 3 | 7 | 1 4 | 9 | 1 - 1 | 0 | 2 - 1 | 1 | 3 - 2 | 3 | 3 (10 rows) SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; dense_rank | ten | four ------------+-----+------ + 1 | 0 | 2 1 | 1 | 3 2 | 3 | 3 1 | 0 | 0 @@ -209,7 +212,6 @@ SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 1 | 1 | 1 2 | 7 | 1 3 | 9 | 1 - 1 | 0 | 2 (10 rows) SELECT percent_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; @@ -267,6 +269,7 @@ SELECT ntile(NULL) OVER (ORDER BY ten, four), ten, four FROM tenk1 LIMIT 2; SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; lag | ten | four -----+-----+------ + | 0 | 2 | 1 | 3 1 | 3 | 3 | 0 | 0 @@ -276,12 +279,12 @@ SELECT lag(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHER 1 | 1 | 1 1 | 7 | 1 7 | 9 | 1 - | 0 | 2 (10 rows) SELECT lag(ten, four) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; lag | ten | four -----+-----+------ + | 0 | 2 | 1 | 3 | 3 | 3 0 | 0 | 0 @@ -291,12 +294,14 @@ SELECT lag(ten, four) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk 1 | 1 | 1 1 | 7 | 1 7 | 9 | 1 - | 0 | 2 (10 rows) SELECT lag(ten, four, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; lag | ten | four -----+-----+------ + 0 | 0 | 2 + 0 | 1 | 3 + 0 | 3 | 3 0 | 0 | 0 0 | 0 | 0 4 | 4 | 0 @@ -304,9 +309,6 @@ SELECT lag(ten, four, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM t 1 | 1 | 1 1 | 7 | 1 7 | 9 | 1 - 0 | 0 | 2 - 0 | 1 | 3 - 0 | 3 | 3 (10 rows) SELECT lag(ten, four, 0.7) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten; @@ -327,6 +329,7 @@ SELECT lag(ten, four, 0.7) OVER (PARTITION BY four ORDER BY ten), ten, four FROM SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; lead | ten | four ------+-----+------ + | 0 | 2 3 | 1 | 3 | 3 | 3 0 | 0 | 0 @@ -336,12 +339,12 @@ SELECT lead(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHE 7 | 1 | 1 9 | 7 | 1 | 9 | 1 - | 0 | 2 (10 rows) SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; lead | ten | four ------+-----+------ + | 0 | 2 6 | 1 | 3 | 3 | 3 0 | 0 | 0 @@ -351,12 +354,12 @@ SELECT lead(ten * 2, 1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM te 14 | 1 | 1 18 | 7 | 1 | 9 | 1 - | 0 | 2 (10 rows) SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; lead | ten | four ------+-----+------ + -1 | 0 | 2 6 | 1 | 3 -1 | 3 | 3 0 | 0 | 0 @@ -366,7 +369,6 @@ SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FRO 14 | 1 | 1 18 | 7 | 1 -1 | 9 | 1 - -1 | 0 | 2 (10 rows) SELECT lead(ten * 2, 1, -1.4) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10 ORDER BY four, ten; @@ -387,6 +389,7 @@ SELECT lead(ten * 2, 1, -1.4) OVER (PARTITION BY four ORDER BY ten), ten, four F SELECT first_value(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; first_value | ten | four -------------+-----+------ + 0 | 0 | 2 1 | 1 | 3 1 | 3 | 3 0 | 0 | 0 @@ -396,7 +399,6 @@ SELECT first_value(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM te 1 | 1 | 1 1 | 7 | 1 1 | 9 | 1 - 0 | 0 | 2 (10 rows) -- test split window func @@ -732,41 +734,41 @@ SELECT * FROM (SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, te SELECT * FROM (SELECT row_number() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 < 3; rank_1 | ten | four --------+-----+------ - 1 | 0 | 2 - 1 | 1 | 3 - 2 | 3 | 3 1 | 0 | 0 2 | 0 | 0 1 | 1 | 1 2 | 1 | 1 + 1 | 0 | 2 + 1 | 1 | 3 + 2 | 3 | 3 (7 rows) SELECT * FROM (SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 < 3; rank_1 | ten | four --------+-----+------ - 1 | 0 | 2 - 1 | 1 | 3 - 2 | 3 | 3 1 | 0 | 0 1 | 0 | 0 2 | 4 | 0 1 | 1 | 1 1 | 1 | 1 2 | 7 | 1 + 1 | 0 | 2 + 1 | 1 | 3 + 2 | 3 | 3 (9 rows) SELECT * FROM (SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 <= 3; rank_1 | ten | four --------+-----+------ + 1 | 0 | 2 + 1 | 1 | 3 + 2 | 3 | 3 1 | 0 | 0 1 | 0 | 0 3 | 4 | 0 1 | 1 | 1 1 | 1 | 1 3 | 7 | 1 - 1 | 0 | 2 - 1 | 1 | 3 - 2 | 3 | 3 (9 rows) SELECT * FROM (SELECT row_number() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 <= 3; @@ -814,27 +816,27 @@ SELECT * FROM (SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, te SELECT * FROM (SELECT row_number() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 < 3; rank_1 | ten | four --------+-----+------ - 1 | 0 | 2 - 1 | 1 | 3 - 2 | 3 | 3 1 | 0 | 0 2 | 0 | 0 1 | 1 | 1 2 | 1 | 1 + 1 | 0 | 2 + 1 | 1 | 3 + 2 | 3 | 3 (7 rows) SELECT * FROM (SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 < 3; rank_1 | ten | four --------+-----+------ - 1 | 0 | 2 - 1 | 1 | 3 - 2 | 3 | 3 1 | 0 | 0 1 | 0 | 0 2 | 4 | 0 1 | 1 | 1 1 | 1 | 1 2 | 7 | 1 + 1 | 0 | 2 + 1 | 1 | 3 + 2 | 3 | 3 (9 rows) SELECT * FROM (SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM tenk1 WHERE unique2 < 10) t WHERE rank_1 <= 3; @@ -888,9 +890,9 @@ reset optimizer_force_split_window_function; SELECT last_value(ten) OVER (ORDER BY ten), ten, four FROM tenk1 WHERE unique2 < 10; last_value | ten | four ------------+-----+------ - 0 | 0 | 2 0 | 0 | 0 0 | 0 | 0 + 0 | 0 | 2 1 | 1 | 1 1 | 1 | 3 1 | 1 | 1 @@ -967,6 +969,7 @@ SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) + FROM tenk1 WHERE unique2 < 10; cntsum -------- + 51 92 136 22 @@ -976,7 +979,6 @@ SELECT (count(*) OVER (PARTITION BY four ORDER BY ten) + 24 82 92 - 51 (10 rows) -- opexpr with different windows evaluation. @@ -995,6 +997,9 @@ WHERE total <> fourcount + twosum; SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHERE unique2 < 10; avg ------------------------ + 2.0000000000000000 + 3.0000000000000000 + 3.0000000000000000 0.00000000000000000000 0.00000000000000000000 0.00000000000000000000 @@ -1002,9 +1007,6 @@ SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM tenk1 WHE 1.00000000000000000000 1.00000000000000000000 1.00000000000000000000 - 3.0000000000000000 - 3.0000000000000000 - 2.0000000000000000 (10 rows) SELECT ten, two, sum(hundred) AS gsum, sum(sum(hundred)) OVER win AS wsum @@ -1030,9 +1032,9 @@ SELECT sum(salary), FROM empsalary GROUP BY depname; sum | row_number | sum -------+------------+------- - 25100 | 1 | 47100 - 7400 | 2 | 22000 14600 | 3 | 14600 + 7400 | 2 | 22000 + 25100 | 1 | 47100 (3 rows) -- identical windows with different names @@ -1057,8 +1059,6 @@ SELECT lead(ten, (SELECT two FROM tenk1 WHERE s.unique2 = unique2)) OVER (PARTIT FROM tenk1 s WHERE unique2 < 10; lead ------ - 3 - 0 0 4 @@ -1067,6 +1067,8 @@ FROM tenk1 s WHERE unique2 < 10; 9 0 + 3 + (10 rows) -- empty table @@ -1156,7 +1158,7 @@ select first_value(max(x)) over (), y -> Streaming Partial HashAggregate Group Key: (ten + four) -> Seq Scan on tenk1 - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + Optimizer: GPORCA (10 rows) -- test non-default frame specifications @@ -1166,6 +1168,11 @@ SELECT four, ten, FROM (select distinct ten, four from tenk1) ss; four | ten | sum | last_value ------+-----+-----+------------ + 2 | 0 | 0 | 0 + 2 | 2 | 2 | 2 + 2 | 4 | 6 | 4 + 2 | 6 | 12 | 6 + 2 | 8 | 20 | 8 3 | 1 | 1 | 1 3 | 3 | 4 | 3 3 | 5 | 9 | 5 @@ -1181,11 +1188,6 @@ FROM (select distinct ten, four from tenk1) ss; 1 | 5 | 9 | 5 1 | 7 | 16 | 7 1 | 9 | 25 | 9 - 2 | 0 | 0 | 0 - 2 | 2 | 2 | 2 - 2 | 4 | 6 | 4 - 2 | 6 | 12 | 6 - 2 | 8 | 20 | 8 (20 rows) SELECT four, ten, @@ -1194,16 +1196,6 @@ SELECT four, ten, FROM (select distinct ten, four from tenk1) ss; four | ten | sum | last_value ------+-----+-----+------------ - 0 | 0 | 0 | 0 - 0 | 2 | 2 | 2 - 0 | 4 | 6 | 4 - 0 | 6 | 12 | 6 - 0 | 8 | 20 | 8 - 1 | 1 | 1 | 1 - 1 | 3 | 4 | 3 - 1 | 5 | 9 | 5 - 1 | 7 | 16 | 7 - 1 | 9 | 25 | 9 2 | 0 | 0 | 0 2 | 2 | 2 | 2 2 | 4 | 6 | 4 @@ -1214,6 +1206,16 @@ FROM (select distinct ten, four from tenk1) ss; 3 | 5 | 9 | 5 3 | 7 | 16 | 7 3 | 9 | 25 | 9 + 0 | 0 | 0 | 0 + 0 | 2 | 2 | 2 + 0 | 4 | 6 | 4 + 0 | 6 | 12 | 6 + 0 | 8 | 20 | 8 + 1 | 1 | 1 | 1 + 1 | 3 | 4 | 3 + 1 | 5 | 9 | 5 + 1 | 7 | 16 | 7 + 1 | 9 | 25 | 9 (20 rows) SELECT four, ten, @@ -1222,6 +1224,11 @@ SELECT four, ten, FROM (select distinct ten, four from tenk1) ss; four | ten | sum | last_value ------+-----+-----+------------ + 2 | 0 | 20 | 8 + 2 | 2 | 20 | 8 + 2 | 4 | 20 | 8 + 2 | 6 | 20 | 8 + 2 | 8 | 20 | 8 3 | 1 | 25 | 9 3 | 3 | 25 | 9 3 | 5 | 25 | 9 @@ -1237,11 +1244,6 @@ FROM (select distinct ten, four from tenk1) ss; 1 | 5 | 25 | 9 1 | 7 | 25 | 9 1 | 9 | 25 | 9 - 2 | 0 | 20 | 8 - 2 | 2 | 20 | 8 - 2 | 4 | 20 | 8 - 2 | 6 | 20 | 8 - 2 | 8 | 20 | 8 (20 rows) SELECT four, ten/4 as two, @@ -1250,6 +1252,11 @@ SELECT four, ten/4 as two, FROM (select distinct ten, four from tenk1) ss; four | two | sum | last_value ------+-----+-----+------------ + 2 | 0 | 0 | 0 + 2 | 0 | 0 | 0 + 2 | 1 | 2 | 1 + 2 | 1 | 2 | 1 + 2 | 2 | 4 | 2 3 | 0 | 0 | 0 3 | 0 | 0 | 0 3 | 1 | 2 | 1 @@ -1265,11 +1272,6 @@ FROM (select distinct ten, four from tenk1) ss; 1 | 1 | 2 | 1 1 | 1 | 2 | 1 1 | 2 | 4 | 2 - 2 | 0 | 0 | 0 - 2 | 0 | 0 | 0 - 2 | 1 | 2 | 1 - 2 | 1 | 2 | 1 - 2 | 2 | 4 | 2 (20 rows) SELECT four, ten/4 as two, @@ -1278,6 +1280,11 @@ SELECT four, ten/4 as two, FROM (select distinct ten, four from tenk1) ss; four | two | sum | last_value ------+-----+-----+------------ + 2 | 0 | 0 | 0 + 2 | 0 | 0 | 0 + 2 | 1 | 1 | 1 + 2 | 1 | 2 | 1 + 2 | 2 | 4 | 2 3 | 0 | 0 | 0 3 | 0 | 0 | 0 3 | 1 | 1 | 1 @@ -1293,11 +1300,6 @@ FROM (select distinct ten, four from tenk1) ss; 1 | 1 | 1 | 1 1 | 1 | 2 | 1 1 | 2 | 4 | 2 - 2 | 0 | 0 | 0 - 2 | 0 | 0 | 0 - 2 | 1 | 1 | 1 - 2 | 1 | 2 | 1 - 2 | 2 | 4 | 2 (20 rows) SELECT sum(unique1) over (order by four range between current row and unbounded following), @@ -1305,272 +1307,272 @@ SELECT sum(unique1) over (order by four range between current row and unbounded FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 45 | 0 | 0 - 45 | 8 | 0 45 | 4 | 0 - 33 | 5 | 1 + 45 | 8 | 0 + 45 | 0 | 0 33 | 1 | 1 + 33 | 5 | 1 33 | 9 | 1 - 18 | 2 | 2 18 | 6 | 2 - 10 | 7 | 3 + 18 | 2 | 2 10 | 3 | 3 + 10 | 7 | 3 (10 rows) set search_path=singleseg, public; -SELECT sum(unique1) over (rows between current row and unbounded following), +SELECT sum(unique1) over (order by unique1 rows between current row and unbounded following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 45 | 4 | 0 - 41 | 2 | 2 - 39 | 1 | 1 - 38 | 6 | 2 - 32 | 9 | 1 - 23 | 8 | 0 - 15 | 5 | 1 - 10 | 3 | 3 - 7 | 7 | 3 - 0 | 0 | 0 + 45 | 0 | 0 + 45 | 1 | 1 + 44 | 2 | 2 + 42 | 3 | 3 + 39 | 4 | 0 + 35 | 5 | 1 + 30 | 6 | 2 + 24 | 7 | 3 + 17 | 8 | 0 + 9 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 7 | 4 | 0 - 13 | 2 | 2 - 22 | 1 | 1 - 26 | 6 | 2 - 29 | 9 | 1 - 31 | 8 | 0 - 32 | 5 | 1 - 23 | 3 | 3 - 15 | 7 | 3 - 10 | 0 | 0 + 3 | 0 | 0 + 6 | 1 | 1 + 10 | 2 | 2 + 15 | 3 | 3 + 20 | 4 | 0 + 25 | 5 | 1 + 30 | 6 | 2 + 35 | 7 | 3 + 30 | 8 | 0 + 24 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude no others), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 7 | 4 | 0 - 13 | 2 | 2 - 22 | 1 | 1 - 26 | 6 | 2 - 29 | 9 | 1 - 31 | 8 | 0 - 32 | 5 | 1 - 23 | 3 | 3 - 15 | 7 | 3 - 10 | 0 | 0 + 3 | 0 | 0 + 6 | 1 | 1 + 10 | 2 | 2 + 15 | 3 | 3 + 20 | 4 | 0 + 25 | 5 | 1 + 30 | 6 | 2 + 35 | 7 | 3 + 30 | 8 | 0 + 24 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude current row), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 3 | 4 | 0 - 11 | 2 | 2 - 21 | 1 | 1 - 20 | 6 | 2 - 20 | 9 | 1 - 23 | 8 | 0 - 27 | 5 | 1 - 20 | 3 | 3 - 8 | 7 | 3 - 10 | 0 | 0 + 3 | 0 | 0 + 5 | 1 | 1 + 8 | 2 | 2 + 12 | 3 | 3 + 16 | 4 | 0 + 20 | 5 | 1 + 24 | 6 | 2 + 28 | 7 | 3 + 22 | 8 | 0 + 15 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude group), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 4 | 0 - | 2 | 2 - | 1 | 1 - | 6 | 2 - | 9 | 1 - | 8 | 0 - | 5 | 1 - | 3 | 3 - | 7 | 3 - | 0 | 0 + 3 | 0 | 0 + 5 | 1 | 1 + 8 | 2 | 2 + 12 | 3 | 3 + 16 | 4 | 0 + 20 | 5 | 1 + 24 | 6 | 2 + 28 | 7 | 3 + 22 | 8 | 0 + 15 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude ties), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 4 | 4 | 0 - 2 | 2 | 2 - 1 | 1 | 1 - 6 | 6 | 2 - 9 | 9 | 1 - 8 | 8 | 0 - 5 | 5 | 1 - 3 | 3 | 3 - 7 | 7 | 3 - 0 | 0 | 0 + 3 | 0 | 0 + 6 | 1 | 1 + 10 | 2 | 2 + 15 | 3 | 3 + 20 | 4 | 0 + 25 | 5 | 1 + 30 | 6 | 2 + 35 | 7 | 3 + 30 | 8 | 0 + 24 | 9 | 1 (10 rows) -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; first_value | unique1 | four -------------+---------+------ - 8 | 0 | 0 - 4 | 8 | 0 - 5 | 4 | 0 + 4 | 0 | 0 + 8 | 4 | 0 + 1 | 8 | 0 + 5 | 1 | 1 9 | 5 | 1 - 1 | 9 | 1 - 6 | 1 | 1 - 2 | 6 | 2 - 3 | 2 | 2 + 2 | 9 | 1 + 6 | 2 | 2 + 3 | 6 | 2 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; first_value | unique1 | four -------------+---------+------ - | 0 | 0 - 5 | 8 | 0 - 5 | 4 | 0 - | 5 | 1 - 6 | 9 | 1 - 6 | 1 | 1 + 4 | 0 | 0 + 8 | 4 | 0 + 1 | 8 | 0 + 5 | 1 | 1 + 9 | 5 | 1 + 2 | 9 | 1 + 6 | 2 | 2 3 | 6 | 2 - 3 | 2 | 2 - | 3 | 3 + 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude ties), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; first_value | unique1 | four -------------+---------+------ 0 | 0 | 0 - 8 | 8 | 0 4 | 4 | 0 + 8 | 8 | 0 + 1 | 1 | 1 5 | 5 | 1 9 | 9 | 1 - 1 | 1 | 1 - 6 | 6 | 2 2 | 2 | 2 + 6 | 6 | 2 3 | 3 | 3 7 | 7 | 3 (10 rows) -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; last_value | unique1 | four ------------+---------+------ - 4 | 0 | 0 + 8 | 0 | 0 + 1 | 4 | 0 5 | 8 | 0 - 9 | 4 | 0 - 1 | 5 | 1 + 9 | 1 | 1 + 2 | 5 | 1 6 | 9 | 1 - 2 | 1 | 1 - 3 | 6 | 2 - 7 | 2 | 2 + 3 | 2 | 2 + 7 | 6 | 2 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; last_value | unique1 | four ------------+---------+------ - | 0 | 0 + 8 | 0 | 0 + 1 | 4 | 0 5 | 8 | 0 - 9 | 4 | 0 - | 5 | 1 + 9 | 1 | 1 + 2 | 5 | 1 6 | 9 | 1 - 2 | 1 | 1 - 3 | 6 | 2 - 7 | 2 | 2 - | 3 | 3 + 3 | 2 | 2 + 7 | 6 | 2 + 7 | 3 | 3 | 7 | 3 (10 rows) -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude ties), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; last_value | unique1 | four ------------+---------+------ - 0 | 0 | 0 + 8 | 0 | 0 + 1 | 4 | 0 5 | 8 | 0 - 9 | 4 | 0 - 5 | 5 | 1 + 9 | 1 | 1 + 2 | 5 | 1 6 | 9 | 1 - 2 | 1 | 1 - 3 | 6 | 2 - 7 | 2 | 2 - 3 | 3 | 3 + 3 | 2 | 2 + 7 | 6 | 2 + 7 | 3 | 3 7 | 7 | 3 (10 rows) -SELECT sum(unique1) over (rows between 2 preceding and 1 preceding), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 1 preceding), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 4 | 0 - 4 | 2 | 2 - 6 | 1 | 1 - 3 | 6 | 2 - 7 | 9 | 1 - 15 | 8 | 0 - 17 | 5 | 1 - 13 | 3 | 3 - 8 | 7 | 3 - 10 | 0 | 0 + | 0 | 0 + 0 | 1 | 1 + 1 | 2 | 2 + 3 | 3 | 3 + 5 | 4 | 0 + 7 | 5 | 1 + 9 | 6 | 2 + 11 | 7 | 3 + 13 | 8 | 0 + 15 | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between 1 following and 3 following), +SELECT sum(unique1) over (order by unique1 rows between 1 following and 3 following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 9 | 4 | 0 - 16 | 2 | 2 - 23 | 1 | 1 - 22 | 6 | 2 - 16 | 9 | 1 - 15 | 8 | 0 - 10 | 5 | 1 - 7 | 3 | 3 - 0 | 7 | 3 - | 0 | 0 + 6 | 0 | 0 + 9 | 1 | 1 + 12 | 2 | 2 + 15 | 3 | 3 + 18 | 4 | 0 + 21 | 5 | 1 + 24 | 6 | 2 + 17 | 7 | 3 + 9 | 8 | 0 + | 9 | 1 (10 rows) -SELECT sum(unique1) over (rows between unbounded preceding and 1 following), +SELECT sum(unique1) over (order by unique1 rows between unbounded preceding and 1 following), unique1, four FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 6 | 4 | 0 - 7 | 2 | 2 - 13 | 1 | 1 - 22 | 6 | 2 - 30 | 9 | 1 - 35 | 8 | 0 - 38 | 5 | 1 - 45 | 3 | 3 - 45 | 7 | 3 - 45 | 0 | 0 + 1 | 0 | 0 + 3 | 1 | 1 + 6 | 2 | 2 + 10 | 3 | 3 + 15 | 4 | 0 + 21 | 5 | 1 + 28 | 6 | 2 + 36 | 7 | 3 + 45 | 8 | 0 + 45 | 9 | 1 (10 rows) SELECT sum(unique1) over (w range between current row and unbounded following), @@ -1584,10 +1586,10 @@ FROM tenk1 WHERE unique1 < 10 WINDOW w AS (order by four); 33 | 5 | 1 33 | 9 | 1 33 | 1 | 1 - 18 | 2 | 2 18 | 6 | 2 - 10 | 7 | 3 + 18 | 2 | 2 10 | 3 | 3 + 10 | 7 | 3 (10 rows) SELECT sum(unique1) over (w range between unbounded preceding and current row exclude current row), @@ -1645,19 +1647,19 @@ SELECT first_value(unique1) over w, nth_value(unique1, 2) over w AS nth_2, last_value(unique1) over w, unique1, four FROM tenk1 WHERE unique1 < 10 -WINDOW w AS (order by four range between current row and unbounded following); +WINDOW w AS (order by unique1, four range between current row and unbounded following); first_value | nth_2 | last_value | unique1 | four -------------+-------+------------+---------+------ - 0 | 8 | 7 | 0 | 0 - 0 | 8 | 7 | 8 | 0 - 0 | 8 | 7 | 4 | 0 - 5 | 9 | 7 | 5 | 1 - 5 | 9 | 7 | 9 | 1 - 5 | 9 | 7 | 1 | 1 - 6 | 2 | 7 | 6 | 2 - 6 | 2 | 7 | 2 | 2 - 3 | 7 | 7 | 3 | 3 - 3 | 7 | 7 | 7 | 3 + 0 | 1 | 9 | 0 | 0 + 1 | 2 | 9 | 1 | 1 + 2 | 3 | 9 | 2 | 2 + 3 | 4 | 9 | 3 | 3 + 4 | 5 | 9 | 4 | 0 + 5 | 6 | 9 | 5 | 1 + 6 | 7 | 9 | 6 | 2 + 7 | 8 | 9 | 7 | 3 + 8 | 9 | 9 | 8 | 0 + 9 | | 9 | 9 | 1 (10 rows) SELECT sum(unique1) over @@ -1865,12 +1867,12 @@ SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::i FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 0 | 0 - | 8 | 0 | 4 | 0 + | 8 | 0 + | 0 | 0 + 12 | 1 | 1 12 | 5 | 1 12 | 9 | 1 - 12 | 1 | 1 27 | 6 | 2 27 | 2 | 2 23 | 3 | 3 @@ -1884,14 +1886,14 @@ FROM tenk1 WHERE unique1 < 10; -----+---------+------ | 3 | 3 | 7 | 3 - 10 | 6 | 2 10 | 2 | 2 - 18 | 9 | 1 + 10 | 6 | 2 18 | 5 | 1 + 18 | 9 | 1 18 | 1 | 1 23 | 0 | 0 - 23 | 8 | 0 23 | 4 | 0 + 23 | 8 | 0 (10 rows) SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::int2 preceding exclude no others), @@ -1899,12 +1901,12 @@ SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::i FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 0 | 0 - | 8 | 0 | 4 | 0 + | 8 | 0 + | 0 | 0 + 12 | 1 | 1 12 | 5 | 1 12 | 9 | 1 - 12 | 1 | 1 27 | 6 | 2 27 | 2 | 2 23 | 3 | 3 @@ -1916,12 +1918,12 @@ SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::i FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 0 | 0 - | 8 | 0 | 4 | 0 + | 8 | 0 + | 0 | 0 + 12 | 1 | 1 12 | 5 | 1 12 | 9 | 1 - 12 | 1 | 1 27 | 6 | 2 27 | 2 | 2 23 | 3 | 3 @@ -1933,12 +1935,12 @@ SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::i FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 0 | 0 - | 8 | 0 | 4 | 0 + | 8 | 0 + | 0 | 0 + 12 | 1 | 1 12 | 5 | 1 12 | 9 | 1 - 12 | 1 | 1 27 | 6 | 2 27 | 2 | 2 23 | 3 | 3 @@ -1950,12 +1952,12 @@ SELECT sum(unique1) over (order by four range between 2::int8 preceding and 1::i FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 0 | 0 - | 8 | 0 | 4 | 0 + | 8 | 0 + | 0 | 0 + 12 | 1 | 1 12 | 5 | 1 12 | 9 | 1 - 12 | 1 | 1 27 | 6 | 2 27 | 2 | 2 23 | 3 | 3 @@ -1967,12 +1969,12 @@ SELECT sum(unique1) over (order by four range between 2::int8 preceding and 6::i FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 33 | 0 | 0 - 41 | 8 | 0 37 | 4 | 0 + 41 | 8 | 0 + 33 | 0 | 0 + 31 | 1 | 1 35 | 5 | 1 39 | 9 | 1 - 31 | 1 | 1 43 | 6 | 2 39 | 2 | 2 26 | 3 | 3 @@ -1984,12 +1986,12 @@ SELECT sum(unique1) over (order by four range between 2::int8 preceding and 6::i FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 33 | 0 | 0 - 33 | 8 | 0 33 | 4 | 0 + 33 | 8 | 0 + 33 | 0 | 0 + 30 | 1 | 1 30 | 5 | 1 30 | 9 | 1 - 30 | 1 | 1 37 | 6 | 2 37 | 2 | 2 23 | 3 | 3 @@ -2001,16 +2003,16 @@ SELECT sum(unique1) over (partition by four order by unique1 range between 5::in FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ + 8 | 2 | 2 + 8 | 6 | 2 + 10 | 3 | 3 + 10 | 7 | 3 4 | 0 | 0 12 | 4 | 0 12 | 8 | 0 6 | 1 | 1 15 | 5 | 1 14 | 9 | 1 - 8 | 2 | 2 - 8 | 6 | 2 - 10 | 3 | 3 - 10 | 7 | 3 (10 rows) SELECT sum(unique1) over (partition by four order by unique1 range between 5::int8 preceding and 6::int2 following @@ -2018,16 +2020,16 @@ SELECT sum(unique1) over (partition by four order by unique1 range between 5::in FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ + 6 | 2 | 2 + 2 | 6 | 2 + 7 | 3 | 3 + 3 | 7 | 3 4 | 0 | 0 8 | 4 | 0 4 | 8 | 0 5 | 1 | 1 10 | 5 | 1 5 | 9 | 1 - 6 | 2 | 2 - 2 | 6 | 2 - 7 | 3 | 3 - 3 | 7 | 3 (10 rows) select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), @@ -2417,6 +2419,8 @@ create temp table numerics( f_float8 float8, f_numeric numeric ); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into numerics values (0, '-infinity', '-infinity', '-infinity'), (1, -3, -3, -3), @@ -2730,6 +2734,8 @@ create temp table datetimes( f_timestamptz timestamptz, f_timestamp timestamp ); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into datetimes values (1, '11:00', '11:00 BST', '1 year', '2000-10-19 10:23:54+01', '2000-10-19 10:23:54'), (2, '12:00', '12:00 BST', '2 years', '2001-10-19 10:23:54+01', '2001-10-19 10:23:54'), @@ -2805,8 +2811,8 @@ window w as (order by f_timetz desc range between 9 | 19:00:00+01 | 10 | 7 8 | 18:00:00+01 | 9 | 7 7 | 17:00:00+01 | 8 | 6 - 6 | 15:00:00+01 | 5 | 3 5 | 15:00:00+01 | 5 | 3 + 6 | 15:00:00+01 | 5 | 3 4 | 14:00:00+01 | 5 | 2 3 | 13:00:00+01 | 4 | 1 2 | 12:00:00+01 | 3 | 1 @@ -2841,8 +2847,8 @@ window w as (order by f_interval desc range between 9 | @ 9 years | 10 | 8 8 | @ 8 years | 9 | 7 7 | @ 7 years | 8 | 7 - 6 | @ 5 years | 5 | 4 5 | @ 5 years | 5 | 4 + 6 | @ 5 years | 5 | 4 4 | @ 4 years | 5 | 3 3 | @ 3 years | 4 | 2 2 | @ 2 years | 3 | 1 @@ -2880,8 +2886,8 @@ window w as (order by f_timestamptz desc range between 6 | Tue Oct 19 02:23:54 2004 PDT | 7 | 5 5 | Sun Oct 19 02:23:54 2003 PDT | 6 | 4 4 | Sat Oct 19 02:23:54 2002 PDT | 5 | 3 - 3 | Fri Oct 19 02:23:54 2001 PDT | 4 | 1 2 | Fri Oct 19 02:23:54 2001 PDT | 4 | 1 + 3 | Fri Oct 19 02:23:54 2001 PDT | 4 | 1 1 | Thu Oct 19 02:23:54 2000 PDT | 2 | 1 (10 rows) @@ -2916,8 +2922,8 @@ window w as (order by f_timestamp desc range between 6 | Tue Oct 19 10:23:54 2004 | 7 | 5 5 | Sun Oct 19 10:23:54 2003 | 6 | 4 4 | Sat Oct 19 10:23:54 2002 | 5 | 3 - 3 | Fri Oct 19 10:23:54 2001 | 4 | 1 2 | Fri Oct 19 10:23:54 2001 | 4 | 1 + 3 | Fri Oct 19 10:23:54 2001 | 4 | 1 1 | Thu Oct 19 10:23:54 2000 | 2 | 1 (10 rows) @@ -2964,12 +2970,12 @@ SELECT sum(unique1) over (order by four groups between unbounded preceding and c FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 12 | 0 | 0 - 12 | 8 | 0 12 | 4 | 0 + 12 | 8 | 0 + 12 | 0 | 0 + 27 | 1 | 1 27 | 5 | 1 27 | 9 | 1 - 27 | 1 | 1 35 | 6 | 2 35 | 2 | 2 45 | 3 | 3 @@ -2981,12 +2987,12 @@ SELECT sum(unique1) over (order by four groups between unbounded preceding and u FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 45 | 0 | 0 - 45 | 8 | 0 45 | 4 | 0 + 45 | 8 | 0 + 45 | 0 | 0 + 45 | 1 | 1 45 | 5 | 1 45 | 9 | 1 - 45 | 1 | 1 45 | 6 | 2 45 | 2 | 2 45 | 3 | 3 @@ -2998,12 +3004,12 @@ SELECT sum(unique1) over (order by four groups between current row and unbounded FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 45 | 0 | 0 - 45 | 8 | 0 45 | 4 | 0 + 45 | 8 | 0 + 45 | 0 | 0 + 33 | 1 | 1 33 | 5 | 1 33 | 9 | 1 - 33 | 1 | 1 18 | 6 | 2 18 | 2 | 2 10 | 3 | 3 @@ -3015,12 +3021,12 @@ SELECT sum(unique1) over (order by four groups between 1 preceding and unbounded FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 45 | 0 | 0 - 45 | 8 | 0 45 | 4 | 0 + 45 | 8 | 0 + 45 | 0 | 0 + 45 | 1 | 1 45 | 5 | 1 45 | 9 | 1 - 45 | 1 | 1 33 | 6 | 2 33 | 2 | 2 18 | 3 | 3 @@ -3032,12 +3038,12 @@ SELECT sum(unique1) over (order by four groups between 1 following and unbounded FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 33 | 0 | 0 - 33 | 8 | 0 33 | 4 | 0 + 33 | 8 | 0 + 33 | 0 | 0 + 18 | 1 | 1 18 | 5 | 1 18 | 9 | 1 - 18 | 1 | 1 10 | 6 | 2 10 | 2 | 2 | 3 | 3 @@ -3049,12 +3055,12 @@ SELECT sum(unique1) over (order by four groups between unbounded preceding and 2 FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 35 | 0 | 0 - 35 | 8 | 0 35 | 4 | 0 + 35 | 8 | 0 + 35 | 0 | 0 + 45 | 1 | 1 45 | 5 | 1 45 | 9 | 1 - 45 | 1 | 1 45 | 6 | 2 45 | 2 | 2 45 | 3 | 3 @@ -3066,12 +3072,12 @@ SELECT sum(unique1) over (order by four groups between 2 preceding and 1 precedi FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - | 0 | 0 - | 8 | 0 | 4 | 0 + | 8 | 0 + | 0 | 0 + 12 | 1 | 1 12 | 5 | 1 12 | 9 | 1 - 12 | 1 | 1 27 | 6 | 2 27 | 2 | 2 23 | 3 | 3 @@ -3083,12 +3089,12 @@ SELECT sum(unique1) over (order by four groups between 2 preceding and 1 followi FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 27 | 0 | 0 - 27 | 8 | 0 27 | 4 | 0 + 27 | 8 | 0 + 27 | 0 | 0 + 35 | 1 | 1 35 | 5 | 1 35 | 9 | 1 - 35 | 1 | 1 45 | 6 | 2 45 | 2 | 2 33 | 3 | 3 @@ -3100,12 +3106,12 @@ SELECT sum(unique1) over (order by four groups between 0 preceding and 0 followi FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 12 | 0 | 0 - 12 | 8 | 0 12 | 4 | 0 + 12 | 8 | 0 + 12 | 0 | 0 + 15 | 1 | 1 15 | 5 | 1 15 | 9 | 1 - 15 | 1 | 1 8 | 6 | 2 8 | 2 | 2 10 | 3 | 3 @@ -3117,12 +3123,12 @@ SELECT sum(unique1) over (order by four groups between 2 preceding and 1 followi FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 27 | 0 | 0 - 19 | 8 | 0 23 | 4 | 0 + 19 | 8 | 0 + 27 | 0 | 0 + 34 | 1 | 1 30 | 5 | 1 26 | 9 | 1 - 34 | 1 | 1 39 | 6 | 2 43 | 2 | 2 30 | 3 | 3 @@ -3134,12 +3140,12 @@ SELECT sum(unique1) over (order by four groups between 2 preceding and 1 followi FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 15 | 0 | 0 - 15 | 8 | 0 15 | 4 | 0 + 15 | 8 | 0 + 15 | 0 | 0 + 20 | 1 | 1 20 | 5 | 1 20 | 9 | 1 - 20 | 1 | 1 37 | 6 | 2 37 | 2 | 2 23 | 3 | 3 @@ -3151,12 +3157,12 @@ SELECT sum(unique1) over (order by four groups between 2 preceding and 1 followi FROM tenk1 WHERE unique1 < 10; sum | unique1 | four -----+---------+------ - 15 | 0 | 0 - 23 | 8 | 0 19 | 4 | 0 + 23 | 8 | 0 + 15 | 0 | 0 + 21 | 1 | 1 25 | 5 | 1 29 | 9 | 1 - 21 | 1 | 1 43 | 6 | 2 39 | 2 | 2 26 | 3 | 3 @@ -3168,15 +3174,15 @@ SELECT sum(unique1) over (partition by ten FROM tenk1 WHERE unique1 < 10; sum | unique1 | four | ten -----+---------+------+----- - 0 | 0 | 0 | 0 - 1 | 1 | 1 | 1 2 | 2 | 2 | 2 3 | 3 | 3 | 3 4 | 4 | 0 | 4 - 5 | 5 | 1 | 5 - 6 | 6 | 2 | 6 7 | 7 | 3 | 7 8 | 8 | 0 | 8 + 0 | 0 | 0 | 0 + 1 | 1 | 1 | 1 + 5 | 5 | 1 | 5 + 6 | 6 | 2 | 6 9 | 9 | 1 | 9 (10 rows) @@ -3190,10 +3196,10 @@ FROM tenk1 WHERE unique1 < 10; | 2 | 2 | 2 | 3 | 3 | 3 | 4 | 0 | 4 - | 5 | 1 | 5 - | 6 | 2 | 6 | 7 | 3 | 7 | 8 | 0 | 8 + | 5 | 1 | 5 + | 6 | 2 | 6 | 9 | 1 | 9 (10 rows) @@ -3204,14 +3210,14 @@ FROM tenk1 WHERE unique1 < 10; -----+---------+------+----- | 0 | 0 | 0 | 1 | 1 | 1 + | 5 | 1 | 5 + | 6 | 2 | 6 + | 9 | 1 | 9 | 2 | 2 | 2 | 3 | 3 | 3 | 4 | 0 | 4 - | 5 | 1 | 5 - | 6 | 2 | 6 | 7 | 3 | 7 | 8 | 0 | 8 - | 9 | 1 | 9 (10 rows) SELECT sum(unique1) over (partition by ten @@ -3221,14 +3227,14 @@ FROM tenk1 WHERE unique1 < 10; -----+---------+------+----- 0 | 0 | 0 | 0 1 | 1 | 1 | 1 + 5 | 5 | 1 | 5 + 6 | 6 | 2 | 6 + 9 | 9 | 1 | 9 2 | 2 | 2 | 2 3 | 3 | 3 | 3 4 | 4 | 0 | 4 - 5 | 5 | 1 | 5 - 6 | 6 | 2 | 6 7 | 7 | 3 | 7 8 | 8 | 0 | 8 - 9 | 9 | 1 | 9 (10 rows) select first_value(salary) over(order by enroll_date groups between 1 preceding and 1 following), @@ -3510,6 +3516,8 @@ DETAIL: Falling back to Postgres-based planner because GPORCA does not support -- check some degenerate cases create temp table t1 (f1 int, f2 int8); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'f1' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. insert into t1 values (1,1),(1,2),(2,2); select f1, sum(f1) over (partition by f1 range between 1 preceding and 1 following) @@ -3531,7 +3539,7 @@ from t1 where f1 = f2; Sort Key: f1, f2 -> Seq Scan on t1 Filter: (f1 = f2) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (9 rows) select f1, sum(f1) over (partition by f1 order by f2 @@ -3548,8 +3556,8 @@ select f1, sum(f1) over (partition by f1, f1 order by f2 from t1 where f1 = f2; f1 | sum ----+----- - 1 | 2 | + 1 | (2 rows) select f1, sum(f1) over (partition by f1, f2 order by f2 @@ -3581,7 +3589,7 @@ from t1 where f1 = f2; Sort Key: f1, f2 -> Seq Scan on t1 Filter: (f1 = f2) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (9 rows) select f1, sum(f1) over (partition by f1 order by f2 @@ -3589,8 +3597,8 @@ select f1, sum(f1) over (partition by f1 order by f2 from t1 where f1 = f2; f1 | sum ----+----- - 1 | 1 2 | 2 + 1 | 1 (2 rows) select f1, sum(f1) over (partition by f1, f1 order by f2 @@ -3598,8 +3606,8 @@ select f1, sum(f1) over (partition by f1, f1 order by f2 from t1 where f1 = f2; f1 | sum ----+----- - 1 | 2 | + 1 | (2 rows) select f1, sum(f1) over (partition by f1, f2 order by f2 @@ -3607,8 +3615,8 @@ select f1, sum(f1) over (partition by f1, f2 order by f2 from t1 where f1 = f2; f1 | sum ----+----- - 1 | 2 | + 1 | (2 rows) -- ordering by a non-integer constant is allowed @@ -3714,7 +3722,7 @@ WHERE depname = 'sales'; Sort Key: depname -> Seq Scan on empsalary Filter: ((depname)::text = 'sales'::text) - Optimizer: Pivotal Optimizer (GPORCA) version 3.83.0 + Optimizer: GPORCA (13 rows) -- pushdown is unsafe because there's a PARTITION BY clause without depname: @@ -3743,7 +3751,7 @@ WHERE depname = 'sales'; -> Redistribute Motion 3:3 (slice3; segments: 3) Hash Key: enroll_date -> Seq Scan on empsalary - Optimizer: Pivotal Optimizer (GPORCA) version 3.23.0 + Optimizer: GPORCA (17 rows) -- pushdown is unsafe because the subquery contains window functions and the qual is volatile: @@ -3768,7 +3776,7 @@ WHERE depname = 'sales' OR RANDOM() > 0.5; Sort Key: depname -> Seq Scan on empsalary Filter: (((depname)::text = 'sales'::text) OR (random() > '0.5'::double precision)) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (13 rows) -- Test Sort node collapsing @@ -3794,7 +3802,7 @@ WHERE depname = 'sales'; Sort Key: depname, empno -> Seq Scan on empsalary Filter: ((depname)::text = 'sales'::text) - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (14 rows) -- Test Sort node reordering @@ -3817,7 +3825,7 @@ FROM empsalary; -> Sort Sort Key: depname, salary, enroll_date -> Seq Scan on empsalary - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (13 rows) -- Test incremental sorting @@ -3831,9 +3839,7 @@ SELECT * FROM row_number() OVER (PARTITION BY depname ORDER BY enroll_date DESC) AS last_emp FROM empsalary) emp WHERE first_emp = 1 OR last_emp = 1; -NOTICE: One or more columns in the following table(s) do not have statistics: empsalary -HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. - QUERY PLAN + QUERY PLAN ---------------------------------------------------------------------------------- Gather Motion 3:1 (slice1; segments: 3) -> Result @@ -3849,7 +3855,7 @@ HINT: For non-partitioned tables, run analyze (). For -> Sort Sort Key: depname, enroll_date -> Seq Scan on empsalary - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (15 rows) SELECT * FROM @@ -3861,8 +3867,6 @@ SELECT * FROM row_number() OVER (PARTITION BY depname ORDER BY enroll_date DESC) AS last_emp FROM empsalary) emp WHERE first_emp = 1 OR last_emp = 1; -NOTICE: One or more columns in the following table(s) do not have statistics: empsalary -HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. depname | empno | salary | enroll_date | first_emp | last_emp -----------+-------+--------+-------------+-----------+---------- develop | 9 | 4500 | 01-01-2008 | 4 | 1 @@ -4760,10 +4764,10 @@ AS $$ WINDOW w AS (ORDER BY s ROWS BETWEEN CURRENT ROW AND GROUP_SIZE FOLLOWING) $$ LANGUAGE SQL STABLE; EXPLAIN (costs off) SELECT * FROM pg_temp.f(2); - QUERY PLAN ---------------------------------------- + QUERY PLAN +-------------------- Function Scan on f - Optimizer: Pivotal Optimizer (GPORCA) + Optimizer: GPORCA (2 rows) SELECT * FROM pg_temp.f(2); diff --git a/src/test/regress/expected/window_parallel.out b/src/test/regress/expected/window_parallel.out new file mode 100644 index 00000000000..bd4b9161c1b --- /dev/null +++ b/src/test/regress/expected/window_parallel.out @@ -0,0 +1,869 @@ +set force_parallel_mode = 0; +set optimizer = off; +create schema window_parallel; +set search_path to window_parallel; +set gp_appendonly_insert_files = 4; +set min_parallel_table_scan_size = 0; +-- +-- Test of Parallel process of Window Functions. +-- +CREATE TABLE empsalary ( + depname varchar, + empno bigint, + salary int, + enroll_date date +); +NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'depname' as the Apache Cloudberry data distribution key for this table. +HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. +INSERT INTO empsalary VALUES +('develop', 10, 5200, '2007-08-01'), +('sales', 1, 5000, '2006-10-01'), +('personnel', 5, 3500, '2007-12-10'), +('sales', 4, 4800, '2007-08-08'), +('personnel', 2, 3900, '2006-12-23'), +('develop', 7, 4200, '2008-01-01'), +('develop', 9, 4500, '2008-01-01'), +('sales', 3, 4800, '2007-08-01'), +('develop', 8, 6000, '2006-10-01'), +('develop', 11, 5200, '2007-08-15'); +-- w1 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; + QUERY PLAN +----------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: depname, salary + -> Sort + Sort Key: depname, salary + -> WindowAgg + Partition By: depname + -> Sort + Sort Key: depname + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(10 rows) + +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; + depname | empno | salary | sum +-----------+-------+--------+------- + develop | 7 | 4200 | 25100 + develop | 9 | 4500 | 25100 + develop | 11 | 5200 | 25100 + develop | 10 | 5200 | 25100 + develop | 8 | 6000 | 25100 + personnel | 5 | 3500 | 7400 + personnel | 2 | 3900 | 7400 + sales | 3 | 4800 | 14600 + sales | 4 | 4800 | 14600 + sales | 1 | 5000 | 14600 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + Merge Key: depname, salary + -> Sort + Sort Key: depname, salary + -> WindowAgg + Partition By: depname + -> Sort + Sort Key: depname + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: depname + Hash Module: 3 + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(13 rows) + +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; + depname | empno | salary | sum +-----------+-------+--------+------- + develop | 7 | 4200 | 25100 + develop | 9 | 4500 | 25100 + develop | 10 | 5200 | 25100 + develop | 11 | 5200 | 25100 + develop | 8 | 6000 | 25100 + personnel | 5 | 3500 | 7400 + personnel | 2 | 3900 | 7400 + sales | 4 | 4800 | 14600 + sales | 3 | 4800 | 14600 + sales | 1 | 5000 | 14600 +(10 rows) + +-- w2 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: depname + Order By: salary + -> Sort + Sort Key: depname, salary + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; + depname | empno | salary | rank +-----------+-------+--------+------ + develop | 7 | 4200 | 1 + develop | 9 | 4500 | 2 + develop | 11 | 5200 | 3 + develop | 10 | 5200 | 3 + develop | 8 | 6000 | 5 + personnel | 5 | 3500 | 1 + personnel | 2 | 3900 | 2 + sales | 3 | 4800 | 1 + sales | 4 | 4800 | 1 + sales | 1 | 5000 | 3 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + -> WindowAgg + Partition By: depname + Order By: salary + -> Sort + Sort Key: depname, salary + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: depname + Hash Module: 3 + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(11 rows) + +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; + depname | empno | salary | rank +-----------+-------+--------+------ + personnel | 5 | 3500 | 1 + personnel | 2 | 3900 | 2 + sales | 4 | 4800 | 1 + sales | 3 | 4800 | 1 + sales | 1 | 5000 | 3 + develop | 7 | 4200 | 1 + develop | 9 | 4500 | 2 + develop | 10 | 5200 | 3 + develop | 11 | 5200 | 3 + develop | 8 | 6000 | 5 +(10 rows) + +-- w3 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); + QUERY PLAN +------------------------------------------ + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: depname + -> Sort + Sort Key: depname + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(7 rows) + +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); + depname | empno | salary | sum +-----------+-------+--------+------- + develop | 11 | 5200 | 25100 + develop | 7 | 4200 | 25100 + develop | 9 | 4500 | 25100 + develop | 8 | 6000 | 25100 + develop | 10 | 5200 | 25100 + personnel | 5 | 3500 | 7400 + personnel | 2 | 3900 | 7400 + sales | 3 | 4800 | 14600 + sales | 1 | 5000 | 14600 + sales | 4 | 4800 | 14600 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + -> WindowAgg + Partition By: depname + -> Sort + Sort Key: depname + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: depname + Hash Module: 3 + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(10 rows) + +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); + depname | empno | salary | sum +-----------+-------+--------+------- + develop | 10 | 5200 | 25100 + develop | 7 | 4200 | 25100 + develop | 9 | 4500 | 25100 + develop | 8 | 6000 | 25100 + develop | 11 | 5200 | 25100 + personnel | 5 | 3500 | 7400 + personnel | 2 | 3900 | 7400 + sales | 1 | 5000 | 14600 + sales | 4 | 4800 | 14600 + sales | 3 | 4800 | 14600 +(10 rows) + +-- w4 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; + QUERY PLAN +----------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + Merge Key: (rank() OVER (?)) + -> Sort + Sort Key: (rank() OVER (?)) + -> WindowAgg + Partition By: depname + Order By: salary + -> Sort + Sort Key: depname, salary + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(11 rows) + +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; + depname | empno | salary | rank +-----------+-------+--------+------ + develop | 7 | 4200 | 1 + personnel | 5 | 3500 | 1 + sales | 3 | 4800 | 1 + sales | 4 | 4800 | 1 + personnel | 2 | 3900 | 2 + develop | 9 | 4500 | 2 + sales | 1 | 5000 | 3 + develop | 11 | 5200 | 3 + develop | 10 | 5200 | 3 + develop | 8 | 6000 | 5 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; + QUERY PLAN +------------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + Merge Key: (rank() OVER (?)) + -> Sort + Sort Key: (rank() OVER (?)) + -> WindowAgg + Partition By: depname + Order By: salary + -> Sort + Sort Key: depname, salary + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: depname + Hash Module: 3 + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(14 rows) + +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; + depname | empno | salary | rank +-----------+-------+--------+------ + personnel | 5 | 3500 | 1 + sales | 4 | 4800 | 1 + sales | 3 | 4800 | 1 + develop | 7 | 4200 | 1 + develop | 9 | 4500 | 2 + personnel | 2 | 3900 | 2 + sales | 1 | 5000 | 3 + develop | 10 | 5200 | 3 + develop | 11 | 5200 | 3 + develop | 8 | 6000 | 5 +(10 rows) + +-- w5 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; + QUERY PLAN +------------------------------------------------------------ + WindowAgg + Order By: depname + -> Sort + Sort Key: depname + -> WindowAgg + Order By: depname + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: depname + -> Sort + Sort Key: depname DESC + -> HashAggregate + Group Key: depname + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(14 rows) + +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; + sum | row_number | sum +-------+------------+------- + 25100 | 1 | 47100 + 7400 | 2 | 22000 + 14600 | 3 | 14600 +(3 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; + QUERY PLAN +------------------------------------------------------------ + WindowAgg + Order By: depname + -> Sort + Sort Key: depname + -> WindowAgg + Order By: depname + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: depname + -> Sort + Sort Key: depname DESC + -> HashAggregate + Group Key: depname + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(14 rows) + +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; + sum | row_number | sum +-------+------------+------- + 25100 | 1 | 47100 + 7400 | 2 | 22000 + 14600 | 3 | 14600 +(3 rows) + +-- w6 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +-- identical windows with different names +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); + QUERY PLAN +------------------------------------------------------ + WindowAgg + Order By: salary + -> WindowAgg + Order By: salary + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: salary + -> Sort + Sort Key: salary + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(10 rows) + +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); + sum | count +-------+------- + 3500 | 1 + 7400 | 2 + 11600 | 3 + 16100 | 4 + 25700 | 6 + 25700 | 6 + 30700 | 7 + 41100 | 9 + 41100 | 9 + 47100 | 10 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +-- identical windows with different names +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); + QUERY PLAN +-------------------------------------------------------- + WindowAgg + Order By: salary + -> WindowAgg + Order By: salary + -> Gather Motion 6:1 (slice1; segments: 6) + Merge Key: salary + -> Sort + Sort Key: salary + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(10 rows) + +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); + sum | count +-------+------- + 3500 | 1 + 7400 | 2 + 11600 | 3 + 16100 | 4 + 25700 | 6 + 25700 | 6 + 30700 | 7 + 41100 | 9 + 41100 | 9 + 47100 | 10 +(10 rows) + +-- w7 +-- mixture of agg/wfunc in the same window +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); + QUERY PLAN +---------------------------------------------- + Gather Motion 3:1 (slice1; segments: 3) + -> WindowAgg + Partition By: depname + Order By: salary + -> Sort + Sort Key: depname, salary DESC + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); + sum | rank +-------+------ + 6000 | 1 + 16400 | 2 + 16400 | 2 + 20900 | 4 + 25100 | 5 + 3900 | 1 + 7400 | 2 + 5000 | 1 + 14600 | 2 + 14600 | 2 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); + QUERY PLAN +------------------------------------------------------------------ + Gather Motion 6:1 (slice1; segments: 6) + -> WindowAgg + Partition By: depname + Order By: salary + -> Sort + Sort Key: depname, salary DESC + -> Redistribute Motion 6:6 (slice2; segments: 6) + Hash Key: depname + Hash Module: 3 + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(11 rows) + +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); + sum | rank +-------+------ + 3900 | 1 + 7400 | 2 + 5000 | 1 + 14600 | 2 + 14600 | 2 + 6000 | 1 + 16400 | 2 + 16400 | 2 + 20900 | 4 + 25100 | 5 +(10 rows) + +-- w8 +-- strict aggs +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + -> WindowAgg + Order By: s.empno + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: s.empno + -> Sort + Sort Key: s.empno + -> Subquery Scan on s + -> WindowAgg + Partition By: empsalary.depname + -> Sort + Sort Key: empsalary.depname + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(14 rows) + +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; + empno | depname | salary | bonus | depadj | min | max +-------+-----------+--------+-------+--------+------+----- + 1 | sales | 5000 | 1000 | 200 | 1000 | 200 + 2 | personnel | 3900 | 1000 | 200 | 1000 | 200 + 3 | sales | 4800 | 500 | | 500 | 200 + 4 | sales | 4800 | 500 | | 500 | 200 + 5 | personnel | 3500 | 500 | | 500 | 200 + 7 | develop | 4200 | | | 500 | 200 + 8 | develop | 6000 | 1000 | 200 | 500 | 200 + 9 | develop | 4500 | | | 500 | 200 + 10 | develop | 5200 | 500 | 200 | 500 | 200 + 11 | develop | 5200 | 500 | 200 | 500 | 200 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; + QUERY PLAN +------------------------------------------------------------------- + WindowAgg + -> WindowAgg + Order By: s.empno + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: s.empno + -> Sort + Sort Key: s.empno + -> Subquery Scan on s + -> WindowAgg + Partition By: empsalary.depname + -> Sort + Sort Key: empsalary.depname + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(14 rows) + +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; + empno | depname | salary | bonus | depadj | min | max +-------+-----------+--------+-------+--------+------+----- + 1 | sales | 5000 | 1000 | 200 | 1000 | 200 + 2 | personnel | 3900 | 1000 | 200 | 1000 | 200 + 3 | sales | 4800 | 500 | | 500 | 200 + 4 | sales | 4800 | 500 | | 500 | 200 + 5 | personnel | 3500 | 500 | | 500 | 200 + 7 | develop | 4200 | | | 500 | 200 + 8 | develop | 6000 | 1000 | 200 | 500 | 200 + 9 | develop | 4500 | | | 500 | 200 + 10 | develop | 5200 | 500 | 200 | 500 | 200 + 11 | develop | 5200 | 500 | 200 | 500 | 200 +(10 rows) + +-- w9 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + QUERY PLAN +------------------------------------------------ + WindowAgg + Order By: enroll_date + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + sum | salary | enroll_date +-------+--------+------------- + 34900 | 5000 | 10-01-2006 + 34900 | 6000 | 10-01-2006 + 38400 | 3900 | 12-23-2006 + 47100 | 4800 | 08-01-2007 + 47100 | 5200 | 08-01-2007 + 47100 | 4800 | 08-08-2007 + 47100 | 5200 | 08-15-2007 + 36100 | 3500 | 12-10-2007 + 32200 | 4500 | 01-01-2008 + 32200 | 4200 | 01-01-2008 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + QUERY PLAN +-------------------------------------------------- + WindowAgg + Order By: enroll_date + -> Gather Motion 6:1 (slice1; segments: 6) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + sum | salary | enroll_date +-------+--------+------------- + 34900 | 5000 | 10-01-2006 + 34900 | 6000 | 10-01-2006 + 38400 | 3900 | 12-23-2006 + 47100 | 4800 | 08-01-2007 + 47100 | 5200 | 08-01-2007 + 47100 | 4800 | 08-08-2007 + 47100 | 5200 | 08-15-2007 + 36100 | 3500 | 12-10-2007 + 32200 | 4500 | 01-01-2008 + 32200 | 4200 | 01-01-2008 +(10 rows) + +-- w10 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + QUERY PLAN +------------------------------------------------ + WindowAgg + Order By: enroll_date + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + sum | salary | enroll_date +-------+--------+------------- + 32200 | 4200 | 01-01-2008 + 32200 | 4500 | 01-01-2008 + 36100 | 3500 | 12-10-2007 + 47100 | 5200 | 08-15-2007 + 47100 | 4800 | 08-08-2007 + 47100 | 4800 | 08-01-2007 + 47100 | 5200 | 08-01-2007 + 38400 | 3900 | 12-23-2006 + 34900 | 5000 | 10-01-2006 + 34900 | 6000 | 10-01-2006 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + QUERY PLAN +-------------------------------------------------- + WindowAgg + Order By: enroll_date + -> Gather Motion 6:1 (slice1; segments: 6) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date DESC + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + sum | salary | enroll_date +-------+--------+------------- + 32200 | 4200 | 01-01-2008 + 32200 | 4500 | 01-01-2008 + 36100 | 3500 | 12-10-2007 + 47100 | 5200 | 08-15-2007 + 47100 | 4800 | 08-08-2007 + 47100 | 4800 | 08-01-2007 + 47100 | 5200 | 08-01-2007 + 38400 | 3900 | 12-23-2006 + 34900 | 5000 | 10-01-2006 + 34900 | 6000 | 10-01-2006 +(10 rows) + +-- w11 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; + QUERY PLAN +------------------------------------------------ + WindowAgg + Order By: enroll_date + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date DESC + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; + sum | salary | enroll_date +-----+--------+------------- + | 4200 | 01-01-2008 + | 4500 | 01-01-2008 + | 3500 | 12-10-2007 + | 5200 | 08-15-2007 + | 4800 | 08-08-2007 + | 4800 | 08-01-2007 + | 5200 | 08-01-2007 + | 3900 | 12-23-2006 + | 5000 | 10-01-2006 + | 6000 | 10-01-2006 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; + QUERY PLAN +-------------------------------------------------- + WindowAgg + Order By: enroll_date + -> Gather Motion 6:1 (slice1; segments: 6) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date DESC + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; + sum | salary | enroll_date +-----+--------+------------- + | 4200 | 01-01-2008 + | 4500 | 01-01-2008 + | 3500 | 12-10-2007 + | 5200 | 08-15-2007 + | 4800 | 08-08-2007 + | 4800 | 08-01-2007 + | 5200 | 08-01-2007 + | 3900 | 12-23-2006 + | 5000 | 10-01-2006 + | 6000 | 10-01-2006 +(10 rows) + +-- w12 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; + QUERY PLAN +------------------------------------------------ + WindowAgg + Order By: enroll_date + -> Gather Motion 3:1 (slice1; segments: 3) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date + -> Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; + sum | salary | enroll_date +-------+--------+------------- + 29900 | 5000 | 10-01-2006 + 28900 | 6000 | 10-01-2006 + 34500 | 3900 | 12-23-2006 + 42300 | 4800 | 08-01-2007 + 41900 | 5200 | 08-01-2007 + 42300 | 4800 | 08-08-2007 + 41900 | 5200 | 08-15-2007 + 32600 | 3500 | 12-10-2007 + 27700 | 4500 | 01-01-2008 + 28000 | 4200 | 01-01-2008 +(10 rows) + +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; + QUERY PLAN +-------------------------------------------------- + WindowAgg + Order By: enroll_date + -> Gather Motion 6:1 (slice1; segments: 6) + Merge Key: enroll_date + -> Sort + Sort Key: enroll_date + -> Parallel Seq Scan on empsalary + Optimizer: Postgres query optimizer +(8 rows) + +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; + sum | salary | enroll_date +-------+--------+------------- + 29900 | 5000 | 10-01-2006 + 28900 | 6000 | 10-01-2006 + 34500 | 3900 | 12-23-2006 + 42300 | 4800 | 08-01-2007 + 41900 | 5200 | 08-01-2007 + 42300 | 4800 | 08-08-2007 + 41900 | 5200 | 08-15-2007 + 32600 | 3500 | 12-10-2007 + 27700 | 4500 | 01-01-2008 + 28000 | 4200 | 01-01-2008 +(10 rows) + +-- +-- End of test of Parallel process of Window Functions. +-- +-- start_ignore +drop schema window_parallel cascade; +NOTICE: drop cascades to table empsalary +-- end_ignore +reset min_parallel_table_scan_size; +reset enable_parallel; +reset gp_appendonly_insert_files; +reset force_parallel_mode; +reset optimizer; diff --git a/src/test/regress/greenplum_schedule b/src/test/regress/greenplum_schedule index 3614e349ff8..f2f66e6cd6e 100755 --- a/src/test/regress/greenplum_schedule +++ b/src/test/regress/greenplum_schedule @@ -281,6 +281,9 @@ test: freeze_aux_tables # cbdb parallel test test: cbdb_parallel +# window function parallel test +test: window_parallel + # These cannot run in parallel, because they check that VACUUM FULL shrinks table size. # A concurrent session could hold back the xid horizon and prevent old tuples from being # removed. diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql index 5eb71740fd2..61cf6a4e6cf 100644 --- a/src/test/regress/sql/cluster.sql +++ b/src/test/regress/sql/cluster.sql @@ -226,9 +226,9 @@ set enable_indexscan = off; set maintenance_work_mem = '1MB'; cluster clstr_4 using cluster_sort; select * from -(select hundred, lag(hundred) over () as lhundred, - thousand, lag(thousand) over () as lthousand, - tenthous, lag(tenthous) over () as ltenthous from clstr_4) ss +(select hundred, lag(hundred) over (order by hundred) as lhundred, + thousand, lag(thousand) over (order by hundred) as lthousand, + tenthous, lag(tenthous) over (order by hundred) as ltenthous from clstr_4) ss where row(hundred, thousand, tenthous) <= row(lhundred, lthousand, ltenthous); reset enable_indexscan; diff --git a/src/test/regress/sql/select_parallel.sql b/src/test/regress/sql/select_parallel.sql index 8a56061710e..846066fad05 100644 --- a/src/test/regress/sql/select_parallel.sql +++ b/src/test/regress/sql/select_parallel.sql @@ -389,6 +389,7 @@ reset force_parallel_mode; reset role; -- Window function calculation can't be pushed to workers. +-- CBDB_PARALLEL: window function's subpath could be parallel. explain (costs off, verbose) select count(*) from tenk1 a where (unique1, two) in (select unique1, row_number() over() from tenk1 b); diff --git a/src/test/regress/sql/statement_mem_for_windowagg.sql b/src/test/regress/sql/statement_mem_for_windowagg.sql index 5c4b96a1761..a471690c6be 100644 --- a/src/test/regress/sql/statement_mem_for_windowagg.sql +++ b/src/test/regress/sql/statement_mem_for_windowagg.sql @@ -2,6 +2,7 @@ CREATE TABLE dummy_table(x int, y int) DISTRIBUTED BY (y); INSERT INTO dummy_table SELECT generate_series(0, 20000), 0; INSERT INTO dummy_table SELECT generate_series(0, 20000), 3; INSERT INTO dummy_table SELECT generate_series(0, 20000), 10; +set enable_parallel = off; -- 1. Test that if we set statement_mem to a larger value, the tuplestore -- for caching the tuples in partition used in WindowAgg is able to be fitted @@ -53,5 +54,6 @@ SELECT gp_inject_fault_infinite('distinct_winagg_perform_sort', 'reset', dbid) FROM gp_segment_configuration WHERE role='p' AND content>=0; -- Do some clean-ups. +reset enable_parallel; DROP TABLE dummy_table; RESET statement_mem; diff --git a/src/test/regress/sql/window.sql b/src/test/regress/sql/window.sql index 70dd94fafae..1ae2af61983 100644 --- a/src/test/regress/sql/window.sql +++ b/src/test/regress/sql/window.sql @@ -242,63 +242,63 @@ SELECT sum(unique1) over (order by four range between current row and unbounded FROM tenk1 WHERE unique1 < 10; set search_path=singleseg, public; -SELECT sum(unique1) over (rows between current row and unbounded following), +SELECT sum(unique1) over (order by unique1 rows between current row and unbounded following), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between 2 preceding and 2 following), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude no others), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude no others), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude current row), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude group), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between 2 preceding and 2 following exclude ties), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT first_value(unique1) over (ORDER BY four rows between current row and 2 following exclude ties), +SELECT first_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude current row), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude current row), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude group), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude group), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT last_value(unique1) over (ORDER BY four rows between current row and 2 following exclude ties), +SELECT last_value(unique1) over (ORDER BY four, unique1 rows between current row and 2 following exclude ties), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between 2 preceding and 1 preceding), +SELECT sum(unique1) over (order by unique1 rows between 2 preceding and 1 preceding), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between 1 following and 3 following), +SELECT sum(unique1) over (order by unique1 rows between 1 following and 3 following), unique1, four FROM tenk1 WHERE unique1 < 10; -SELECT sum(unique1) over (rows between unbounded preceding and 1 following), +SELECT sum(unique1) over (order by unique1 rows between unbounded preceding and 1 following), unique1, four FROM tenk1 WHERE unique1 < 10; @@ -322,7 +322,7 @@ SELECT first_value(unique1) over w, nth_value(unique1, 2) over w AS nth_2, last_value(unique1) over w, unique1, four FROM tenk1 WHERE unique1 < 10 -WINDOW w AS (order by four range between current row and unbounded following); +WINDOW w AS (order by unique1, four range between current row and unbounded following); SELECT sum(unique1) over (order by unique1 diff --git a/src/test/regress/sql/window_parallel.sql b/src/test/regress/sql/window_parallel.sql new file mode 100644 index 00000000000..e8136750b2a --- /dev/null +++ b/src/test/regress/sql/window_parallel.sql @@ -0,0 +1,223 @@ +set force_parallel_mode = 0; +set optimizer = off; + +create schema window_parallel; +set search_path to window_parallel; +set gp_appendonly_insert_files = 4; +set min_parallel_table_scan_size = 0; + +-- +-- Test of Parallel process of Window Functions. +-- +CREATE TABLE empsalary ( + depname varchar, + empno bigint, + salary int, + enroll_date date +); + +INSERT INTO empsalary VALUES +('develop', 10, 5200, '2007-08-01'), +('sales', 1, 5000, '2006-10-01'), +('personnel', 5, 3500, '2007-12-10'), +('sales', 4, 4800, '2007-08-08'), +('personnel', 2, 3900, '2006-12-23'), +('develop', 7, 4200, '2008-01-01'), +('develop', 9, 4500, '2008-01-01'), +('sales', 3, 4800, '2007-08-01'), +('develop', 8, 6000, '2006-10-01'), +('develop', 11, 5200, '2007-08-15'); + + +-- w1 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; +SELECT depname, empno, salary, sum(salary) OVER (PARTITION BY depname) FROM empsalary ORDER BY depname, salary; + +-- w2 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; +SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary) FROM empsalary; + +-- w3 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); +SELECT depname, empno, salary, sum(salary) OVER w FROM empsalary WINDOW w AS (PARTITION BY depname); + +-- w4 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; +SELECT depname, empno, salary, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary) ORDER BY rank() OVER w; + +-- w5 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; +SELECT sum(salary), + row_number() OVER (ORDER BY depname), + sum(sum(salary)) OVER (ORDER BY depname DESC) +FROM empsalary GROUP BY depname; + + +-- w6 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +-- identical windows with different names +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); +set enable_parallel = on; +EXPLAIN(COSTS OFF) +-- identical windows with different names +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); +SELECT sum(salary) OVER w1, count(*) OVER w2 +FROM empsalary WINDOW w1 AS (ORDER BY salary), w2 AS (ORDER BY salary); + +-- w7 +-- mixture of agg/wfunc in the same window +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); +SELECT sum(salary) OVER w, rank() OVER w FROM empsalary WINDOW w AS (PARTITION BY depname ORDER BY salary DESC); + +-- w8 +-- strict aggs +set enable_parallel = off; +EXPLAIN(COSTS OFF) +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; +SELECT empno, depname, salary, bonus, depadj, MIN(bonus) OVER (ORDER BY empno), MAX(depadj) OVER () FROM( + SELECT *, + CASE WHEN enroll_date < '2008-01-01' THEN 2008 - extract(YEAR FROM enroll_date) END * 500 AS bonus, + CASE WHEN + AVG(salary) OVER (PARTITION BY depname) < salary + THEN 200 END AS depadj FROM empsalary +)s; + +-- w9 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + +-- w10 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date desc range between '1 year'::interval preceding and '1 year'::interval following), + salary, enroll_date from empsalary; + +-- w11 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date desc range between '1 year'::interval following and '1 year'::interval following), + salary, enroll_date from empsalary; + +-- w12 +set enable_parallel = off; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; +set enable_parallel = on; +EXPLAIN(COSTS OFF) +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; +select sum(salary) over (order by enroll_date range between '1 year'::interval preceding and '1 year'::interval following + exclude current row), salary, enroll_date from empsalary; + +-- +-- End of test of Parallel process of Window Functions. +-- +-- start_ignore +drop schema window_parallel cascade; +-- end_ignore +reset min_parallel_table_scan_size; +reset enable_parallel; +reset gp_appendonly_insert_files; +reset force_parallel_mode; +reset optimizer; \ No newline at end of file