From 8eb3a1b0be600c684b6d0d295a9b2ee84634c6cb Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Fri, 18 Jul 2025 15:26:39 +0800 Subject: [PATCH 1/7] Allow to set up the default null ordering (#3) * add default_null_ordering config * add test for different config * Update datafusion/sql/src/planner.rs Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * update doc * fix sqllogictest --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- datafusion/common/src/config.rs | 5 ++ .../core/src/execution/session_state.rs | 4 ++ datafusion/sql/src/expr/order_by.rs | 7 +- datafusion/sql/src/planner.rs | 62 +++++++++++++++++ datafusion/sql/src/statement.rs | 5 +- datafusion/sql/tests/sql_integration.rs | 5 +- .../test_files/information_schema.slt | 3 + datafusion/sqllogictest/test_files/order.slt | 68 +++++++++++++++++++ 8 files changed, 155 insertions(+), 4 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 5796edc283e0..3c4823fe8dcc 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -277,6 +277,11 @@ config_namespace! { /// Specifies the recursion depth limit when parsing complex SQL Queries pub recursion_limit: usize, default = 50 + + /// Specifies the default null ordering for query results + /// By default, `asc_reverse` is used to follow Postgres's behavior. 
+ /// postgres rule: + pub default_null_ordering: String, default = "asc_reverse".to_string() } } diff --git a/datafusion/core/src/execution/session_state.rs b/datafusion/core/src/execution/session_state.rs index 364ad75b0869..a28cc38919dd 100644 --- a/datafusion/core/src/execution/session_state.rs +++ b/datafusion/core/src/execution/session_state.rs @@ -496,6 +496,10 @@ impl SessionState { support_varchar_with_length: sql_parser_options.support_varchar_with_length, map_string_types_to_utf8view: sql_parser_options.map_string_types_to_utf8view, collect_spans: sql_parser_options.collect_spans, + default_null_ordering: sql_parser_options + .default_null_ordering + .as_str() + .into(), } } diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index d357c3753e13..2235f8620a31 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -112,7 +112,12 @@ impl SqlToRel<'_, S> { self.sql_expr_to_logical_expr(e, order_by_schema, planner_context)? } }; - sort_expr_vec.push(make_sort_expr(expr, asc, nulls_first)); + let asc = asc.unwrap_or(true); + expr_vec.push(make_sort_expr( + expr, + asc, + nulls_first.unwrap_or(self.options.default_null_ordering.eval(asc)), + )) } Ok(sort_expr_vec) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 26c982690115..c5fa39075cf6 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -17,6 +17,7 @@ //! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST) use std::collections::HashMap; +use std::str::FromStr; use std::sync::Arc; use std::vec; @@ -54,6 +55,8 @@ pub struct ParserOptions { pub collect_spans: bool, /// Whether string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. pub map_string_types_to_utf8view: bool, + /// Default null ordering for sorting expressions. 
+ pub default_null_ordering: NullOrdering, } impl ParserOptions { @@ -75,6 +78,9 @@ impl ParserOptions { map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, + // By default, `asc_reverse` is used to follow Postgres's behavior. + // postgres rule: https://www.postgresql.org/docs/current/queries-order.html + default_null_ordering: NullOrdering::AscReverse, } } @@ -129,6 +135,12 @@ impl ParserOptions { self.collect_spans = value; self } + + /// Sets the `default_null_ordering` option. + pub fn with_default_null_ordering(mut self, value: NullOrdering) -> Self { + self.default_null_ordering = value; + self + } } impl Default for ParserOptions { @@ -147,10 +159,60 @@ impl From<&SqlParserOptions> for ParserOptions { enable_options_value_normalization: options .enable_options_value_normalization, collect_spans: options.collect_spans, + default_null_ordering: options.default_null_ordering.as_str().into(), } } } +/// Represents the null ordering for sorting expressions. +#[derive(Debug, Clone, Copy)] +pub enum NullOrdering { + /// Ascending order with nulls appearing last. + AscReverse, + /// Descending order with nulls appearing last. + DescReverse, + /// Nulls appear first. + NullsFirst, + /// Nulls appear last. + NullsLast, +} + +impl NullOrdering { + /// Evaluates the null ordering based on the given ascending flag. + /// + /// # Returns + /// * `true` if nulls should appear first. + /// * `false` if nulls should appear last. 
+ pub fn eval(&self, asc: bool) -> bool { + match self { + Self::AscReverse => !asc, + Self::DescReverse => asc, + Self::NullsFirst => true, + Self::NullsLast => false, + } + } +} + +impl FromStr for NullOrdering { + type Err = DataFusionError; + + fn from_str(s: &str) -> Result { + match s { + "asc_reverse" => Ok(Self::AscReverse), + "desc_reverse" => Ok(Self::DescReverse), + "nulls_first" => Ok(Self::NullsFirst), + "nulls_last" => Ok(Self::NullsLast), + _ => plan_err!("Unknown null ordering: {s}"), + } + } +} + +impl From<&str> for NullOrdering { + fn from(s: &str) -> Self { + Self::from_str(s).unwrap_or(Self::AscReverse) + } +} + /// Ident Normalizer #[derive(Debug)] pub struct IdentNormalizer { diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index b2bea86f5524..6866a462ccdd 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1420,8 +1420,9 @@ impl SqlToRel<'_, S> { ) .unwrap(); let asc = order_by_expr.options.asc.unwrap_or(true); - let nulls_first = - order_by_expr.options.nulls_first.unwrap_or(!asc); + let nulls_first = order_by_expr.options + .nulls_first + .unwrap_or(self.options.default_null_ordering.eval(asc)); SortExpr::new(ordered_expr, asc, nulls_first) }) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index dd5ec4a20118..fc6c1b6e2994 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -32,7 +32,7 @@ use datafusion_expr::{ use datafusion_functions::{string, unicode}; use datafusion_sql::{ parser::DFParser, - planner::{ParserOptions, SqlToRel}, + planner::{NullOrdering, ParserOptions, SqlToRel}, }; use crate::common::{CustomExprPlanner, CustomTypePlanner, MockSessionState}; @@ -3361,6 +3361,7 @@ fn parse_decimals_parser_options() -> ParserOptions { map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, + default_null_ordering: 
NullOrdering::AscReverse, } } @@ -3372,6 +3373,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, + default_null_ordering: NullOrdering::AscReverse, } } @@ -3383,6 +3385,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions { map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, + default_null_ordering: NullOrdering::AscReverse, } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 86dfbd7c8496..8a98ad7da38d 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -307,6 +307,7 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.collect_spans false +datafusion.sql_parser.default_null_ordering asc_reverse datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false @@ -420,6 +421,8 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. 
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. +datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. +datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. 
diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index e3bcfcdbda1d..aa25ee00df50 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -94,6 +94,74 @@ NULL three 1 one 2 two +statement ok +set datafusion.sql_parser.default_null_ordering = 'desc_reverse'; + +# test asc with `desc_reverse` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +NULL three +1 one +2 two + +# test desc with `desc_reverse` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +2 two +1 one +NULL three + +statement ok +set datafusion.sql_parser.default_null_ordering = 'nulls_first'; + +# test asc with `nulls_first` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +NULL three +1 one +2 two + +# test desc with `nulls_first` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +NULL three +2 two +1 one + + +statement ok +set datafusion.sql_parser.default_null_ordering = 'nulls_last'; + +# test asc with `nulls_last` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +1 one +2 two +NULL three + +# test desc with `nulls_last` null ordering + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +2 two +1 one +NULL three + +# reset to default null ordering +statement ok +set datafusion.sql_parser.default_null_ordering = 'asc_reverse'; + # sort statement ok From ad84b511e1704120b2962933f58a712a26b122b6 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 29 Jul 2025 19:28:40 +0800 Subject: [PATCH 2/7] rename config and fix test --- 
datafusion/common/src/config.rs | 9 ++++++-- datafusion/sql/src/expr/order_by.rs | 12 +++------- datafusion/sql/src/planner.rs | 22 +++++++++---------- datafusion/sql/src/statement.rs | 7 +++--- datafusion/sql/tests/sql_integration.rs | 6 ++--- .../test_files/information_schema.slt | 3 +-- datafusion/sqllogictest/test_files/order.slt | 8 +++---- 7 files changed, 33 insertions(+), 34 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 3c4823fe8dcc..444dbc985314 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -278,8 +278,13 @@ config_namespace! { /// Specifies the recursion depth limit when parsing complex SQL Queries pub recursion_limit: usize, default = 50 - /// Specifies the default null ordering for query results - /// By default, `asc_reverse` is used to follow Postgres's behavior. + /// Specifies the default null ordering for query results. There are 4 options: + /// - `nulls_max`: Nulls appear last in ascending order. + /// - `nulls_min`: Nulls appear first in ascending order. + /// - `nulls_first`: Nulls always be first in any order. + /// - `nulls_last`: Nulls always be last in any order. + /// + /// By default, `null_max` is used to follow Postgres's behavior. 
/// postgres rule: pub default_null_ordering: String, default = "asc_reverse".to_string() } diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index 2235f8620a31..ed89e8b7d264 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -66,9 +66,8 @@ impl SqlToRel<'_, S> { let make_sort_expr = |expr: Expr, asc: Option, nulls_first: Option| { let asc = asc.unwrap_or(true); - // When asc is true, by default nulls last to be consistent with postgres - // postgres rule: https://www.postgresql.org/docs/current/queries-order.html - let nulls_first = nulls_first.unwrap_or(!asc); + let nulls_first = nulls_first + .unwrap_or_else(|| self.options.default_null_ordering.eval(asc)); Sort::new(expr, asc, nulls_first) }; @@ -112,12 +111,7 @@ impl SqlToRel<'_, S> { self.sql_expr_to_logical_expr(e, order_by_schema, planner_context)? } }; - let asc = asc.unwrap_or(true); - expr_vec.push(make_sort_expr( - expr, - asc, - nulls_first.unwrap_or(self.options.default_null_ordering.eval(asc)), - )) + sort_expr_vec.push(make_sort_expr(expr, asc, nulls_first)); } Ok(sort_expr_vec) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index c5fa39075cf6..d86404f73a7a 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -78,9 +78,9 @@ impl ParserOptions { map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, - // By default, `asc_reverse` is used to follow Postgres's behavior. + // By default, `nulls_max` is used to follow Postgres's behavior. // postgres rule: https://www.postgresql.org/docs/current/queries-order.html - default_null_ordering: NullOrdering::AscReverse, + default_null_ordering: NullOrdering::NullsMax, } } @@ -167,10 +167,10 @@ impl From<&SqlParserOptions> for ParserOptions { /// Represents the null ordering for sorting expressions. 
#[derive(Debug, Clone, Copy)] pub enum NullOrdering { - /// Ascending order with nulls appearing last. - AscReverse, - /// Descending order with nulls appearing last. - DescReverse, + /// Nulls appear last in ascending order. + NullsMax, + /// Nulls appear first in ascending order. + NullsMin, /// Nulls appear first. NullsFirst, /// Nulls appear last. @@ -185,8 +185,8 @@ impl NullOrdering { /// * `false` if nulls should appear last. pub fn eval(&self, asc: bool) -> bool { match self { - Self::AscReverse => !asc, - Self::DescReverse => asc, + Self::NullsMax => !asc, + Self::NullsMin => asc, Self::NullsFirst => true, Self::NullsLast => false, } @@ -198,8 +198,8 @@ impl FromStr for NullOrdering { fn from_str(s: &str) -> Result { match s { - "asc_reverse" => Ok(Self::AscReverse), - "desc_reverse" => Ok(Self::DescReverse), + "nulls_max" => Ok(Self::NullsMax), + "nulls_min" => Ok(Self::NullsMin), "nulls_first" => Ok(Self::NullsFirst), "nulls_last" => Ok(Self::NullsLast), _ => plan_err!("Unknown null ordering: {s}"), @@ -209,7 +209,7 @@ impl FromStr for NullOrdering { impl From<&str> for NullOrdering { fn from(s: &str) -> Self { - Self::from_str(s).unwrap_or(Self::AscReverse) + Self::from_str(s).unwrap_or(Self::NullsMax) } } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 6866a462ccdd..0be6ca730440 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1420,9 +1420,10 @@ impl SqlToRel<'_, S> { ) .unwrap(); let asc = order_by_expr.options.asc.unwrap_or(true); - let nulls_first = order_by_expr.options - .nulls_first - .unwrap_or(self.options.default_null_ordering.eval(asc)); + let nulls_first = + order_by_expr.options.nulls_first.unwrap_or_else(|| { + self.options.default_null_ordering.eval(asc) + }); SortExpr::new(ordered_expr, asc, nulls_first) }) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index fc6c1b6e2994..25144042504f 100644 --- 
a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -3361,7 +3361,7 @@ fn parse_decimals_parser_options() -> ParserOptions { map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, - default_null_ordering: NullOrdering::AscReverse, + default_null_ordering: NullOrdering::NullsMax, } } @@ -3373,7 +3373,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, - default_null_ordering: NullOrdering::AscReverse, + default_null_ordering: NullOrdering::NullsMax, } } @@ -3385,7 +3385,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions { map_string_types_to_utf8view: true, enable_options_value_normalization: false, collect_spans: false, - default_null_ordering: NullOrdering::AscReverse, + default_null_ordering: NullOrdering::NullsMax, } } diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 8a98ad7da38d..43b71c24f2ce 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -421,8 +421,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. 
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. -datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](sqlparser::tokenizer::Span)) will be collected and recorded in the logical plan nodes. -datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results By default, `asc_reverse` is used to follow Postgres's behavior. postgres rule: +datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `null_max` is used to follow Postgres's behavior. postgres rule: datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. 
diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index aa25ee00df50..2fac6029f941 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -95,9 +95,9 @@ NULL three 2 two statement ok -set datafusion.sql_parser.default_null_ordering = 'desc_reverse'; +set datafusion.sql_parser.default_null_ordering = 'nulls_min'; -# test asc with `desc_reverse` null ordering +# test asc with `nulls_min` null ordering query IT SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num @@ -106,7 +106,7 @@ NULL three 1 one 2 two -# test desc with `desc_reverse` null ordering +# test desc with `nulls_min` null ordering query IT SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC @@ -160,7 +160,7 @@ NULL three # reset to default null ordering statement ok -set datafusion.sql_parser.default_null_ordering = 'asc_reverse'; +set datafusion.sql_parser.default_null_ordering = 'nulls_max'; # sort From fb4ecf327aa8f9278f888c779c0e7d671028c83d Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 29 Jul 2025 19:45:32 +0800 Subject: [PATCH 3/7] update doc --- docs/source/user-guide/configs.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 96b7ee672bdb..c104341148bc 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -136,6 +136,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.map_string_types_to_utf8view | true | If true, string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. If false, they are mapped to `Utf8`. Default is true. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. 
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. | | datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries | +| datafusion.sql_parser.default_null_ordering | asc_reverse | Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `null_max` is used to follow Postgres's behavior. postgres rule: | | datafusion.format.safe | true | If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`] | | datafusion.format.null | | Format string for nulls | | datafusion.format.date_format | %Y-%m-%d | Date format for date arrays | From cb56c8fbed6b38fac22a02d7108ab609e0981f1f Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 29 Jul 2025 20:07:33 +0800 Subject: [PATCH 4/7] fix default --- datafusion/common/src/config.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 444dbc985314..b75a5df8bec8 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -284,9 +284,9 @@ config_namespace! { /// - `nulls_first`: Nulls always be first in any order. /// - `nulls_last`: Nulls always be last in any order. /// - /// By default, `null_max` is used to follow Postgres's behavior. + /// By default, `nulls_max` is used to follow Postgres's behavior. 
/// postgres rule: - pub default_null_ordering: String, default = "asc_reverse".to_string() + pub default_null_ordering: String, default = "nulls_max".to_string() } } From a58ff66e7e142b78445ebbfd4701f7b481826d6a Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 29 Jul 2025 20:17:49 +0800 Subject: [PATCH 5/7] fix doc --- docs/source/user-guide/configs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index c104341148bc..ec51e8fc9f40 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -136,7 +136,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.sql_parser.map_string_types_to_utf8view | true | If true, string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. If false, they are mapped to `Utf8`. Default is true. | | datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. | | datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries | -| datafusion.sql_parser.default_null_ordering | asc_reverse | Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `null_max` is used to follow Postgres's behavior. postgres rule: | +| datafusion.sql_parser.default_null_ordering | nulls_max | Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. 
- `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: | | datafusion.format.safe | true | If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`] | | datafusion.format.null | | Format string for nulls | | datafusion.format.date_format | %Y-%m-%d | Date format for date arrays | From 2de2ac919efd41276b75ec86ed85bb95ebdfb1fb Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Tue, 29 Jul 2025 20:44:32 +0800 Subject: [PATCH 6/7] fix sqllogictests --- datafusion/sqllogictest/test_files/information_schema.slt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 43b71c24f2ce..2d778bc9d654 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -307,7 +307,7 @@ datafusion.optimizer.repartition_windows true datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.collect_spans false -datafusion.sql_parser.default_null_ordering asc_reverse +datafusion.sql_parser.default_null_ordering nulls_max datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true datafusion.sql_parser.enable_options_value_normalization false @@ -421,7 +421,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. 
When set to false, any rules that produce errors will cause the query to fail datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. -datafusion.sql_parser.default_null_ordering asc_reverse Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `null_max` is used to follow Postgres's behavior. postgres rule: +datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically. 
From 4ea5901afb1cc1f55d5a7c2edf00494cbeb2a194 Mon Sep 17 00:00:00 2001 From: Jax Liu Date: Wed, 30 Jul 2025 19:22:32 +0800 Subject: [PATCH 7/7] address comments --- datafusion/sql/src/expr/order_by.rs | 15 ++++++------ datafusion/sql/src/planner.rs | 4 ++-- datafusion/sql/src/statement.rs | 2 +- datafusion/sqllogictest/test_files/order.slt | 24 ++++++++++++++++++++ 4 files changed, 35 insertions(+), 10 deletions(-) diff --git a/datafusion/sql/src/expr/order_by.rs b/datafusion/sql/src/expr/order_by.rs index ed89e8b7d264..79ebc5943ffb 100644 --- a/datafusion/sql/src/expr/order_by.rs +++ b/datafusion/sql/src/expr/order_by.rs @@ -63,13 +63,14 @@ impl SqlToRel<'_, S> { let mut sort_expr_vec = Vec::with_capacity(order_by_exprs.len()); - let make_sort_expr = - |expr: Expr, asc: Option, nulls_first: Option| { - let asc = asc.unwrap_or(true); - let nulls_first = nulls_first - .unwrap_or_else(|| self.options.default_null_ordering.eval(asc)); - Sort::new(expr, asc, nulls_first) - }; + let make_sort_expr = |expr: Expr, + asc: Option, + nulls_first: Option| { + let asc = asc.unwrap_or(true); + let nulls_first = nulls_first + .unwrap_or_else(|| self.options.default_null_ordering.nulls_first(asc)); + Sort::new(expr, asc, nulls_first) + }; for order_by_expr in order_by_exprs { let OrderByExpr { diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index d86404f73a7a..2cb1dbdcb4ac 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -183,7 +183,7 @@ impl NullOrdering { /// # Returns /// * `true` if nulls should appear first. /// * `false` if nulls should appear last. 
- pub fn eval(&self, asc: bool) -> bool { + pub fn nulls_first(&self, asc: bool) -> bool { match self { Self::NullsMax => !asc, Self::NullsMin => asc, @@ -202,7 +202,7 @@ impl FromStr for NullOrdering { "nulls_min" => Ok(Self::NullsMin), "nulls_first" => Ok(Self::NullsFirst), "nulls_last" => Ok(Self::NullsLast), - _ => plan_err!("Unknown null ordering: {s}"), + _ => plan_err!("Unknown null ordering: Expected one of 'nulls_first', 'nulls_last', 'nulls_min' or 'nulls_max'. Got {s}"), } } } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 0be6ca730440..0fef18ac55f8 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1422,7 +1422,7 @@ impl SqlToRel<'_, S> { let asc = order_by_expr.options.asc.unwrap_or(true); let nulls_first = order_by_expr.options.nulls_first.unwrap_or_else(|| { - self.options.default_null_ordering.eval(asc) + self.options.default_null_ordering.nulls_first(asc) }); SortExpr::new(ordered_expr, asc, nulls_first) diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 2fac6029f941..1ceea1a7ea0e 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -158,6 +158,30 @@ SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) 1 one NULL three +statement ok +set datafusion.sql_parser.default_null_ordering = ''; + +# test asc with an empty `default_null_ordering`. Expected to use the default null ordering which is `nulls_max` + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num +---- +1 one +2 two +NULL three + +# test desc with an empty `default_null_ordering`. 
Expected to use the default null ordering which is `nulls_max` + +query IT +SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC +---- +NULL three +2 two +1 one + +statement error DataFusion error: Error during planning: Unsupported Value NULL +set datafusion.sql_parser.default_null_ordering = null; + # reset to default null ordering statement ok set datafusion.sql_parser.default_null_ordering = 'nulls_max';