Skip to content

Add sql_parser.default_null_ordering config option to customize the default null ordering #16963

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,16 @@ config_namespace! {

/// Specifies the recursion depth limit when parsing complex SQL Queries
pub recursion_limit: usize, default = 50

/// Specifies the default null ordering for query results. There are 4 options:
/// - `nulls_max`: Nulls appear last in ascending order.
/// - `nulls_min`: Nulls appear first in ascending order.
/// - `nulls_first`: Nulls always be first in any order.
/// - `nulls_last`: Nulls always be last in any order.
///
/// By default, `nulls_max` is used to follow Postgres's behavior.
/// postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
pub default_null_ordering: String, default = "nulls_max".to_string()
}
}

Expand Down
4 changes: 4 additions & 0 deletions datafusion/core/src/execution/session_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,10 @@ impl SessionState {
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
map_string_types_to_utf8view: sql_parser_options.map_string_types_to_utf8view,
collect_spans: sql_parser_options.collect_spans,
default_null_ordering: sql_parser_options
.default_null_ordering
.as_str()
.into(),
}
}

Expand Down
16 changes: 8 additions & 8 deletions datafusion/sql/src/expr/order_by.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {

let mut sort_expr_vec = Vec::with_capacity(order_by_exprs.len());

let make_sort_expr =
|expr: Expr, asc: Option<bool>, nulls_first: Option<bool>| {
let asc = asc.unwrap_or(true);
// When asc is true, by default nulls last to be consistent with postgres
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
let nulls_first = nulls_first.unwrap_or(!asc);
Sort::new(expr, asc, nulls_first)
};
let make_sort_expr = |expr: Expr,
asc: Option<bool>,
nulls_first: Option<bool>| {
let asc = asc.unwrap_or(true);
let nulls_first = nulls_first
.unwrap_or_else(|| self.options.default_null_ordering.nulls_first(asc));
Sort::new(expr, asc, nulls_first)
};

for order_by_expr in order_by_exprs {
let OrderByExpr {
Expand Down
62 changes: 62 additions & 0 deletions datafusion/sql/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

//! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST)
use std::collections::HashMap;
use std::str::FromStr;
use std::sync::Arc;
use std::vec;

Expand Down Expand Up @@ -54,6 +55,8 @@ pub struct ParserOptions {
pub collect_spans: bool,
/// Whether string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning.
pub map_string_types_to_utf8view: bool,
/// Default null ordering for sorting expressions.
pub default_null_ordering: NullOrdering,
}

impl ParserOptions {
Expand All @@ -75,6 +78,9 @@ impl ParserOptions {
map_string_types_to_utf8view: true,
enable_options_value_normalization: false,
collect_spans: false,
// By default, `nulls_max` is used to follow Postgres's behavior.
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
default_null_ordering: NullOrdering::NullsMax,
}
}

Expand Down Expand Up @@ -129,6 +135,12 @@ impl ParserOptions {
self.collect_spans = value;
self
}

/// Sets the `default_null_ordering` option.
pub fn with_default_null_ordering(mut self, value: NullOrdering) -> Self {
self.default_null_ordering = value;
self
}
}

impl Default for ParserOptions {
Expand All @@ -147,10 +159,60 @@ impl From<&SqlParserOptions> for ParserOptions {
enable_options_value_normalization: options
.enable_options_value_normalization,
collect_spans: options.collect_spans,
default_null_ordering: options.default_null_ordering.as_str().into(),
}
}
}

/// Default placement of NULL values for a sort expression that does not
/// explicitly specify where nulls go.
///
/// The variants fall into two families:
/// * `NullsMax` / `NullsMin` place nulls relative to the sort direction, as if
///   NULL were the largest / smallest value.
/// * `NullsFirst` / `NullsLast` place nulls at a fixed end of the output,
///   independent of the sort direction.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NullOrdering {
    /// Nulls sort as if larger than every other value: they appear last in
    /// ascending order and first in descending order.
    NullsMax,
    /// Nulls sort as if smaller than every other value: they appear first in
    /// ascending order and last in descending order.
    NullsMin,
    /// Nulls appear first, regardless of the sort direction.
    NullsFirst,
    /// Nulls appear last, regardless of the sort direction.
    NullsLast,
}

impl NullOrdering {
    /// Resolves this ordering into a concrete "nulls first" flag for a sort
    /// running in the given direction.
    ///
    /// # Arguments
    /// * `asc` - `true` for an ascending sort, `false` for a descending one.
    ///
    /// # Returns
    /// * `true` if nulls should be placed before all other values.
    /// * `false` if nulls should be placed after all other values.
    pub fn nulls_first(&self, asc: bool) -> bool {
        // The fixed-position variants ignore the direction entirely; the
        // remaining two only differ in whether NULL counts as the largest or
        // the smallest value.
        let null_is_largest = match self {
            Self::NullsFirst => return true,
            Self::NullsLast => return false,
            Self::NullsMax => true,
            Self::NullsMin => false,
        };

        // A "largest" null ends up first only when descending, a "smallest"
        // null only when ascending.
        null_is_largest != asc
    }
}

impl FromStr for NullOrdering {
    type Err = DataFusionError;

    /// Parses one of the four option names (`nulls_max`, `nulls_min`,
    /// `nulls_first`, `nulls_last`) into the matching variant.
    ///
    /// The comparison is exact: no trimming or case folding is applied.
    ///
    /// # Errors
    /// Returns a planning error naming the accepted values when `s` is not one
    /// of the four names.
    fn from_str(s: &str) -> Result<Self> {
        let ordering = match s {
            "nulls_first" => Self::NullsFirst,
            "nulls_last" => Self::NullsLast,
            "nulls_min" => Self::NullsMin,
            "nulls_max" => Self::NullsMax,
            _ => {
                return plan_err!("Unknown null ordering: Expected one of 'nulls_first', 'nulls_last', 'nulls_min' or 'nulls_max'. Got {s}");
            }
        };
        Ok(ordering)
    }
}

impl From<&str> for NullOrdering {
fn from(s: &str) -> Self {
Self::from_str(s).unwrap_or(Self::NullsMax)
}
}

/// Ident Normalizer
#[derive(Debug)]
pub struct IdentNormalizer {
Expand Down
4 changes: 3 additions & 1 deletion datafusion/sql/src/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
.unwrap();
let asc = order_by_expr.options.asc.unwrap_or(true);
let nulls_first =
order_by_expr.options.nulls_first.unwrap_or(!asc);
order_by_expr.options.nulls_first.unwrap_or_else(|| {
self.options.default_null_ordering.nulls_first(asc)
});

SortExpr::new(ordered_expr, asc, nulls_first)
})
Expand Down
5 changes: 4 additions & 1 deletion datafusion/sql/tests/sql_integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use datafusion_expr::{
use datafusion_functions::{string, unicode};
use datafusion_sql::{
parser::DFParser,
planner::{ParserOptions, SqlToRel},
planner::{NullOrdering, ParserOptions, SqlToRel},
};

use crate::common::{CustomExprPlanner, CustomTypePlanner, MockSessionState};
Expand Down Expand Up @@ -3361,6 +3361,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
map_string_types_to_utf8view: true,
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
}
}

Expand All @@ -3372,6 +3373,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
map_string_types_to_utf8view: true,
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
}
}

Expand All @@ -3383,6 +3385,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
map_string_types_to_utf8view: true,
enable_options_value_normalization: false,
collect_spans: false,
default_null_ordering: NullOrdering::NullsMax,
}
}

Expand Down
2 changes: 2 additions & 0 deletions datafusion/sqllogictest/test_files/information_schema.slt
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ datafusion.optimizer.repartition_windows true
datafusion.optimizer.skip_failed_rules false
datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.collect_spans false
datafusion.sql_parser.default_null_ordering nulls_max
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
Expand Down Expand Up @@ -420,6 +421,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data
datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.
datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.
Expand Down
92 changes: 92 additions & 0 deletions datafusion/sqllogictest/test_files/order.slt
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,98 @@ NULL three
1 one
2 two

statement ok
set datafusion.sql_parser.default_null_ordering = 'nulls_min';

# test asc with `nulls_min` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
----
NULL three
1 one
2 two

# test desc with `nulls_min` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
----
2 two
1 one
NULL three

statement ok
set datafusion.sql_parser.default_null_ordering = 'nulls_first';

# test asc with `nulls_first` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
----
NULL three
1 one
2 two

# test desc with `nulls_first` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
----
NULL three
2 two
1 one


statement ok
set datafusion.sql_parser.default_null_ordering = 'nulls_last';

# test asc with `nulls_last` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
----
1 one
2 two
NULL three

# test desc with `nulls_last` null ordering

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
----
2 two
1 one
NULL three

statement ok
set datafusion.sql_parser.default_null_ordering = '';

# test asc with an empty `default_null_ordering`. Expected to use the default null ordering which is `nulls_max`

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
----
1 one
2 two
NULL three

# test desc with an empty `default_null_ordering`. Expected to use the default null ordering which is `nulls_max`

query IT
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
----
NULL three
2 two
1 one

statement error DataFusion error: Error during planning: Unsupported Value NULL
set datafusion.sql_parser.default_null_ordering = null;

# reset to default null ordering
statement ok
set datafusion.sql_parser.default_null_ordering = 'nulls_max';

# sort

statement ok
Expand Down
1 change: 1 addition & 0 deletions docs/source/user-guide/configs.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
| datafusion.sql_parser.map_string_types_to_utf8view | true | If true, string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. If false, they are mapped to `Utf8`. Default is true. |
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. |
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |
| datafusion.sql_parser.default_null_ordering | nulls_max | Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> |
| datafusion.format.safe | true | If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`] |
| datafusion.format.null | | Format string for nulls |
| datafusion.format.date_format | %Y-%m-%d | Date format for date arrays |
Expand Down