Skip to content

Commit d376a32

Browse files
goldmedalCopilot
andauthored
Add sql_parser.default_null_ordering config option to customize the default null ordering (#16963)
* Allow to set up the default null ordering (#3) * add default_null_ordering config * add test for different config * Update datafusion/sql/src/planner.rs Co-authored-by: Copilot <[email protected]> * update doc * fix sqllogictest --------- Co-authored-by: Copilot <[email protected]> * rename config and fix test * update doc * fix default * fix doc * fix sqllogictests * address comments --------- Co-authored-by: Copilot <[email protected]>
1 parent e6f4c7f commit d376a32

File tree

9 files changed

+186
-10
lines changed

9 files changed

+186
-10
lines changed

datafusion/common/src/config.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,16 @@ config_namespace! {
277277

278278
/// Specifies the recursion depth limit when parsing complex SQL Queries
279279
pub recursion_limit: usize, default = 50
280+
281+
/// Specifies the default null ordering for query results. There are 4 options:
282+
/// - `nulls_max`: Nulls appear last in ascending order.
283+
/// - `nulls_min`: Nulls appear first in ascending order.
284+
/// - `nulls_first`: Nulls always be first in any order.
285+
/// - `nulls_last`: Nulls always be last in any order.
286+
///
287+
/// By default, `nulls_max` is used to follow Postgres's behavior.
288+
/// postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
289+
pub default_null_ordering: String, default = "nulls_max".to_string()
280290
}
281291
}
282292

datafusion/core/src/execution/session_state.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,10 @@ impl SessionState {
496496
support_varchar_with_length: sql_parser_options.support_varchar_with_length,
497497
map_string_types_to_utf8view: sql_parser_options.map_string_types_to_utf8view,
498498
collect_spans: sql_parser_options.collect_spans,
499+
default_null_ordering: sql_parser_options
500+
.default_null_ordering
501+
.as_str()
502+
.into(),
499503
}
500504
}
501505

datafusion/sql/src/expr/order_by.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,14 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
6363

6464
let mut sort_expr_vec = Vec::with_capacity(order_by_exprs.len());
6565

66-
let make_sort_expr =
67-
|expr: Expr, asc: Option<bool>, nulls_first: Option<bool>| {
68-
let asc = asc.unwrap_or(true);
69-
// When asc is true, by default nulls last to be consistent with postgres
70-
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
71-
let nulls_first = nulls_first.unwrap_or(!asc);
72-
Sort::new(expr, asc, nulls_first)
73-
};
66+
let make_sort_expr = |expr: Expr,
67+
asc: Option<bool>,
68+
nulls_first: Option<bool>| {
69+
let asc = asc.unwrap_or(true);
70+
let nulls_first = nulls_first
71+
.unwrap_or_else(|| self.options.default_null_ordering.nulls_first(asc));
72+
Sort::new(expr, asc, nulls_first)
73+
};
7474

7575
for order_by_expr in order_by_exprs {
7676
let OrderByExpr {

datafusion/sql/src/planner.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
//! [`SqlToRel`]: SQL Query Planner (produces [`LogicalPlan`] from SQL AST)
1919
use std::collections::HashMap;
20+
use std::str::FromStr;
2021
use std::sync::Arc;
2122
use std::vec;
2223

@@ -54,6 +55,8 @@ pub struct ParserOptions {
5455
pub collect_spans: bool,
5556
/// Whether string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning.
5657
pub map_string_types_to_utf8view: bool,
58+
/// Default null ordering for sorting expressions.
59+
pub default_null_ordering: NullOrdering,
5760
}
5861

5962
impl ParserOptions {
@@ -75,6 +78,9 @@ impl ParserOptions {
7578
map_string_types_to_utf8view: true,
7679
enable_options_value_normalization: false,
7780
collect_spans: false,
81+
// By default, `nulls_max` is used to follow Postgres's behavior.
82+
// postgres rule: https://www.postgresql.org/docs/current/queries-order.html
83+
default_null_ordering: NullOrdering::NullsMax,
7884
}
7985
}
8086

@@ -129,6 +135,12 @@ impl ParserOptions {
129135
self.collect_spans = value;
130136
self
131137
}
138+
139+
/// Sets the `default_null_ordering` option.
140+
pub fn with_default_null_ordering(mut self, value: NullOrdering) -> Self {
141+
self.default_null_ordering = value;
142+
self
143+
}
132144
}
133145

134146
impl Default for ParserOptions {
@@ -147,10 +159,60 @@ impl From<&SqlParserOptions> for ParserOptions {
147159
enable_options_value_normalization: options
148160
.enable_options_value_normalization,
149161
collect_spans: options.collect_spans,
162+
default_null_ordering: options.default_null_ordering.as_str().into(),
150163
}
151164
}
152165
}
153166

167+
/// Represents the null ordering for sorting expressions.
///
/// This is the policy applied when a sort expression does not spell out
/// `NULLS FIRST` / `NULLS LAST` itself; see [`NullOrdering::nulls_first`] for
/// how each variant resolves against the sort direction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NullOrdering {
    /// Nulls are treated as the largest value: they appear last in ascending
    /// order and first in descending order.
    NullsMax,
    /// Nulls are treated as the smallest value: they appear first in ascending
    /// order and last in descending order.
    NullsMin,
    /// Nulls appear first, regardless of the sort direction.
    NullsFirst,
    /// Nulls appear last, regardless of the sort direction.
    NullsLast,
}
179+
180+
impl NullOrdering {
181+
/// Evaluates the null ordering based on the given ascending flag.
182+
///
183+
/// # Returns
184+
/// * `true` if nulls should appear first.
185+
/// * `false` if nulls should appear last.
186+
pub fn nulls_first(&self, asc: bool) -> bool {
187+
match self {
188+
Self::NullsMax => !asc,
189+
Self::NullsMin => asc,
190+
Self::NullsFirst => true,
191+
Self::NullsLast => false,
192+
}
193+
}
194+
}
195+
196+
impl FromStr for NullOrdering {
197+
type Err = DataFusionError;
198+
199+
fn from_str(s: &str) -> Result<Self> {
200+
match s {
201+
"nulls_max" => Ok(Self::NullsMax),
202+
"nulls_min" => Ok(Self::NullsMin),
203+
"nulls_first" => Ok(Self::NullsFirst),
204+
"nulls_last" => Ok(Self::NullsLast),
205+
_ => plan_err!("Unknown null ordering: Expected one of 'nulls_first', 'nulls_last', 'nulls_min' or 'nulls_max'. Got {s}"),
206+
}
207+
}
208+
}
209+
210+
impl From<&str> for NullOrdering {
211+
fn from(s: &str) -> Self {
212+
Self::from_str(s).unwrap_or(Self::NullsMax)
213+
}
214+
}
215+
154216
/// Ident Normalizer
155217
#[derive(Debug)]
156218
pub struct IdentNormalizer {

datafusion/sql/src/statement.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1421,7 +1421,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
14211421
.unwrap();
14221422
let asc = order_by_expr.options.asc.unwrap_or(true);
14231423
let nulls_first =
1424-
order_by_expr.options.nulls_first.unwrap_or(!asc);
1424+
order_by_expr.options.nulls_first.unwrap_or_else(|| {
1425+
self.options.default_null_ordering.nulls_first(asc)
1426+
});
14251427

14261428
SortExpr::new(ordered_expr, asc, nulls_first)
14271429
})

datafusion/sql/tests/sql_integration.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use datafusion_expr::{
3232
use datafusion_functions::{string, unicode};
3333
use datafusion_sql::{
3434
parser::DFParser,
35-
planner::{ParserOptions, SqlToRel},
35+
planner::{NullOrdering, ParserOptions, SqlToRel},
3636
};
3737

3838
use crate::common::{CustomExprPlanner, CustomTypePlanner, MockSessionState};
@@ -3361,6 +3361,7 @@ fn parse_decimals_parser_options() -> ParserOptions {
33613361
map_string_types_to_utf8view: true,
33623362
enable_options_value_normalization: false,
33633363
collect_spans: false,
3364+
default_null_ordering: NullOrdering::NullsMax,
33643365
}
33653366
}
33663367

@@ -3372,6 +3373,7 @@ fn ident_normalization_parser_options_no_ident_normalization() -> ParserOptions
33723373
map_string_types_to_utf8view: true,
33733374
enable_options_value_normalization: false,
33743375
collect_spans: false,
3376+
default_null_ordering: NullOrdering::NullsMax,
33753377
}
33763378
}
33773379

@@ -3383,6 +3385,7 @@ fn ident_normalization_parser_options_ident_normalization() -> ParserOptions {
33833385
map_string_types_to_utf8view: true,
33843386
enable_options_value_normalization: false,
33853387
collect_spans: false,
3388+
default_null_ordering: NullOrdering::NullsMax,
33863389
}
33873390
}
33883391

datafusion/sqllogictest/test_files/information_schema.slt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ datafusion.optimizer.repartition_windows true
307307
datafusion.optimizer.skip_failed_rules false
308308
datafusion.optimizer.top_down_join_key_reordering true
309309
datafusion.sql_parser.collect_spans false
310+
datafusion.sql_parser.default_null_ordering nulls_max
310311
datafusion.sql_parser.dialect generic
311312
datafusion.sql_parser.enable_ident_normalization true
312313
datafusion.sql_parser.enable_options_value_normalization false
@@ -420,6 +421,7 @@ datafusion.optimizer.repartition_windows true Should DataFusion repartition data
420421
datafusion.optimizer.skip_failed_rules false When set to true, the logical plan optimizer will produce warning messages if any optimization rules produce errors and then proceed to the next rule. When set to false, any rules that produce errors will cause the query to fail
421422
datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys
422423
datafusion.sql_parser.collect_spans false When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes.
424+
datafusion.sql_parser.default_null_ordering nulls_max Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html>
423425
datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
424426
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted)
425427
datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase). Note that this option is ignored and will be removed in the future. All case-insensitive values are normalized automatically.

datafusion/sqllogictest/test_files/order.slt

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,98 @@ NULL three
9494
1 one
9595
2 two
9696

97+
statement ok
98+
set datafusion.sql_parser.default_null_ordering = 'nulls_min';
99+
100+
# test asc with `nulls_min` null ordering
101+
102+
query IT
103+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
104+
----
105+
NULL three
106+
1 one
107+
2 two
108+
109+
# test desc with `nulls_min` null ordering
110+
111+
query IT
112+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
113+
----
114+
2 two
115+
1 one
116+
NULL three
117+
118+
statement ok
119+
set datafusion.sql_parser.default_null_ordering = 'nulls_first';
120+
121+
# test asc with `nulls_first` null ordering
122+
123+
query IT
124+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
125+
----
126+
NULL three
127+
1 one
128+
2 two
129+
130+
# test desc with `nulls_first` null ordering
131+
132+
query IT
133+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
134+
----
135+
NULL three
136+
2 two
137+
1 one
138+
139+
140+
statement ok
141+
set datafusion.sql_parser.default_null_ordering = 'nulls_last';
142+
143+
# test asc with `nulls_last` null ordering
144+
145+
query IT
146+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
147+
----
148+
1 one
149+
2 two
150+
NULL three
151+
152+
# test desc with `nulls_last` null ordering
153+
154+
query IT
155+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
156+
----
157+
2 two
158+
1 one
159+
NULL three
160+
161+
statement ok
162+
set datafusion.sql_parser.default_null_ordering = '';
163+
164+
# test asc with an empty `default_null_ordering`. Expected to use the default null ordering which is `nulls_max`
165+
166+
query IT
167+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num
168+
----
169+
1 one
170+
2 two
171+
NULL three
172+
173+
# test desc with an empty `default_null_ordering`. Expected to use the default null ordering which is `nulls_max`
174+
175+
query IT
176+
SELECT * FROM (VALUES (1, 'one'), (2, 'two'), (null, 'three')) AS t (num,letter) ORDER BY num DESC
177+
----
178+
NULL three
179+
2 two
180+
1 one
181+
182+
statement error DataFusion error: Error during planning: Unsupported Value NULL
183+
set datafusion.sql_parser.default_null_ordering = null;
184+
185+
# reset to default null ordering
186+
statement ok
187+
set datafusion.sql_parser.default_null_ordering = 'nulls_max';
188+
97189
# sort
98190

99191
statement ok

docs/source/user-guide/configs.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus
136136
| datafusion.sql_parser.map_string_types_to_utf8view | true | If true, string types (VARCHAR, CHAR, Text, and String) are mapped to `Utf8View` during SQL planning. If false, they are mapped to `Utf8`. Default is true. |
137137
| datafusion.sql_parser.collect_spans | false | When set to true, the source locations relative to the original SQL query (i.e. [`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html)) will be collected and recorded in the logical plan nodes. |
138138
| datafusion.sql_parser.recursion_limit | 50 | Specifies the recursion depth limit when parsing complex SQL Queries |
139+
| datafusion.sql_parser.default_null_ordering | nulls_max | Specifies the default null ordering for query results. There are 4 options: - `nulls_max`: Nulls appear last in ascending order. - `nulls_min`: Nulls appear first in ascending order. - `nulls_first`: Nulls always be first in any order. - `nulls_last`: Nulls always be last in any order. By default, `nulls_max` is used to follow Postgres's behavior. postgres rule: <https://www.postgresql.org/docs/current/queries-order.html> |
139140
| datafusion.format.safe | true | If set to `true` any formatting errors will be written to the output instead of being converted into a [`std::fmt::Error`] |
140141
| datafusion.format.null | | Format string for nulls |
141142
| datafusion.format.date_format | %Y-%m-%d | Date format for date arrays |

0 commit comments

Comments
 (0)