@@ -32,7 +32,9 @@ use async_trait::async_trait;
32
32
use datafusion_catalog:: { ScanArgs , ScanResult , Session , TableProvider } ;
33
33
use datafusion_common:: {
34
34
config_datafusion_err, config_err, internal_err, plan_err, project_schema,
35
- stats:: Precision , Constraints , DataFusionError , Result , SchemaExt ,
35
+ stats:: Precision ,
36
+ tree_node:: { TreeNodeContainer , TreeNodeRecursion } ,
37
+ Constraints , DataFusionError , Result , SchemaExt ,
36
38
} ;
37
39
use datafusion_datasource:: {
38
40
compute_all_files_statistics,
@@ -1221,10 +1223,31 @@ impl TableProvider for ListingTable {
1221
1223
let known_file_ordering = self . try_create_output_ordering ( ) ?;
1222
1224
let desired_file_ordering = match args. preferred_ordering ( ) {
1223
1225
Some ( ordering) if !ordering. is_empty ( ) => {
1224
- // Prefer the ordering requested by the query to any inherint file ordering
1225
- create_ordering ( & self . table_schema , & [ ordering. to_vec ( ) ] ) ?
1226
- . first ( )
1227
- . cloned ( )
1226
+ // Prefer the ordering requested by the query to any natural file ordering.
1227
+ // We'll try to re-order the file reads to match the requested ordering as best we can using statistics.
1228
+ // Whatever the result is, it's likely better than a natural file ordering that doesn't match the query's ordering.
1229
+ // But we can only do this if the query's ordering is a simple ordering of columns (no expressions).
1230
+ let can_use_preferred_ordering = ordering. iter ( ) . all ( |sort_expr| {
1231
+ let mut contains_only_columns = true ;
1232
+ sort_expr
1233
+ . apply_elements ( |e| {
1234
+ if !matches ! ( e, Expr :: Column ( _) ) {
1235
+ contains_only_columns = false ;
1236
+ Ok ( TreeNodeRecursion :: Stop )
1237
+ } else {
1238
+ Ok ( TreeNodeRecursion :: Continue )
1239
+ }
1240
+ } )
1241
+ . expect ( "infallible closure cannot fail" ) ;
1242
+ contains_only_columns
1243
+ } ) ;
1244
+ if can_use_preferred_ordering {
1245
+ create_ordering ( & self . table_schema , & [ ordering. to_vec ( ) ] ) ?
1246
+ . first ( )
1247
+ . cloned ( )
1248
+ } else {
1249
+ known_file_ordering. first ( ) . cloned ( )
1250
+ }
1228
1251
}
1229
1252
Some ( _) | None => {
1230
1253
// If the query did not request a specific ordering, fall back to any inherent file ordering
0 commit comments