Skip to content

Commit f3273b7

Browse files
committed
minimize diff
1 parent 459571a commit f3273b7

File tree

1 file changed

+7
-16
lines changed
  • datafusion/core/src/datasource/listing

1 file changed

+7
-16
lines changed

datafusion/core/src/datasource/listing/table.rs

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1194,26 +1194,17 @@ impl TableProvider for ListingTable {
11941194
.iter()
11951195
.map(|field| field.name().as_str())
11961196
.collect::<Vec<_>>();
1197+
11971198
// If the filters can be resolved using only partition cols, there is no need to
1198-
// collect statistics for files
1199-
let (partition_filters, other_filters): (Vec<_>, Vec<_>) =
1200-
filters.into_iter().partition(|f| {
1201-
can_be_evaluted_for_partition_pruning(&table_partition_col_names, f)
1199+
// push them down to TableScan; otherwise, `unhandled` pruning predicates will be generated
1200+
let (partition_filters, filters): (Vec<_>, Vec<_>) =
1201+
filters.iter().cloned().partition(|filter| {
1202+
can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter)
12021203
});
12031204

1204-
// Apply partition pruning. Although we only use the partition filtering, we
1205-
// need to include both the partition column filters and their unfiltered
1206-
// counterparts since we want the statistics to be accurate. If a file is not
1207-
// included because it doesn't match the partition filters, we should not include
1208-
// the statistics for that file. But if a file is included, the statistics for
1209-
// that file should not have the partition filter applied to it. For example
1210-
// if we have the query `SELECT sum(col1) FROM tbl WHERE date='2023-01-01'
1211-
// and the table is partitioned by `date`, we want to only consider files that
1212-
// match the predicate `date='2023-01-01'` but we don't want to apply the
1213-
// predicate to the statistics for that file as the whole file matches that
1214-
// predicate and the statistics for `col1` are from the entire file.
1205+
// We should not limit the number of partitioned files to scan if there are filters and a limit
12151206
// at the same time. This is because the limit should be applied after the filters are applied.
1216-
let statistic_file_limit = if other_filters.is_empty() { limit } else { None };
1207+
let statistic_file_limit = if filters.is_empty() { limit } else { None };
12171208

12181209
let (mut partitioned_file_lists, statistics) = self
12191210
.list_files_for_scan(state, &partition_filters, statistic_file_limit)

0 commit comments

Comments
 (0)