Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions datafusion/common/src/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,7 @@ impl Statistics {
col_stats.max_value = col_stats.max_value.max(&item_col_stats.max_value);
col_stats.min_value = col_stats.min_value.min(&item_col_stats.min_value);
col_stats.sum_value = col_stats.sum_value.add(&item_col_stats.sum_value);
col_stats.distinct_count = Precision::Absent;
}

Ok(Statistics {
Expand Down Expand Up @@ -1152,4 +1153,50 @@ mod tests {
let e = Statistics::try_merge_iter(&items, &schema).unwrap_err();
assert_contains!(e.to_string(), "Error during planning: Cannot merge statistics with different number of columns: 0 vs 1");
}

#[test]
fn test_try_merge_distinct_count_absent() {
    // Builds single-column statistics in which every field is exact and the
    // column has no nulls; only the values passed in differ between inputs.
    let single_column = |rows: usize, bytes: usize, lo: i32, hi: i32, ndv: usize| {
        let column = ColumnStatistics::new_unknown()
            .with_null_count(Precision::Exact(0))
            .with_min_value(Precision::Exact(ScalarValue::Int32(Some(lo))))
            .with_max_value(Precision::Exact(ScalarValue::Int32(Some(hi))))
            .with_distinct_count(Precision::Exact(ndv));

        Statistics::default()
            .with_num_rows(Precision::Exact(rows))
            .with_total_byte_size(Precision::Exact(bytes))
            .add_column_statistics(column)
    };

    // Two inputs, each carrying an exact distinct count (5 and 7) and
    // overlapping value ranges (1..=10 and 5..=20).
    let left = single_column(10, 100, 1, 10, 5);
    let right = single_column(15, 150, 5, 20, 7);

    let merged = left.try_merge(&right).unwrap();

    // Table-level totals are the sums of the two inputs.
    assert_eq!(merged.num_rows, Precision::Exact(25));
    assert_eq!(merged.total_byte_size, Precision::Exact(250));

    // Column-level bounds span both inputs; the null count stays at zero.
    let column = &merged.column_statistics[0];
    let expected_min = Precision::Exact(ScalarValue::Int32(Some(1)));
    let expected_max = Precision::Exact(ScalarValue::Int32(Some(20)));
    assert_eq!(column.null_count, Precision::Exact(0));
    assert_eq!(column.min_value, expected_min);
    assert_eq!(column.max_value, expected_max);

    // The merged distinct count must be reported as Absent even though both
    // inputs supplied an exact value.
    assert_eq!(column.distinct_count, Precision::Absent);
}
}