Skip to content

Commit f74b2c9

Browse files
committed
Add comment explaining the rationale for using .get_slice_memory_size()
1 parent 83a20c5 commit f74b2c9

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

datafusion/functions-aggregate/src/array_agg.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ use arrow::compute::{filter, SortOptions};
2929
use arrow::datatypes::{DataType, Field, FieldRef, Fields};
3030

3131
use datafusion_common::cast::as_list_array;
32-
use datafusion_common::scalar::copy_array_data;
3332
use datafusion_common::utils::{
3433
compare_rows, get_row_at_idx, take_function_args, SingleRowListArrayBuilder,
3534
};
@@ -393,6 +392,17 @@ impl Accumulator for ArrayAggAccumulator {
393392
+ self
394393
.values
395394
.iter()
395+
// Each ArrayRef might be just a reference to a bigger array, and many
396+
// ArrayRefs here might be referencing exactly the same array, so if we
397+
// were to call `arr.get_array_memory_size()`, we would be double-counting
398+
// the same underlying data many times.
399+
//
400+
// Instead, we do an approximation by estimating how much memory each
401+
// ArrayRef would occupy if its underlying data was fully owned by this
402+
// accumulator.
403+
//
404+
// Note that this is just an estimation, but the reality is that this
405+
// accumulator might not own any data.
396406
.map(|arr| arr.to_data().get_slice_memory_size().unwrap_or_default())
397407
.sum::<usize>()
398408
+ self.datatype.size()

0 commit comments

Comments
 (0)