Skip to content

Commit 70e7eb3

Browse files
authored
Add benchmark for ByteViewGroupValueBuilder (#16826)
* Add benchmark for ByteViewGroupValueBuilder * Add more hot functions
1 parent 350c61b commit 70e7eb3

File tree

5 files changed

+204
-2
lines changed

5 files changed

+204
-2
lines changed

datafusion/physical-plan/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,7 @@ name = "spill_io"
9292
[[bench]]
9393
harness = false
9494
name = "sort_preserving_merge"
95+
96+
[[bench]]
97+
harness = false
98+
name = "aggregate_vectorized"
datafusion/physical-plan/benches/aggregate_vectorized.rs

Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::ArrayRef;
19+
use arrow::datatypes::StringViewType;
20+
use arrow::util::bench_util::{
21+
create_string_view_array_with_len, create_string_view_array_with_max_len,
22+
};
23+
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
24+
use datafusion_physical_plan::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder;
25+
use datafusion_physical_plan::aggregates::group_values::multi_group_by::GroupColumn;
26+
use std::sync::Arc;
27+
28+
const SIZES: [usize; 3] = [1_000, 10_000, 100_000];
29+
const NULL_DENSITIES: [f32; 3] = [0.0, 0.1, 0.5];
30+
31+
fn bench_vectorized_append(c: &mut Criterion) {
32+
let mut group = c.benchmark_group("ByteViewGroupValueBuilder_vectorized_append");
33+
34+
for &size in &SIZES {
35+
let rows: Vec<usize> = (0..size).collect();
36+
37+
for &null_density in &NULL_DENSITIES {
38+
let input = create_string_view_array_with_len(size, null_density, 8, false);
39+
let input: ArrayRef = Arc::new(input);
40+
41+
// vectorized_append
42+
let id = BenchmarkId::new(
43+
format!("inlined_null_{null_density:.1}_size_{size}"),
44+
"vectorized_append",
45+
);
46+
group.bench_function(id, |b| {
47+
b.iter(|| {
48+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
49+
builder.vectorized_append(&input, &rows).unwrap();
50+
});
51+
});
52+
53+
// append_val
54+
let id = BenchmarkId::new(
55+
format!("inlined_null_{null_density:.1}_size_{size}"),
56+
"append_val",
57+
);
58+
group.bench_function(id, |b| {
59+
b.iter(|| {
60+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
61+
for &i in &rows {
62+
builder.append_val(&input, i).unwrap();
63+
}
64+
});
65+
});
66+
67+
// vectorized_equal_to
68+
let id = BenchmarkId::new(
69+
format!("inlined_null_{null_density:.1}_size_{size}"),
70+
"vectorized_equal_to",
71+
);
72+
group.bench_function(id, |b| {
73+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
74+
builder.vectorized_append(&input, &rows).unwrap();
75+
let mut results = vec![true; size];
76+
b.iter(|| {
77+
builder.vectorized_equal_to(&rows, &input, &rows, &mut results);
78+
});
79+
});
80+
}
81+
}
82+
83+
for &size in &SIZES {
84+
let rows: Vec<usize> = (0..size).collect();
85+
86+
for &null_density in &NULL_DENSITIES {
87+
let scenario = "mixed";
88+
let input = create_string_view_array_with_len(size, null_density, 64, true);
89+
let input: ArrayRef = Arc::new(input);
90+
91+
// vectorized_append
92+
let id = BenchmarkId::new(
93+
format!("{scenario}_null_{null_density:.1}_size_{size}"),
94+
"vectorized_append",
95+
);
96+
group.bench_function(id, |b| {
97+
b.iter(|| {
98+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
99+
builder.vectorized_append(&input, &rows).unwrap();
100+
});
101+
});
102+
103+
// append_val
104+
let id = BenchmarkId::new(
105+
format!("{scenario}_null_{null_density:.1}_size_{size}"),
106+
"append_val",
107+
);
108+
group.bench_function(id, |b| {
109+
b.iter(|| {
110+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
111+
for &i in &rows {
112+
builder.append_val(&input, i).unwrap();
113+
}
114+
});
115+
});
116+
117+
// vectorized_equal_to
118+
let id = BenchmarkId::new(
119+
format!("{scenario}_null_{null_density:.1}_size_{size}"),
120+
"vectorized_equal_to",
121+
);
122+
group.bench_function(id, |b| {
123+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
124+
builder.vectorized_append(&input, &rows).unwrap();
125+
let mut results = vec![true; size];
126+
b.iter(|| {
127+
builder.vectorized_equal_to(&rows, &input, &rows, &mut results);
128+
});
129+
});
130+
}
131+
}
132+
133+
for &size in &SIZES {
134+
let rows: Vec<usize> = (0..size).collect();
135+
136+
for &null_density in &NULL_DENSITIES {
137+
let scenario = "random";
138+
let input = create_string_view_array_with_max_len(size, null_density, 400);
139+
let input: ArrayRef = Arc::new(input);
140+
141+
// vectorized_append
142+
let id = BenchmarkId::new(
143+
format!("{scenario}_null_{null_density:.1}_size_{size}"),
144+
"vectorized_append",
145+
);
146+
group.bench_function(id, |b| {
147+
b.iter(|| {
148+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
149+
builder.vectorized_append(&input, &rows).unwrap();
150+
});
151+
});
152+
153+
// append_val
154+
let id = BenchmarkId::new(
155+
format!("{scenario}_null_{null_density:.1}_size_{size}"),
156+
"append_val",
157+
);
158+
group.bench_function(id, |b| {
159+
b.iter(|| {
160+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
161+
for &i in &rows {
162+
builder.append_val(&input, i).unwrap();
163+
}
164+
});
165+
});
166+
167+
// vectorized_equal_to
168+
let id = BenchmarkId::new(
169+
format!("{scenario}_null_{null_density:.1}_size_{size}"),
170+
"vectorized_equal_to",
171+
);
172+
group.bench_function(id, |b| {
173+
let mut builder = ByteViewGroupValueBuilder::<StringViewType>::new();
174+
builder.vectorized_append(&input, &rows).unwrap();
175+
let mut results = vec![true; size];
176+
b.iter(|| {
177+
builder.vectorized_equal_to(&rows, &input, &rows, &mut results);
178+
});
179+
});
180+
}
181+
}
182+
183+
group.finish();
184+
}
185+
186+
// Collects `bench_vectorized_append` into the criterion group named `benches`
// and passes that group to `criterion_main!`.
criterion_group!(benches, bench_vectorized_append);
187+
criterion_main!(benches);

datafusion/physical-plan/src/aggregates/group_values/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ use datafusion_common::Result;
2828

2929
use datafusion_expr::EmitTo;
3030

31-
pub(crate) mod multi_group_by;
31+
pub mod multi_group_by;
3232

3333
mod row;
3434
mod single_group_by;

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,12 @@ pub struct ByteViewGroupValueBuilder<B: ByteViewType> {
7171
_phantom: PhantomData<B>,
7272
}
7373

74+
impl<B: ByteViewType> Default for ByteViewGroupValueBuilder<B> {
75+
fn default() -> Self {
76+
Self::new()
77+
}
78+
}
79+
7480
impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
7581
pub fn new() -> Self {
7682
Self {

datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
//! `GroupValues` implementations for multi group by cases
1919
2020
mod bytes;
21-
mod bytes_view;
21+
pub mod bytes_view;
2222
mod primitive;
2323

2424
use std::mem::{self, size_of};
@@ -91,6 +91,11 @@ pub trait GroupColumn: Send + Sync {
9191
/// Returns the number of rows stored in this builder
9292
fn len(&self) -> usize;
9393

94+
/// true if len == 0
95+
fn is_empty(&self) -> bool {
96+
self.len() == 0
97+
}
98+
9499
/// Returns the number of bytes used by this [`GroupColumn`]
95100
fn size(&self) -> usize;
96101

0 commit comments

Comments
 (0)