Skip to content

Commit f0b8c80

Browse files
committed
[Variant] Add variant_kernels benchmark
1 parent 03a837e commit f0b8c80

File tree

3 files changed

+241
-61
lines changed

3 files changed

+241
-61
lines changed

parquet-variant-compute/Cargo.toml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,11 @@ name = "parquet_variant_compute"
4141
bench = false
4242

4343
[dev-dependencies]
44+
rand = "0.9.1"
4445
criterion = { version = "0.6", default-features = false }
45-
rand = { version = "0.9.1" }
46+
4647

4748
[[bench]]
48-
name = "variant_get"
49+
name = "variant_kernels"
4950
harness = false
51+

parquet-variant-compute/benches/variant_get.rs

Lines changed: 0 additions & 59 deletions
This file was deleted.
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::{ArrayRef, StringArray};
19+
use arrow::util::test_util::seedable_rng;
20+
use criterion::{criterion_group, criterion_main, Criterion};
21+
use parquet_variant::{Variant, VariantBuilder};
22+
use parquet_variant_compute::variant_get::{variant_get, GetOptions};
23+
use parquet_variant_compute::{batch_json_string_to_variant, VariantArray, VariantArrayBuilder};
24+
use rand::distr::Alphanumeric;
25+
use rand::rngs::StdRng;
26+
use rand::Rng;
27+
use rand::SeedableRng;
28+
use std::fmt::Write;
29+
use std::sync::Arc;
30+
31+
/// This function generates a vector of JSON strings, each representing a person
32+
/// with random first name, last name, and age.
33+
///
34+
/// Example:
35+
/// ```json
36+
/// {
37+
/// "first" : random_string_of_1_to_20_characters,
38+
/// "last" : random_string_of_1_to_20_characters,
39+
/// "age": random_value_between_20_and_80,
40+
/// }
41+
/// ```
42+
fn small_repeated_json_structure(count: usize) -> impl Iterator<Item = String> {
43+
let mut rng = seedable_rng();
44+
(0..count).map(move |_| {
45+
let first: String = (0..rng.random_range(1..=20))
46+
.map(|_| rng.sample(Alphanumeric) as char)
47+
.collect();
48+
let last: String = (0..rng.random_range(1..=20))
49+
.map(|_| rng.sample(Alphanumeric) as char)
50+
.collect();
51+
let age: u8 = rng.random_range(20..=80);
52+
format!("{{\"first\":\"{first}\",\"last\":\"{last}\",\"age\":{age}}}")
53+
})
54+
}
55+
56+
/// This function generates a vector of JSON strings which have many fields
57+
/// and a random structure (including field names)
58+
fn small_random_json_structure(count: usize) -> impl Iterator<Item = String> {
59+
let mut generator = RandomJsonGenerator::new();
60+
(0..count).map(move |_| generator.next().to_string())
61+
}
62+
63+
fn benchmark_batch_json_string_to_variant(c: &mut Criterion) {
64+
let input_array = StringArray::from_iter_values(small_repeated_json_structure(8000));
65+
let array_ref: ArrayRef = Arc::new(input_array);
66+
c.bench_function(
67+
"batch_json_string_to_variant small_repeated_json 8k string",
68+
|b| {
69+
b.iter(|| {
70+
let _ = batch_json_string_to_variant(&array_ref).unwrap();
71+
});
72+
},
73+
);
74+
75+
let input_array = StringArray::from_iter_values(small_random_json_structure(8000));
76+
let array_ref: ArrayRef = Arc::new(input_array);
77+
c.bench_function(
78+
"batch_json_string_to_variant small_random_json 8k string",
79+
|b| {
80+
b.iter(|| {
81+
let _ = batch_json_string_to_variant(&array_ref).unwrap();
82+
});
83+
},
84+
);
85+
}
86+
87+
fn create_primitive_variant(size: usize) -> VariantArray {
88+
let mut rng = StdRng::seed_from_u64(42);
89+
90+
let mut variant_builder = VariantArrayBuilder::new(1);
91+
92+
for _ in 0..size {
93+
let mut builder = VariantBuilder::new();
94+
builder.append_value(rng.random::<i64>());
95+
let (metadata, value) = builder.finish();
96+
variant_builder.append_variant(Variant::try_new(&metadata, &value).unwrap());
97+
}
98+
99+
variant_builder.build()
100+
}
101+
102+
pub fn variant_get_bench(c: &mut Criterion) {
103+
let variant_array = create_primitive_variant(8192);
104+
let input: ArrayRef = Arc::new(variant_array);
105+
106+
let options = GetOptions {
107+
path: vec![].into(),
108+
as_type: None,
109+
cast_options: Default::default(),
110+
};
111+
112+
c.bench_function("variant_get_primitive", |b| {
113+
b.iter(|| variant_get(&input.clone(), options.clone()))
114+
});
115+
}
116+
117+
criterion_group!(
118+
benches,
119+
variant_get_bench,
120+
benchmark_batch_json_string_to_variant
121+
);
122+
criterion_main!(benches);
123+
124+
/// Creates JSON with random structure and fields.
125+
///
126+
/// Each type is created in a random proportion, controlled by the
127+
/// probabilities. The listed probabilities should sum to at most 1.0.
128+
struct RandomJsonGenerator {
129+
/// Random number generator
130+
rng: StdRng,
131+
/// the probability of generating a null value
132+
null_probability: f64,
133+
/// the probability of generating a string value
134+
string_probability: f64,
135+
/// the probability of generating a number value
136+
number_probability: f64,
137+
/// the probability of generating a boolean value
138+
boolean_probability: f64,
139+
/// the probability of generating an object value
140+
object_probability: f64,
141+
// the probability of generating a JSON array is whatever probability remains
142+
/// The maximum depth of the generated JSON structure
143+
max_depth: usize,
144+
/// output buffer, cleared and reused for every generated document
145+
output_buffer: String,
146+
}
147+
148+
impl RandomJsonGenerator {
149+
fn new() -> Self {
150+
let rng = seedable_rng();
151+
Self {
152+
rng,
153+
null_probability: 0.05,
154+
string_probability: 0.25,
155+
number_probability: 0.25,
156+
boolean_probability: 0.10,
157+
object_probability: 0.10,
158+
max_depth: 5,
159+
output_buffer: String::new(),
160+
}
161+
}
162+
163+
fn next(&mut self) -> &str {
164+
self.output_buffer.clear();
165+
self.append_random_json(0);
166+
&self.output_buffer
167+
}
168+
169+
/// Appends a random JSON value to the output buffer.
170+
fn append_random_json(&mut self, current_depth: usize) {
171+
if current_depth >= self.max_depth {
172+
write!(&mut self.output_buffer, "\"max_depth reached\"").unwrap();
173+
return;
174+
}
175+
// Generate a random number to determine the type
176+
let random_value: f64 = self.rng.random();
177+
if random_value < self.null_probability {
178+
write!(&mut self.output_buffer, "null").unwrap();
179+
} else if random_value < self.null_probability + self.string_probability {
180+
// Generate a random string between 1 and 500 characters
181+
let length = self.rng.random_range(1..=500);
182+
let random_string: String = (0..length)
183+
.map(|_| self.rng.sample(Alphanumeric) as char)
184+
.collect();
185+
write!(&mut self.output_buffer, "\"{random_string}\"",).unwrap();
186+
} else if random_value
187+
< self.null_probability + self.string_probability + self.number_probability
188+
{
189+
// Generate a random number
190+
let random_number: f64 = self.rng.random_range(-1000.0..1000.0);
191+
write!(&mut self.output_buffer, "{random_number}",).unwrap();
192+
} else if random_value
193+
< self.null_probability
194+
+ self.string_probability
195+
+ self.number_probability
196+
+ self.boolean_probability
197+
{
198+
// Generate a random boolean
199+
let random_boolean: bool = self.rng.random();
200+
write!(&mut self.output_buffer, "{random_boolean}",).unwrap();
201+
} else if random_value
202+
< self.null_probability
203+
+ self.string_probability
204+
+ self.number_probability
205+
+ self.boolean_probability
206+
+ self.object_probability
207+
{
208+
// Generate a random object
209+
let num_fields = self.rng.random_range(1..=10);
210+
211+
write!(&mut self.output_buffer, "{{").unwrap();
212+
for i in 0..num_fields {
213+
let key_length = self.rng.random_range(1..=20);
214+
let key: String = (0..key_length)
215+
.map(|_| self.rng.sample(Alphanumeric) as char)
216+
.collect();
217+
write!(&mut self.output_buffer, "\"{key}\":").unwrap();
218+
self.append_random_json(current_depth + 1);
219+
if i < num_fields - 1 {
220+
write!(&mut self.output_buffer, ",").unwrap();
221+
}
222+
}
223+
write!(&mut self.output_buffer, "}}").unwrap();
224+
} else {
225+
// Generate a random array
226+
let length = self.rng.random_range(1..=10);
227+
write!(&mut self.output_buffer, "[").unwrap();
228+
for i in 0..length {
229+
self.append_random_json(current_depth + 1);
230+
if i < length - 1 {
231+
write!(&mut self.output_buffer, ",").unwrap();
232+
}
233+
}
234+
write!(&mut self.output_buffer, "]").unwrap();
235+
}
236+
}
237+
}

0 commit comments

Comments
 (0)