Skip to content

Commit 0fecaa0

Browse files
committed
Merge remote-tracking branch 'apache/main' into 7899-avoid-extra-allocation-in-object-builder
2 parents f5b0465 + 99eb1bc commit 0fecaa0

File tree

16 files changed

+1363
-313
lines changed

16 files changed

+1363
-313
lines changed

.github/workflows/parquet-variant.yml

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ on:
3131
pull_request:
3232
paths:
3333
- parquet-variant/**
34+
- parquet-variant-json/**
35+
- parquet-variant-compute/**
3436
- .github/**
3537

3638
jobs:
@@ -50,6 +52,8 @@ jobs:
5052
run: cargo test -p parquet-variant
5153
- name: Test parquet-variant-json
5254
run: cargo test -p parquet-variant-json
55+
- name: Test parquet-variant-compute
56+
run: cargo test -p parquet-variant-compute
5357

5458
# test compilation
5559
linux-features:
@@ -63,10 +67,12 @@ jobs:
6367
submodules: true
6468
- name: Setup Rust toolchain
6569
uses: ./.github/actions/setup-builder
66-
- name: Check compilation
70+
- name: Check compilation (parquet-variant)
6771
run: cargo check -p parquet-variant
68-
- name: Check compilation
72+
- name: Check compilation (parquet-variant-json)
6973
run: cargo check -p parquet-variant-json
74+
- name: Check compilation (parquet-variant-compute)
75+
run: cargo check -p parquet-variant-compute
7076

7177
clippy:
7278
name: Clippy
@@ -79,7 +85,9 @@ jobs:
7985
uses: ./.github/actions/setup-builder
8086
- name: Setup Clippy
8187
run: rustup component add clippy
82-
- name: Run clippy
88+
- name: Run clippy (parquet-variant)
8389
run: cargo clippy -p parquet-variant --all-targets --all-features -- -D warnings
84-
- name: Run clippy
90+
- name: Run clippy (parquet-variant-json)
8591
run: cargo clippy -p parquet-variant-json --all-targets --all-features -- -D warnings
92+
- name: Run clippy (parquet-variant-compute)
93+
run: cargo clippy -p parquet-variant-compute --all-targets --all-features -- -D warnings

arrow-array/src/builder/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
447447
DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
448448
DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
449449
DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
450+
DataType::BinaryView => Box::new(BinaryViewBuilder::with_capacity(capacity)),
450451
DataType::FixedSizeBinary(len) => {
451452
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
452453
}
@@ -464,6 +465,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
464465
),
465466
DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
466467
DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
468+
DataType::Utf8View => Box::new(StringViewBuilder::with_capacity(capacity)),
467469
DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
468470
DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
469471
DataType::Time32(TimeUnit::Second) => {

arrow-ord/src/sort.rs

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -180,13 +180,41 @@ where
180180

181181
// partition indices into valid and null indices
182182
fn partition_validity(array: &dyn Array) -> (Vec<u32>, Vec<u32>) {
183-
match array.null_count() {
184-
// faster path
185-
0 => ((0..(array.len() as u32)).collect(), vec![]),
186-
_ => {
187-
let indices = 0..(array.len() as u32);
188-
indices.partition(|index| array.is_valid(*index as usize))
183+
let len = array.len();
184+
let null_count = array.null_count();
185+
match array.nulls() {
186+
Some(nulls) if null_count > 0 => {
187+
let mut valid_indices = Vec::with_capacity(len - null_count);
188+
let mut null_indices = Vec::with_capacity(null_count);
189+
190+
let valid_slice = valid_indices.spare_capacity_mut();
191+
let null_slice = null_indices.spare_capacity_mut();
192+
let mut valid_idx = 0;
193+
let mut null_idx = 0;
194+
195+
nulls.into_iter().enumerate().for_each(|(i, v)| {
196+
if v {
197+
valid_slice[valid_idx].write(i as u32);
198+
valid_idx += 1;
199+
} else {
200+
null_slice[null_idx].write(i as u32);
201+
null_idx += 1;
202+
}
203+
});
204+
205+
assert_eq!(null_idx, null_count);
206+
assert_eq!(valid_idx, len - null_count);
207+
// Safety: The new lengths match the initial capacity as asserted above,
208+
// the bounds checks while writing also ensure they are less than or equal to the capacity.
209+
unsafe {
210+
valid_indices.set_len(valid_idx);
211+
null_indices.set_len(null_idx);
212+
}
213+
214+
(valid_indices, null_indices)
189215
}
216+
// faster path
217+
_ => ((0..(len as u32)).collect(), vec![]),
190218
}
191219
}
192220

parquet-variant-compute/Cargo.toml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,12 @@ name = "parquet_variant_compute"
4141
bench = false
4242

4343
[dev-dependencies]
44+
rand = "0.9.1"
4445
criterion = { version = "0.6", default-features = false }
45-
rand = { version = "0.9.1" }
46+
arrow = { workspace = true, features = ["test_utils"] }
47+
4648

4749
[[bench]]
48-
name = "variant_get"
50+
name = "variant_kernels"
4951
harness = false
52+

parquet-variant-compute/benches/variant_get.rs

Lines changed: 0 additions & 59 deletions
This file was deleted.

0 commit comments

Comments
 (0)