Skip to content

Commit 08353cb

Browse files
committed
[ADD] Path-based field extraction for VariantArray
1 parent 99eb1bc commit 08353cb

File tree

5 files changed

+589
-1
lines changed

5 files changed

+589
-1
lines changed
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[build]
2+
rustflags = ["-A", "unknown-lints", "-A", "clippy::transmute-int-to-float"]

parquet-variant-compute/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ keywords = ["arrow", "parquet", "variant"]
2929
edition = { workspace = true }
3030
rust-version = { workspace = true }
3131

32+
[lints.rust]
33+
unknown_lints = "allow"
3234

3335
[dependencies]
3436
arrow = { workspace = true }
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
use parquet_variant_compute::{VariantArrayBuilder, VariantPath};
2+
use parquet_variant::VariantBuilder;
3+
4+
fn main() {
5+
// Create some sample data
6+
let mut builder = VariantArrayBuilder::new(2);
7+
8+
// Row 1: User Alice
9+
{
10+
let mut variant_builder = VariantBuilder::new();
11+
{
12+
let mut obj = variant_builder.new_object();
13+
obj.insert("name", "Alice");
14+
obj.insert("age", 30i32);
15+
16+
{
17+
let mut address = obj.new_object("address");
18+
address.insert("city", "New York");
19+
address.insert("zip", "10001");
20+
let _ = address.finish();
21+
}
22+
23+
{
24+
let mut hobbies = obj.new_list("hobbies");
25+
hobbies.append_value("reading");
26+
hobbies.append_value("hiking");
27+
hobbies.append_value("cooking");
28+
hobbies.finish();
29+
}
30+
31+
obj.finish().unwrap();
32+
}
33+
let (metadata, value) = variant_builder.finish();
34+
builder.append_variant_buffers(&metadata, &value);
35+
}
36+
37+
// Row 2: User Bob
38+
{
39+
let mut variant_builder = VariantBuilder::new();
40+
{
41+
let mut obj = variant_builder.new_object();
42+
obj.insert("name", "Bob");
43+
obj.insert("age", 25i32);
44+
45+
{
46+
let mut address = obj.new_object("address");
47+
address.insert("city", "San Francisco");
48+
address.insert("zip", "94102");
49+
let _ = address.finish();
50+
}
51+
52+
{
53+
let mut hobbies = obj.new_list("hobbies");
54+
hobbies.append_value("swimming");
55+
hobbies.append_value("gaming");
56+
hobbies.finish();
57+
}
58+
59+
obj.finish().unwrap();
60+
}
61+
let (metadata, value) = variant_builder.finish();
62+
builder.append_variant_buffers(&metadata, &value);
63+
}
64+
65+
let variant_array = builder.build();
66+
67+
// Demonstrate path access functionality
68+
println!("=== Path Access Examples ===");
69+
70+
// 1. Single field access
71+
let name_path = VariantPath::field("name");
72+
let alice_name = variant_array.get_path(0, &name_path).unwrap();
73+
println!("Alice's name: {}", alice_name.as_string().unwrap());
74+
75+
// 2. Nested field access
76+
let city_path = VariantPath::field("address").push_field("city");
77+
let alice_city = variant_array.get_path(0, &city_path).unwrap();
78+
let bob_city = variant_array.get_path(1, &city_path).unwrap();
79+
println!("Alice's city: {}", alice_city.as_string().unwrap());
80+
println!("Bob's city: {}", bob_city.as_string().unwrap());
81+
82+
// 3. Array index access
83+
let hobby_path = VariantPath::field("hobbies").push_index(0);
84+
let alice_first_hobby = variant_array.get_path(0, &hobby_path).unwrap();
85+
let bob_first_hobby = variant_array.get_path(1, &hobby_path).unwrap();
86+
println!("Alice's first hobby: {}", alice_first_hobby.as_string().unwrap());
87+
println!("Bob's first hobby: {}", bob_first_hobby.as_string().unwrap());
88+
89+
// 4. Multiple field extraction
90+
let paths = vec![
91+
VariantPath::field("name"),
92+
VariantPath::field("age"),
93+
VariantPath::field("address").push_field("city"),
94+
];
95+
96+
let alice_data = variant_array.get_paths(0, &paths);
97+
println!("Alice's data: name={}, age={}, city={}",
98+
alice_data[0].as_ref().unwrap().as_string().unwrap(),
99+
alice_data[1].as_ref().unwrap().as_int32().unwrap(),
100+
alice_data[2].as_ref().unwrap().as_string().unwrap());
101+
102+
// 5. Column-wise extraction
103+
let names = variant_array.extract_field(&VariantPath::field("name"));
104+
println!("All names: {:?}", names.iter().map(|v| v.as_ref().unwrap().as_string().unwrap()).collect::<Vec<_>>());
105+
106+
println!("=== Performance Benefit ===");
107+
println!("✓ Direct field access without reconstructing entire variants");
108+
println!("✓ Efficient batch operations for analytical workloads");
109+
println!("✓ Foundation for shredding/unshredding operations");
110+
}

parquet-variant-compute/src/lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,17 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
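// Allow lints that some toolchains may not recognize, plus clippy's
// transmute-int-to-float lint.
// NOTE(review): crate-level `allow` attributes apply only to this crate, not to
// dependencies such as arrow — confirm this is the intended effect.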
19+
#![allow(unknown_lints)]
20+
#![allow(clippy::transmute_int_to_float)]
21+
1822
mod from_json;
1923
mod to_json;
2024
mod variant_array;
2125
mod variant_array_builder;
2226
pub mod variant_get;
2327

24-
pub use variant_array::VariantArray;
28+
pub use variant_array::{VariantArray, VariantPath, VariantPathElement};
2529
pub use variant_array_builder::VariantArrayBuilder;
2630

2731
pub use from_json::batch_json_string_to_variant;

0 commit comments

Comments
 (0)