Skip to content

Commit 314a599

Browse files
committed
[ADD] add hybrid approach for field access
1 parent aeccf2b commit 314a599

File tree

8 files changed

+1147
-572
lines changed

8 files changed

+1147
-572
lines changed
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
use arrow::array::Array;
2+
use parquet_variant_compute::VariantArrayBuilder;
3+
use parquet_variant::VariantBuilder;
4+
5+
fn main() {
6+
// Create some sample data with fields to remove
7+
let mut builder = VariantArrayBuilder::new(2);
8+
9+
// Row 1: User with temporary data
10+
{
11+
let mut variant_builder = VariantBuilder::new();
12+
{
13+
let mut obj = variant_builder.new_object();
14+
obj.insert("name", "Alice");
15+
obj.insert("age", 30i32);
16+
obj.insert("temp_session", "abc123");
17+
obj.insert("debug_info", "temporary debug data");
18+
19+
{
20+
let mut address = obj.new_object("address");
21+
address.insert("city", "New York");
22+
address.insert("zip", "10001");
23+
address.insert("temp_geocode", "40.7128,-74.0060");
24+
let _ = address.finish();
25+
}
26+
27+
let _ = obj.finish();
28+
}
29+
let (metadata, value) = variant_builder.finish();
30+
builder.append_variant_buffers(&metadata, &value);
31+
}
32+
33+
// Row 2: Another user with temporary data
34+
{
35+
let mut variant_builder = VariantBuilder::new();
36+
{
37+
let mut obj = variant_builder.new_object();
38+
obj.insert("name", "Bob");
39+
obj.insert("age", 25i32);
40+
obj.insert("temp_session", "def456");
41+
obj.insert("debug_info", "more temporary data");
42+
43+
{
44+
let mut address = obj.new_object("address");
45+
address.insert("city", "San Francisco");
46+
address.insert("zip", "94102");
47+
address.insert("temp_geocode", "37.7749,-122.4194");
48+
let _ = address.finish();
49+
}
50+
51+
let _ = obj.finish();
52+
}
53+
let (metadata, value) = variant_builder.finish();
54+
builder.append_variant_buffers(&metadata, &value);
55+
}
56+
57+
let array = builder.finish();
58+
59+
println!("=== Field Removal Examples ===");
60+
61+
// Show original data
62+
println!("Original data:");
63+
for i in 0..array.len() {
64+
let variant = array.value(i);
65+
if let Some(obj) = variant.as_object() {
66+
let name = obj.get("name").unwrap().as_string().unwrap().to_string();
67+
let session = obj.get("temp_session").map(|v| v.as_string().unwrap().to_string()).unwrap_or("None".to_string());
68+
let debug = obj.get("debug_info").map(|v| v.as_string().unwrap().to_string()).unwrap_or("None".to_string());
69+
println!(" {}: session={}, debug={}", name, session, debug);
70+
}
71+
}
72+
73+
// Remove temporary session field
74+
let cleaned_array = array.with_field_removed("temp_session").unwrap();
75+
76+
println!("\nRemoving temporary session fields...");
77+
println!("After removing temp_session:");
78+
for i in 0..cleaned_array.len() {
79+
let variant = cleaned_array.value(i);
80+
if let Some(obj) = variant.as_object() {
81+
let name = obj.get("name").unwrap().as_string().unwrap().to_string();
82+
let session = obj.get("temp_session").map(|v| v.as_string().unwrap().to_string()).unwrap_or("None".to_string());
83+
let debug = obj.get("debug_info").map(|v| v.as_string().unwrap().to_string()).unwrap_or("None".to_string());
84+
println!(" {}: session={}, debug={}", name, session, debug);
85+
}
86+
}
87+
88+
// Remove multiple temporary fields
89+
let final_array = cleaned_array.with_fields_removed(&["debug_info", "temp_session"]).unwrap();
90+
91+
println!("\nRemoving multiple temporary fields...");
92+
println!("Final clean data:");
93+
for i in 0..final_array.len() {
94+
let variant = final_array.value(i);
95+
if let Some(obj) = variant.as_object() {
96+
let name = obj.get("name").unwrap().as_string().unwrap().to_string();
97+
let age = obj.get("age").unwrap().as_int32().unwrap();
98+
99+
if let Some(address) = obj.get("address") {
100+
if let Some(addr_obj) = address.as_object() {
101+
let city = addr_obj.get("city").unwrap().as_string().unwrap().to_string();
102+
let zip = addr_obj.get("zip").unwrap().as_string().unwrap().to_string();
103+
let geocode = addr_obj.get("temp_geocode").map(|v| format!("Some(ShortString(ShortString(\"{}\")))", v.as_string().unwrap())).unwrap_or("None".to_string());
104+
println!(" {}: age={}, city={}, zip={}, geocode={}", name, age, city, zip, geocode);
105+
}
106+
}
107+
}
108+
}
109+
110+
println!("\n=== Performance Features ===");
111+
println!("✓ Efficient field removal at byte level");
112+
println!("✓ Support for nested field removal");
113+
println!("✓ Batch operations for cleaning multiple fields");
114+
println!("✓ Maintains data integrity during field removal");
115+
println!("✓ Foundation for data governance and privacy compliance");
116+
}

parquet-variant-compute/examples/path_access.rs

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ fn main() {
6262
builder.append_variant_buffers(&metadata, &value);
6363
}
6464

65-
let variant_array = builder.build();
65+
let variant_array = builder.finish();
6666

6767
// Demonstrate path access functionality
6868
println!("=== Path Access Examples ===");
@@ -72,7 +72,7 @@ fn main() {
7272
let alice_name = variant_array.get_path(0, &name_path).unwrap();
7373
println!("Alice's name: {}", alice_name.as_string().unwrap());
7474

75-
// 2. Nested field access
75+
// 2. Nested field access
7676
let city_path = VariantPath::field("address").push_field("city");
7777
let alice_city = variant_array.get_path(0, &city_path).unwrap();
7878
let bob_city = variant_array.get_path(1, &city_path).unwrap();
@@ -92,19 +92,25 @@ fn main() {
9292
VariantPath::field("age"),
9393
VariantPath::field("address").push_field("city"),
9494
];
95-
9695
let alice_data = variant_array.get_paths(0, &paths);
97-
println!("Alice's data: name={}, age={}, city={}",
98-
alice_data[0].as_ref().unwrap().as_string().unwrap(),
99-
alice_data[1].as_ref().unwrap().as_int32().unwrap(),
100-
alice_data[2].as_ref().unwrap().as_string().unwrap());
101-
102-
// 5. Column-wise extraction
103-
let names = variant_array.extract_field(&VariantPath::field("name"));
104-
println!("All names: {:?}", names.iter().map(|v| v.as_ref().unwrap().as_string().unwrap()).collect::<Vec<_>>());
96+
print!("Alice's data: ");
97+
for (i, path_result) in alice_data.iter().enumerate() {
98+
if let Some(variant) = path_result {
99+
if i == 0 {
100+
print!("name={}", variant.as_string().unwrap());
101+
} else if i == 1 {
102+
print!(", age={}", variant.as_int32().unwrap());
103+
} else if i == 2 {
104+
print!(", city={}", variant.as_string().unwrap());
105+
}
106+
}
107+
}
108+
println!();
105109

106-
println!("=== Performance Benefit ===");
107-
println!("✓ Direct field access without reconstructing entire variants");
108-
println!("✓ Efficient batch operations for analytical workloads");
109-
println!("✓ Foundation for shredding/unshredding operations");
110+
// 5. Batch field extraction
111+
let all_names = variant_array.extract_field_by_path(&VariantPath::field("name"));
112+
let name_strings: Vec<String> = all_names.iter()
113+
.filter_map(|opt| opt.as_ref().map(|v| v.as_string().unwrap().to_string()))
114+
.collect();
115+
println!("All names: {:?}", name_strings);
110116
}

0 commit comments

Comments
 (0)