@@ -105,11 +105,26 @@ impl VariantArray {
105
105
) ) ) ;
106
106
} ;
107
107
108
+ // Extract value and typed_value fields
109
+ let value = if let Some ( value_col) = inner. column_by_name ( "value" ) {
110
+ if let Some ( binary_view) = value_col. as_binary_view_opt ( ) {
111
+ Some ( binary_view. clone ( ) )
112
+ } else {
113
+ return Err ( ArrowError :: NotYetImplemented ( format ! (
114
+ "VariantArray 'value' field must be BinaryView, got {}" ,
115
+ value_col. data_type( )
116
+ ) ) ) ;
117
+ }
118
+ } else {
119
+ None
120
+ } ;
121
+ let typed_value = inner. column_by_name ( "typed_value" ) . cloned ( ) ;
122
+
108
123
// Note these clones are cheap, they just bump the ref count
109
124
Ok ( Self {
110
125
inner : inner. clone ( ) ,
111
126
metadata : metadata. clone ( ) ,
112
- shredding_state : ShreddingState :: try_new ( inner ) ?,
127
+ shredding_state : ShreddingState :: try_new ( metadata . clone ( ) , value , typed_value ) ?,
113
128
} )
114
129
}
115
130
@@ -135,7 +150,7 @@ impl VariantArray {
135
150
// This would be a lot simpler if ShreddingState were just a pair of Option... we already
136
151
// have everything we need.
137
152
let inner = builder. build ( ) ;
138
- let shredding_state = ShreddingState :: try_new ( & inner ) . unwrap ( ) ; // valid by construction
153
+ let shredding_state = ShreddingState :: try_new ( metadata . clone ( ) , value , typed_value ) . unwrap ( ) ; // valid by construction
139
154
Self {
140
155
inner,
141
156
metadata,
@@ -200,7 +215,8 @@ impl VariantArray {
200
215
typed_value_to_variant ( typed_value, index)
201
216
}
202
217
}
203
- ShreddingState :: AllNull { .. } => {
218
+ ( None , None ) => {
219
+ // AllNull case: neither value nor typed_value fields exist
204
220
// NOTE: This handles the case where neither value nor typed_value fields exist.
205
221
// For top-level variants, this returns Variant::Null (JSON null).
206
222
// For shredded object fields, this technically should indicate SQL NULL,
@@ -296,11 +312,18 @@ impl ShreddedVariantFieldArray {
296
312
) ) ;
297
313
} ;
298
314
315
+ // Extract value and typed_value fields (metadata is not expected in ShreddedVariantFieldArray)
316
+ let value = inner_struct. column_by_name ( "value" ) . and_then ( |col| col. as_binary_view_opt ( ) . cloned ( ) ) ;
317
+ let typed_value = inner_struct. column_by_name ( "typed_value" ) . cloned ( ) ;
318
+
319
+ // Use a dummy metadata for the constructor (ShreddedVariantFieldArray doesn't have metadata)
320
+ let dummy_metadata = arrow:: array:: BinaryViewArray :: new_null ( inner_struct. len ( ) ) ;
321
+
299
322
// Note this clone is cheap, it just bumps the ref count
300
323
let inner = inner_struct. clone ( ) ;
301
324
Ok ( Self {
302
325
inner : inner. clone ( ) ,
303
- shredding_state : ShreddingState :: try_new ( & inner ) ?,
326
+ shredding_state : ShreddingState :: try_new ( dummy_metadata , value , typed_value ) ?,
304
327
} )
305
328
}
306
329
@@ -398,114 +421,36 @@ impl Array for ShreddedVariantFieldArray {
398
421
///
399
422
/// [Parquet Variant Shredding Spec]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding
400
423
#[ derive( Debug ) ]
401
- pub enum ShreddingState {
402
- /// This variant has no typed_value field
403
- Unshredded { value : BinaryViewArray } ,
404
- /// This variant has a typed_value field and no value field
405
- /// meaning it is the shredded type
406
- PerfectlyShredded { typed_value : ArrayRef } ,
407
- /// Imperfectly shredded: Shredded values reside in `typed_value` while those that failed to
408
- /// shred reside in `value`. Missing field values are NULL in both columns, while NULL primitive
409
- /// values have NULL `typed_value` and `Variant::Null` in `value`.
410
- ///
411
- /// NOTE: A partially shredded struct is a special kind of imperfect shredding, where
412
- /// `typed_value` and `value` are both non-NULL. The `typed_value` is a struct containing the
413
- /// subset of fields for which shredding was attempted (each field will then have its own value
414
- /// and/or typed_value sub-fields that indicate how shredding actually turned out). Meanwhile,
415
- /// the `value` is a variant object containing the subset of fields for which shredding was
416
- /// not even attempted.
417
- ImperfectlyShredded {
418
- value : BinaryViewArray ,
419
- typed_value : ArrayRef ,
420
- } ,
421
- /// All values are null, only metadata is present.
422
- ///
423
- /// This state occurs when neither `value` nor `typed_value` fields exist in the schema.
424
- /// Note: By strict spec interpretation, this should only be valid for shredded object fields,
425
- /// not top-level variants. However, we allow it and treat as Variant::Null for pragmatic
426
- /// handling of missing data.
427
- AllNull { metadata : BinaryViewArray } ,
424
+ pub struct ShreddingState {
425
+ pub value : Option < BinaryViewArray > ,
426
+ pub typed_value : Option < ArrayRef > ,
428
427
}
429
428
430
429
impl ShreddingState {
431
430
/// try to create a new `ShreddingState` from the given fields
432
431
pub fn try_new (
433
- metadata : BinaryViewArray ,
432
+ _metadata : BinaryViewArray ,
434
433
value : Option < BinaryViewArray > ,
435
434
typed_value : Option < ArrayRef > ,
436
435
) -> Result < Self , ArrowError > {
437
- match ( metadata, value, typed_value) {
438
- ( metadata, Some ( value) , Some ( typed_value) ) => Ok ( Self :: PartiallyShredded {
439
- metadata,
440
- value,
441
- typed_value,
442
- } ) ,
443
- ( metadata, Some ( value) , None ) => Ok ( Self :: Unshredded { metadata, value } ) ,
444
- ( metadata, None , Some ( typed_value) ) => Ok ( Self :: Typed {
445
- metadata,
446
- typed_value,
447
- } ) ,
448
- ( metadata, None , None ) => Ok ( Self :: AllNull { metadata } ) ,
449
- }
450
- }
451
-
452
- /// Return a reference to the metadata field
453
- pub fn metadata_field ( & self ) -> & BinaryViewArray {
454
- match self {
455
- ShreddingState :: Unshredded { metadata, .. } => metadata,
456
- ShreddingState :: Typed { metadata, .. } => metadata,
457
- ShreddingState :: PartiallyShredded { metadata, .. } => metadata,
458
- ShreddingState :: AllNull { metadata } => metadata,
459
- }
460
-
461
436
Ok ( Self { value, typed_value } )
462
437
}
463
438
464
439
/// Return a reference to the value field, if present
465
440
pub fn value_field ( & self ) -> Option < & BinaryViewArray > {
466
- match self {
467
- ShreddingState :: Unshredded { value, .. } => Some ( value) ,
468
- ShreddingState :: Typed { .. } => None ,
469
- ShreddingState :: PartiallyShredded { value, .. } => Some ( value) ,
470
- ShreddingState :: AllNull { .. } => None ,
471
- }
441
+ self . value . as_ref ( )
472
442
}
473
443
474
444
/// Return a reference to the typed_value field, if present
475
445
pub fn typed_value_field ( & self ) -> Option < & ArrayRef > {
476
- match self {
477
- ShreddingState :: Unshredded { .. } => None ,
478
- ShreddingState :: Typed { typed_value, .. } => Some ( typed_value) ,
479
- ShreddingState :: PartiallyShredded { typed_value, .. } => Some ( typed_value) ,
480
- ShreddingState :: AllNull { .. } => None ,
481
- }
446
+ self . typed_value . as_ref ( )
482
447
}
483
448
484
449
/// Slice all the underlying arrays
485
450
pub fn slice ( & self , offset : usize , length : usize ) -> Self {
486
- match self {
487
- ShreddingState :: Unshredded { value } => ShreddingState :: Unshredded {
488
- value : value. slice ( offset, length) ,
489
- } ,
490
- ShreddingState :: Typed {
491
- metadata,
492
- typed_value,
493
- } => ShreddingState :: Typed {
494
- metadata : metadata. slice ( offset, length) ,
495
- typed_value : typed_value. slice ( offset, length) ,
496
- } ,
497
- ShreddingState :: PartiallyShredded {
498
- metadata,
499
- value,
500
- typed_value,
501
- } => ShreddingState :: PartiallyShredded {
502
- metadata : metadata. slice ( offset, length) ,
503
- value : value. slice ( offset, length) ,
504
- typed_value : typed_value. slice ( offset, length) ,
505
- } ,
506
- ShreddingState :: AllNull { metadata } => ShreddingState :: AllNull {
507
- metadata : metadata. slice ( offset, length) ,
508
- } ,
451
+ Self {
452
+ value : self . value . as_ref ( ) . map ( |v| v. slice ( offset, length) ) ,
453
+ typed_value : self . typed_value . as_ref ( ) . map ( |tv| tv. slice ( offset, length) ) ,
509
454
}
510
455
}
511
456
}
@@ -664,11 +609,10 @@ mod test {
664
609
// This is a pragmatic decision to handle missing data gracefully.
665
610
let variant_array = VariantArray :: try_new ( Arc :: new ( array) ) . unwrap ( ) ;
666
611
667
- // Verify the shredding state is AllNull
668
- assert ! ( matches!(
669
- variant_array. shredding_state( ) ,
670
- ShreddingState :: AllNull { .. }
671
- ) ) ;
612
+ // Verify the shredding state is AllNull (both value and typed_value are None)
613
+ let shredding_state = variant_array. shredding_state ( ) ;
614
+ assert ! ( shredding_state. value_field( ) . is_none( ) ) ;
615
+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
672
616
673
617
// Verify that value() returns Variant::Null (compensating for spec violation)
674
618
for i in 0 ..variant_array. len ( ) {
@@ -727,13 +671,11 @@ mod test {
727
671
let metadata = BinaryViewArray :: from ( vec ! [ b"test" as & [ u8 ] ] ) ;
728
672
let shredding_state = ShreddingState :: try_new ( metadata. clone ( ) , None , None ) . unwrap ( ) ;
729
673
730
- assert ! ( matches!( shredding_state, ShreddingState :: AllNull { .. } ) ) ;
674
+ // Verify the shredding state is AllNull (both value and typed_value are None)
675
+ assert ! ( shredding_state. value_field( ) . is_none( ) ) ;
676
+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
731
677
732
- // Verify metadata is preserved correctly
733
- if let ShreddingState :: AllNull { metadata : m } = shredding_state {
734
- assert_eq ! ( m. len( ) , metadata. len( ) ) ;
735
- assert_eq ! ( m. value( 0 ) , metadata. value( 0 ) ) ;
736
- }
678
+ // Note: metadata is no longer stored in the struct, it's passed to try_new but not stored
737
679
}
738
680
739
681
#[ test]
@@ -746,11 +688,10 @@ mod test {
746
688
747
689
let variant_array = VariantArray :: try_new ( Arc :: new ( struct_array) ) . unwrap ( ) ;
748
690
749
- // Verify the shredding state is AllNull
750
- assert ! ( matches!(
751
- variant_array. shredding_state( ) ,
752
- ShreddingState :: AllNull { .. }
753
- ) ) ;
691
+ // Verify the shredding state is AllNull (both value and typed_value are None)
692
+ let shredding_state = variant_array. shredding_state ( ) ;
693
+ assert ! ( shredding_state. value_field( ) . is_none( ) ) ;
694
+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
754
695
755
696
// Verify all values are null
756
697
assert_eq ! ( variant_array. len( ) , 3 ) ;
@@ -797,9 +738,8 @@ mod test {
797
738
let variant_array = VariantArray :: try_new ( Arc :: new ( struct_array) ) . unwrap ( ) ;
798
739
799
740
// This should be Unshredded, not AllNull, because value field exists in schema
800
- assert ! ( matches!(
801
- variant_array. shredding_state( ) ,
802
- ShreddingState :: Unshredded { .. }
803
- ) ) ;
741
+ let shredding_state = variant_array. shredding_state ( ) ;
742
+ assert ! ( shredding_state. value_field( ) . is_some( ) ) ;
743
+ assert ! ( shredding_state. typed_value_field( ) . is_none( ) ) ;
804
744
}
805
745
}
0 commit comments