15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- //! [`VariantArray`] implementation with hybrid byte-level and high-level APIs
18
+ //! [`VariantArray`] implementation
19
19
20
20
use crate :: field_operations:: FieldOperations ;
21
- use arrow:: array:: { Array , ArrayData , BinaryViewArray , StructArray } ;
21
+ use arrow:: array:: { Array , ArrayData , ArrayRef , AsArray , StructArray } ;
22
22
use arrow:: buffer:: NullBuffer ;
23
- use arrow:: datatypes:: DataType ;
24
- use arrow:: error:: ArrowError ;
25
- use parquet_variant:: { Variant , VariantMetadata } ;
23
+ use arrow_schema:: { ArrowError , DataType } ;
24
+ use parquet_variant:: Variant ;
26
25
use std:: any:: Any ;
27
26
use std:: sync:: Arc ;
28
27
29
- /// Array implementation for variant data with hybrid byte-level and high-level APIs
28
+ /// An array of Parquet [`Variant`] values
29
+ ///
30
+ /// A [`VariantArray`] wraps an Arrow [`StructArray`] that stores the underlying
31
+ /// `metadata` and `value` fields, and adds convenience methods to access
32
+ /// the `Variant`s
33
+ ///
34
+ /// See [`VariantArrayBuilder`] for constructing a `VariantArray`.
35
+ ///
36
+ /// [`VariantArrayBuilder`]: crate::VariantArrayBuilder
37
+ ///
38
+ /// # Specification
39
+ ///
40
+ /// 1. This code follows the conventions for storing variants in Arrow `StructArray`
41
+ /// defined by [Extension Type for Parquet Variant arrow] and this [document].
42
+ /// At the time of this writing, this is not yet a standardized Arrow extension type.
43
+ ///
44
+ /// [Extension Type for Parquet Variant arrow]: https://github.com/apache/arrow/issues/46908
45
+ /// [document]: https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?usp=sharing
30
46
#[ derive( Debug ) ]
31
47
pub struct VariantArray {
32
48
/// StructArray of up to three fields:
@@ -53,12 +69,29 @@ pub struct VariantArray {
53
69
}
54
70
55
71
impl VariantArray {
56
- /// Create a new VariantArray from a StructArray
57
- pub fn try_new ( inner : Arc < StructArray > ) -> Result < Self , ArrowError > {
58
- // Validate that the struct has the expected format
59
- if inner. num_columns ( ) != 2 {
72
+ /// Creates a new `VariantArray` from a [`StructArray`].
73
+ ///
74
+ /// # Arguments
75
+ /// - `inner` - The underlying [`StructArray`] that contains the variant data.
76
+ ///
77
+ /// # Returns
78
+ /// - A new instance of `VariantArray`.
79
+ ///
80
+ /// # Errors:
81
+ /// - If the `StructArray` does not contain the required fields
82
+ ///
83
+ /// # Current support
84
+ /// This structure does not (yet) support the full Arrow Variant Array specification.
85
+ ///
86
+ /// Only `StructArrays` with `metadata` and `value` fields that are
87
+ /// [`BinaryViewArray`] are supported. Shredded values are not currently supported
88
+ /// nor are using types other than `BinaryViewArray`
89
+ ///
90
+ /// [`BinaryViewArray`]: arrow::array::BinaryViewArray
91
+ pub fn try_new ( inner : ArrayRef ) -> Result < Self , ArrowError > {
92
+ let Some ( inner) = inner. as_struct_opt ( ) else {
60
93
return Err ( ArrowError :: InvalidArgumentError (
61
- "Expected struct with exactly 2 columns (metadata, value) " . to_string ( ) ,
94
+ "Invalid VariantArray: requires StructArray as input " . to_string ( ) ,
62
95
) ) ;
63
96
} ;
64
97
// Ensure the StructArray has a metadata field of BinaryView
@@ -160,50 +193,49 @@ impl VariantArray {
160
193
161
194
Ok ( Self { inner } )
162
195
}
163
-
164
- /// Get the metadata field as a BinaryViewArray
165
- pub fn metadata_field ( & self ) -> & BinaryViewArray {
166
- self . inner . column ( 0 )
167
- . as_any ( )
168
- . downcast_ref :: < BinaryViewArray > ( )
169
- . expect ( "Expected metadata field to be BinaryViewArray" )
196
+
197
+ /// Returns a reference to the underlying [`StructArray`].
198
+ pub fn inner ( & self ) -> & StructArray {
199
+ & self . inner
170
200
}
171
-
172
- /// Get the value field as a BinaryViewArray
173
- pub fn value_field ( & self ) -> & BinaryViewArray {
174
- self . inner . column ( 1 )
175
- . as_any ( )
176
- . downcast_ref :: < BinaryViewArray > ( )
177
- . expect ( "Expected value field to be BinaryViewArray" )
201
+
202
+ /// Returns the inner [`StructArray`], consuming self
203
+ pub fn into_inner ( self ) -> StructArray {
204
+ self . inner
178
205
}
179
-
206
+
207
+ /// Return the [`Variant`] instance stored at the given row
208
+ ///
209
+ /// Panics if the index is out of bounds.
210
+ ///
211
+ /// Note: Does not do deep validation of the [`Variant`], so it is up to the
212
+ /// caller to ensure that the metadata and value were constructed correctly.
213
+ pub fn value ( & self , index : usize ) -> Variant {
214
+ let metadata = self . metadata_field ( ) . as_binary_view ( ) . value ( index) ;
215
+ let value = self . value_field ( ) . as_binary_view ( ) . value ( index) ;
216
+ Variant :: new ( metadata, value)
217
+ }
218
+
219
+ /// Return a reference to the metadata field of the [`StructArray`]
220
+ pub fn metadata_field ( & self ) -> & ArrayRef {
221
+ // spec says fields order is not guaranteed, so we search by name
222
+ self . inner . column_by_name ( "metadata" ) . unwrap ( )
223
+ }
224
+
225
+ /// Return a reference to the value field of the `StructArray`
226
+ pub fn value_field ( & self ) -> & ArrayRef {
227
+ // spec says fields order is not guaranteed, so we search by name
228
+ self . inner . column_by_name ( "value" ) . unwrap ( )
229
+ }
230
+
180
231
/// Get the metadata bytes for a specific index
181
232
pub fn metadata ( & self , index : usize ) -> & [ u8 ] {
182
- self . metadata_field ( ) . value ( index) . as_ref ( )
233
+ self . metadata_field ( ) . as_binary_view ( ) . value ( index) . as_ref ( )
183
234
}
184
235
185
236
/// Get the value bytes for a specific index
186
237
pub fn value_bytes ( & self , index : usize ) -> & [ u8 ] {
187
- self . value_field ( ) . value ( index) . as_ref ( )
188
- }
189
-
190
- /// Get the parsed variant at a specific index
191
- pub fn value ( & self , index : usize ) -> Variant {
192
- if index >= self . len ( ) {
193
- panic ! ( "Index {} out of bounds for array of length {}" , index, self . len( ) ) ;
194
- }
195
-
196
- if self . is_null ( index) {
197
- return Variant :: Null ;
198
- }
199
-
200
- let metadata = self . metadata ( index) ;
201
- let value = self . value_bytes ( index) ;
202
-
203
- let variant_metadata = VariantMetadata :: try_new ( metadata)
204
- . expect ( "Failed to parse variant metadata" ) ;
205
- Variant :: try_new_with_metadata ( variant_metadata, value)
206
- . expect ( "Failed to create variant from metadata and value" )
238
+ self . value_field ( ) . as_binary_view ( ) . value ( index) . as_ref ( )
207
239
}
208
240
209
241
/// Get value at a specific path for the variant at the given index
@@ -342,18 +374,15 @@ impl Array for VariantArray {
342
374
fn as_any ( & self ) -> & dyn Any {
343
375
self
344
376
}
345
-
377
+
346
378
fn to_data ( & self ) -> ArrayData {
347
379
self . inner . to_data ( )
348
380
}
349
-
381
+
350
382
fn into_data ( self ) -> ArrayData {
351
- match Arc :: try_unwrap ( self . inner ) {
352
- Ok ( inner) => inner. into_data ( ) ,
353
- Err ( inner) => inner. to_data ( ) ,
354
- }
383
+ self . inner . into_data ( )
355
384
}
356
-
385
+
357
386
fn data_type ( & self ) -> & DataType {
358
387
self . inner . data_type ( )
359
388
}
@@ -368,38 +397,111 @@ impl Array for VariantArray {
368
397
value_ref : val,
369
398
} )
370
399
}
371
-
400
+
372
401
fn len ( & self ) -> usize {
373
402
self . inner . len ( )
374
403
}
375
-
404
+
376
405
fn is_empty ( & self ) -> bool {
377
406
self . inner . is_empty ( )
378
407
}
379
-
380
- fn nulls ( & self ) -> Option < & NullBuffer > {
381
- self . inner . nulls ( )
382
- }
383
-
408
+
384
409
fn offset ( & self ) -> usize {
385
410
self . inner . offset ( )
386
411
}
387
-
412
+
413
+ fn nulls ( & self ) -> Option < & NullBuffer > {
414
+ self . inner . nulls ( )
415
+ }
416
+
388
417
fn get_buffer_memory_size ( & self ) -> usize {
389
418
self . inner . get_buffer_memory_size ( )
390
419
}
391
-
420
+
392
421
fn get_array_memory_size ( & self ) -> usize {
393
422
self . inner . get_array_memory_size ( )
394
423
}
395
424
}
396
425
397
426
#[ cfg( test) ]
398
- mod tests {
427
+ mod test {
399
428
use super :: * ;
400
429
use crate :: variant_array_builder:: VariantArrayBuilder ;
430
+ use arrow:: array:: { BinaryArray , BinaryViewArray } ;
431
+ use arrow_schema:: { Field , Fields } ;
401
432
use parquet_variant:: VariantBuilder ;
402
433
434
+ #[ test]
435
+ fn invalid_not_a_struct_array ( ) {
436
+ let array = make_binary_view_array ( ) ;
437
+ // Should fail because the input is not a StructArray
438
+ let err = VariantArray :: try_new ( array) ;
439
+ assert_eq ! (
440
+ err. unwrap_err( ) . to_string( ) ,
441
+ "Invalid argument error: Invalid VariantArray: requires StructArray as input"
442
+ ) ;
443
+ }
444
+
445
+ #[ test]
446
+ fn invalid_missing_metadata ( ) {
447
+ let fields = Fields :: from ( vec ! [ Field :: new( "value" , DataType :: BinaryView , true ) ] ) ;
448
+ let array = StructArray :: new ( fields, vec ! [ make_binary_view_array( ) ] , None ) ;
449
+ // Should fail because the StructArray does not contain a 'metadata' field
450
+ let err = VariantArray :: try_new ( Arc :: new ( array) ) ;
451
+ assert_eq ! (
452
+ err. unwrap_err( ) . to_string( ) ,
453
+ "Invalid argument error: Invalid VariantArray: StructArray must contain a 'metadata' field"
454
+ ) ;
455
+ }
456
+
457
+ #[ test]
458
+ fn invalid_missing_value ( ) {
459
+ let fields = Fields :: from ( vec ! [ Field :: new( "metadata" , DataType :: BinaryView , false ) ] ) ;
460
+ let array = StructArray :: new ( fields, vec ! [ make_binary_view_array( ) ] , None ) ;
461
+ // Should fail because the StructArray does not contain a 'value' field
462
+ let err = VariantArray :: try_new ( Arc :: new ( array) ) ;
463
+ assert_eq ! (
464
+ err. unwrap_err( ) . to_string( ) ,
465
+ "Invalid argument error: Invalid VariantArray: StructArray must contain a 'value' field"
466
+ ) ;
467
+ }
468
+
469
+ #[ test]
470
+ fn invalid_metadata_field_type ( ) {
471
+ let fields = Fields :: from ( vec ! [
472
+ Field :: new( "metadata" , DataType :: Binary , true ) , // Not yet supported
473
+ Field :: new( "value" , DataType :: BinaryView , true ) ,
474
+ ] ) ;
475
+ let array = StructArray :: new (
476
+ fields,
477
+ vec ! [ make_binary_array( ) , make_binary_view_array( ) ] ,
478
+ None ,
479
+ ) ;
480
+ let err = VariantArray :: try_new ( Arc :: new ( array) ) ;
481
+ assert_eq ! (
482
+ err. unwrap_err( ) . to_string( ) ,
483
+ "Not yet implemented: VariantArray 'metadata' field must be BinaryView, got Binary"
484
+ ) ;
485
+ }
486
+
487
+ #[ test]
488
+ fn invalid_value_field_type ( ) {
489
+ let fields = Fields :: from ( vec ! [
490
+ Field :: new( "metadata" , DataType :: BinaryView , true ) ,
491
+ Field :: new( "value" , DataType :: Binary , true ) , // Not yet supported
492
+ ] ) ;
493
+ let array = StructArray :: new (
494
+ fields,
495
+ vec ! [ make_binary_view_array( ) , make_binary_array( ) ] ,
496
+ None ,
497
+ ) ;
498
+ let err = VariantArray :: try_new ( Arc :: new ( array) ) ;
499
+ assert_eq ! (
500
+ err. unwrap_err( ) . to_string( ) ,
501
+ "Not yet implemented: VariantArray 'value' field must be BinaryView, got Binary"
502
+ ) ;
503
+ }
504
+
403
505
fn create_test_variant_array ( ) -> VariantArray {
404
506
let mut builder = VariantArrayBuilder :: new ( 2 ) ;
405
507
@@ -510,10 +612,18 @@ mod tests {
510
612
assert_eq ! ( value_field. len( ) , 2 ) ;
511
613
512
614
// Check that metadata and value bytes are non-empty
513
- assert ! ( !metadata_field. value( 0 ) . is_empty( ) ) ;
514
- assert ! ( !value_field. value( 0 ) . is_empty( ) ) ;
515
- assert ! ( !metadata_field. value( 1 ) . is_empty( ) ) ;
516
- assert ! ( !value_field. value( 1 ) . is_empty( ) ) ;
615
+ assert ! ( !metadata_field. as_binary_view( ) . value( 0 ) . is_empty( ) ) ;
616
+ assert ! ( !value_field. as_binary_view( ) . value( 0 ) . is_empty( ) ) ;
617
+ assert ! ( !metadata_field. as_binary_view( ) . value( 1 ) . is_empty( ) ) ;
618
+ assert ! ( !value_field. as_binary_view( ) . value( 1 ) . is_empty( ) ) ;
619
+ }
620
+
621
+ fn make_binary_view_array ( ) -> ArrayRef {
622
+ Arc :: new ( BinaryViewArray :: from ( vec ! [ b"test" as & [ u8 ] ] ) )
623
+ }
624
+
625
+ fn make_binary_array ( ) -> ArrayRef {
626
+ Arc :: new ( BinaryArray :: from ( vec ! [ b"test" as & [ u8 ] ] ) )
517
627
}
518
628
}
519
629
0 commit comments