Skip to content

Commit f519bb4

Browse files
committed
[FIX] fix merge conflict issues
1 parent d02d85e commit f519bb4

File tree

3 files changed

+67
-223
lines changed

3 files changed

+67
-223
lines changed

parquet-variant-compute/src/variant_array.rs

Lines changed: 51 additions & 111 deletions
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,26 @@ impl VariantArray {
105105
)));
106106
};
107107

108+
// Extract value and typed_value fields
109+
let value = if let Some(value_col) = inner.column_by_name("value") {
110+
if let Some(binary_view) = value_col.as_binary_view_opt() {
111+
Some(binary_view.clone())
112+
} else {
113+
return Err(ArrowError::NotYetImplemented(format!(
114+
"VariantArray 'value' field must be BinaryView, got {}",
115+
value_col.data_type()
116+
)));
117+
}
118+
} else {
119+
None
120+
};
121+
let typed_value = inner.column_by_name("typed_value").cloned();
122+
108123
// Note these clones are cheap, they just bump the ref count
109124
Ok(Self {
110125
inner: inner.clone(),
111126
metadata: metadata.clone(),
112-
shredding_state: ShreddingState::try_new(inner)?,
127+
shredding_state: ShreddingState::try_new(metadata.clone(), value, typed_value)?,
113128
})
114129
}
115130

@@ -135,7 +150,7 @@ impl VariantArray {
135150
// This would be a lot simpler if ShreddingState were just a pair of Option... we already
136151
// have everything we need.
137152
let inner = builder.build();
138-
let shredding_state = ShreddingState::try_new(&inner).unwrap(); // valid by construction
153+
let shredding_state = ShreddingState::try_new(metadata.clone(), value, typed_value).unwrap(); // valid by construction
139154
Self {
140155
inner,
141156
metadata,
@@ -200,7 +215,8 @@ impl VariantArray {
200215
typed_value_to_variant(typed_value, index)
201216
}
202217
}
203-
ShreddingState::AllNull { .. } => {
218+
(None, None) => {
219+
// AllNull case: neither value nor typed_value fields exist
204220
// NOTE: This handles the case where neither value nor typed_value fields exist.
205221
// For top-level variants, this returns Variant::Null (JSON null).
206222
// For shredded object fields, this technically should indicate SQL NULL,
@@ -296,11 +312,18 @@ impl ShreddedVariantFieldArray {
296312
));
297313
};
298314

315+
// Extract value and typed_value fields (metadata is not expected in ShreddedVariantFieldArray)
316+
let value = inner_struct.column_by_name("value").and_then(|col| col.as_binary_view_opt().cloned());
317+
let typed_value = inner_struct.column_by_name("typed_value").cloned();
318+
319+
// Use a dummy metadata for the constructor (ShreddedVariantFieldArray doesn't have metadata)
320+
let dummy_metadata = arrow::array::BinaryViewArray::new_null(inner_struct.len());
321+
299322
// Note this clone is cheap, it just bumps the ref count
300323
let inner = inner_struct.clone();
301324
Ok(Self {
302325
inner: inner.clone(),
303-
shredding_state: ShreddingState::try_new(&inner)?,
326+
shredding_state: ShreddingState::try_new(dummy_metadata, value, typed_value)?,
304327
})
305328
}
306329

@@ -398,114 +421,36 @@ impl Array for ShreddedVariantFieldArray {
398421
///
399422
/// [Parquet Variant Shredding Spec]: https://github.com/apache/parquet-format/blob/master/VariantShredding.md#value-shredding
400423
#[derive(Debug)]
401-
pub enum ShreddingState {
402-
/// This variant has no typed_value field
403-
Unshredded { value: BinaryViewArray },
404-
/// This variant has a typed_value field and no value field
405-
/// meaning it is the shredded type
406-
PerfectlyShredded { typed_value: ArrayRef },
407-
/// Imperfectly shredded: Shredded values reside in `typed_value` while those that failed to
408-
/// shred reside in `value`. Missing field values are NULL in both columns, while NULL primitive
409-
/// values have NULL `typed_value` and `Variant::Null` in `value`.
410-
///
411-
/// NOTE: A partially shredded struct is a special kind of imperfect shredding, where
412-
/// `typed_value` and `value` are both non-NULL. The `typed_value` is a struct containing the
413-
/// subset of fields for which shredding was attempted (each field will then have its own value
414-
/// and/or typed_value sub-fields that indicate how shredding actually turned out). Meanwhile,
415-
/// the `value` is a variant object containing the subset of fields for which shredding was
416-
/// not even attempted.
417-
ImperfectlyShredded {
418-
value: BinaryViewArray,
419-
typed_value: ArrayRef,
420-
},
421-
/// All values are null, only metadata is present.
422-
///
423-
/// This state occurs when neither `value` nor `typed_value` fields exist in the schema.
424-
/// Note: By strict spec interpretation, this should only be valid for shredded object fields,
425-
/// not top-level variants. However, we allow it and treat as Variant::Null for pragmatic
426-
/// handling of missing data.
427-
AllNull { metadata: BinaryViewArray },
424+
pub struct ShreddingState {
425+
pub value: Option<BinaryViewArray>,
426+
pub typed_value: Option<ArrayRef>,
428427
}
429428

430429
impl ShreddingState {
431430
/// try to create a new `ShreddingState` from the given fields
432431
pub fn try_new(
433-
metadata: BinaryViewArray,
432+
_metadata: BinaryViewArray,
434433
value: Option<BinaryViewArray>,
435434
typed_value: Option<ArrayRef>,
436435
) -> Result<Self, ArrowError> {
437-
match (metadata, value, typed_value) {
438-
(metadata, Some(value), Some(typed_value)) => Ok(Self::PartiallyShredded {
439-
metadata,
440-
value,
441-
typed_value,
442-
}),
443-
(metadata, Some(value), None) => Ok(Self::Unshredded { metadata, value }),
444-
(metadata, None, Some(typed_value)) => Ok(Self::Typed {
445-
metadata,
446-
typed_value,
447-
}),
448-
(metadata, None, None) => Ok(Self::AllNull { metadata }),
449-
}
450-
}
451-
452-
/// Return a reference to the metadata field
453-
pub fn metadata_field(&self) -> &BinaryViewArray {
454-
match self {
455-
ShreddingState::Unshredded { metadata, .. } => metadata,
456-
ShreddingState::Typed { metadata, .. } => metadata,
457-
ShreddingState::PartiallyShredded { metadata, .. } => metadata,
458-
ShreddingState::AllNull { metadata } => metadata,
459-
}
460-
461436
Ok(Self { value, typed_value })
462437
}
463438

464439
/// Return a reference to the value field, if present
465440
pub fn value_field(&self) -> Option<&BinaryViewArray> {
466-
match self {
467-
ShreddingState::Unshredded { value, .. } => Some(value),
468-
ShreddingState::Typed { .. } => None,
469-
ShreddingState::PartiallyShredded { value, .. } => Some(value),
470-
ShreddingState::AllNull { .. } => None,
471-
}
441+
self.value.as_ref()
472442
}
473443

474444
/// Return a reference to the typed_value field, if present
475445
pub fn typed_value_field(&self) -> Option<&ArrayRef> {
476-
match self {
477-
ShreddingState::Unshredded { .. } => None,
478-
ShreddingState::Typed { typed_value, .. } => Some(typed_value),
479-
ShreddingState::PartiallyShredded { typed_value, .. } => Some(typed_value),
480-
ShreddingState::AllNull { .. } => None,
481-
}
446+
self.typed_value.as_ref()
482447
}
483448

484449
/// Slice all the underlying arrays
485450
pub fn slice(&self, offset: usize, length: usize) -> Self {
486-
match self {
487-
ShreddingState::Unshredded { value } => ShreddingState::Unshredded {
488-
value: value.slice(offset, length),
489-
},
490-
ShreddingState::Typed {
491-
metadata,
492-
typed_value,
493-
} => ShreddingState::Typed {
494-
metadata: metadata.slice(offset, length),
495-
typed_value: typed_value.slice(offset, length),
496-
},
497-
ShreddingState::PartiallyShredded {
498-
metadata,
499-
value,
500-
typed_value,
501-
} => ShreddingState::PartiallyShredded {
502-
metadata: metadata.slice(offset, length),
503-
value: value.slice(offset, length),
504-
typed_value: typed_value.slice(offset, length),
505-
},
506-
ShreddingState::AllNull { metadata } => ShreddingState::AllNull {
507-
metadata: metadata.slice(offset, length),
508-
},
451+
Self {
452+
value: self.value.as_ref().map(|v| v.slice(offset, length)),
453+
typed_value: self.typed_value.as_ref().map(|tv| tv.slice(offset, length)),
509454
}
510455
}
511456
}
@@ -664,11 +609,10 @@ mod test {
664609
// This is a pragmatic decision to handle missing data gracefully.
665610
let variant_array = VariantArray::try_new(Arc::new(array)).unwrap();
666611

667-
// Verify the shredding state is AllNull
668-
assert!(matches!(
669-
variant_array.shredding_state(),
670-
ShreddingState::AllNull { .. }
671-
));
612+
// Verify the shredding state is AllNull (both value and typed_value are None)
613+
let shredding_state = variant_array.shredding_state();
614+
assert!(shredding_state.value_field().is_none());
615+
assert!(shredding_state.typed_value_field().is_none());
672616

673617
// Verify that value() returns Variant::Null (compensating for spec violation)
674618
for i in 0..variant_array.len() {
@@ -727,13 +671,11 @@ mod test {
727671
let metadata = BinaryViewArray::from(vec![b"test" as &[u8]]);
728672
let shredding_state = ShreddingState::try_new(metadata.clone(), None, None).unwrap();
729673

730-
assert!(matches!(shredding_state, ShreddingState::AllNull { .. }));
674+
// Verify the shredding state is AllNull (both value and typed_value are None)
675+
assert!(shredding_state.value_field().is_none());
676+
assert!(shredding_state.typed_value_field().is_none());
731677

732-
// Verify metadata is preserved correctly
733-
if let ShreddingState::AllNull { metadata: m } = shredding_state {
734-
assert_eq!(m.len(), metadata.len());
735-
assert_eq!(m.value(0), metadata.value(0));
736-
}
678+
// Note: metadata is no longer stored in the struct, it's passed to try_new but not stored
737679
}
738680

739681
#[test]
@@ -746,11 +688,10 @@ mod test {
746688

747689
let variant_array = VariantArray::try_new(Arc::new(struct_array)).unwrap();
748690

749-
// Verify the shredding state is AllNull
750-
assert!(matches!(
751-
variant_array.shredding_state(),
752-
ShreddingState::AllNull { .. }
753-
));
691+
// Verify the shredding state is AllNull (both value and typed_value are None)
692+
let shredding_state = variant_array.shredding_state();
693+
assert!(shredding_state.value_field().is_none());
694+
assert!(shredding_state.typed_value_field().is_none());
754695

755696
// Verify all values are null
756697
assert_eq!(variant_array.len(), 3);
@@ -797,9 +738,8 @@ mod test {
797738
let variant_array = VariantArray::try_new(Arc::new(struct_array)).unwrap();
798739

799740
// This should be Unshredded, not AllNull, because value field exists in schema
800-
assert!(matches!(
801-
variant_array.shredding_state(),
802-
ShreddingState::Unshredded { .. }
803-
));
741+
let shredding_state = variant_array.shredding_state();
742+
assert!(shredding_state.value_field().is_some());
743+
assert!(shredding_state.typed_value_field().is_none());
804744
}
805745
}

0 commit comments

Comments
 (0)