Skip to content

Commit c40eda9

Browse files
Logically compare objects, remove custom partial eq impl for metadata
1 parent b3f5077 commit c40eda9

File tree

2 files changed

+11
-105
lines changed

2 files changed

+11
-105
lines changed

parquet-variant/src/variant/metadata.rs

Lines changed: 2 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ impl VariantMetadataHeader {
127127
///
128128
/// [`Variant`]: crate::Variant
129129
/// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
130-
#[derive(Debug, Clone)]
130+
#[derive(Debug, Clone, PartialEq)]
131131
pub struct VariantMetadata<'m> {
132132
pub(crate) bytes: &'m [u8],
133133
header: VariantMetadataHeader,
@@ -335,23 +335,6 @@ impl<'m> VariantMetadata<'m> {
335335
}
336336
}
337337

338-
// Metadata dictionaries can be in any order per the spec to allow flexible Variant construction
339-
// This comparison checks for field name presence in both metadata instances rather than raw byte equality
340-
impl<'m> PartialEq for VariantMetadata<'m> {
341-
fn eq(&self, other: &Self) -> bool {
342-
self.is_empty() == other.is_empty()
343-
&& if self.is_sorted() && other.is_sorted() {
344-
self.len() == other.len() && self.iter().eq(other.iter())
345-
} else {
346-
// can't guarantee neither field names are unique
347-
let self_field_names: HashSet<&'m str> = HashSet::from_iter(self.iter());
348-
let other_field_names: HashSet<&'m str> = HashSet::from_iter(other.iter());
349-
350-
self_field_names == other_field_names
351-
}
352-
}
353-
}
354-
355338
/// Retrieves the ith dictionary entry, panicking if the index is out of bounds. Accessing
356339
/// [unvalidated] input could also panic if the underlying bytes are invalid.
357340
///
@@ -586,42 +569,7 @@ mod tests {
586569
let m2 = VariantMetadata::try_new(&metadata_bytes).unwrap();
587570
assert!(!m2.is_sorted());
588571

589-
assert_eq!(m1, m2);
590-
}
591-
592-
#[test]
593-
fn test_compare_sorted_dictionary_with_unsorted_dictionary2() {
594-
// create a sorted object
595-
let mut b = VariantBuilder::new();
596-
let mut o = b.new_object();
597-
598-
o.insert("a", false);
599-
o.insert("b", false);
600-
601-
o.finish().unwrap();
602-
603-
let (m, _) = b.finish();
604-
605-
let m1 = VariantMetadata::new(&m);
606-
assert!(m1.is_sorted());
607-
608-
// Create metadata with an unsorted dictionary (field names are "b", "b", "a")
609-
// Since field names are not unique nor lexicographically ordered, it is considered not sorted.
610-
let metadata_bytes = vec![
611-
0b0000_0001,
612-
3, // dictionary size
613-
0, // "b"
614-
1, // "b"
615-
2, // "a"
616-
3,
617-
b'b',
618-
b'b',
619-
b'a',
620-
];
621-
let m2 = VariantMetadata::try_new(&metadata_bytes).unwrap();
622-
assert!(!m2.is_sorted());
623-
624-
assert_eq!(m1, m2);
572+
assert_ne!(m1, m2);
625573
}
626574

627575
#[test]
@@ -642,35 +590,4 @@ mod tests {
642590

643591
assert_eq!(m1, m2);
644592
}
645-
646-
#[test]
647-
fn test_compare_sorted_dictionary_with_sorted_dictionary2() {
648-
// create a sorted object
649-
let mut b = VariantBuilder::new();
650-
let mut o = b.new_object();
651-
652-
o.insert("a", false);
653-
o.insert("b", false);
654-
655-
o.finish().unwrap();
656-
657-
let (m, _) = b.finish();
658-
659-
let m1 = VariantMetadata::new(&m);
660-
661-
// create a sorted object with different field names
662-
let mut b = VariantBuilder::new();
663-
let mut o = b.new_object();
664-
665-
o.insert("c", false);
666-
o.insert("d", false);
667-
668-
o.finish().unwrap();
669-
670-
let (m, _) = b.finish();
671-
672-
let m2 = VariantMetadata::new(&m);
673-
674-
assert_ne!(m1, m2);
675-
}
676593
}

parquet-variant/src/variant/object.rs

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -413,13 +413,8 @@ impl<'m, 'v> VariantObject<'m, 'v> {
413413
// checks whether the field values are equal -- regardless of their order
414414
impl<'m, 'v> PartialEq for VariantObject<'m, 'v> {
415415
fn eq(&self, other: &Self) -> bool {
416-
let mut is_equal = self.metadata == other.metadata
417-
&& self.header == other.header
418-
&& self.num_elements == other.num_elements
419-
&& self.first_field_offset_byte == other.first_field_offset_byte
420-
&& self.first_value_byte == other.first_value_byte;
416+
let mut is_equal = self.num_elements == other.num_elements;
421417

422-
// value validation
423418
let other_fields: HashMap<&str, Variant> = HashMap::from_iter(other.iter());
424419

425420
for (field_name, variant) in self.iter() {
@@ -938,27 +933,30 @@ mod tests {
938933

939934
o.finish().unwrap();
940935

941-
let (m, v) = b.finish();
936+
let (meta1, value1) = b.finish();
942937

943-
let v1 = Variant::try_new(&m, &v).unwrap();
938+
let v1 = Variant::try_new(&meta1, &value1).unwrap();
944939
// v1 is sorted
945940
assert!(v1.metadata().unwrap().is_sorted());
946941

947942
// create a second object with different insertion order
948-
let mut b = VariantBuilder::new();
943+
let mut b = VariantBuilder::new().with_field_names(["d", "c", "b", "a"].into_iter());
949944
let mut o = b.new_object();
950945

951946
o.insert("b", 4.3);
952947
o.insert("a", ());
953948

954949
o.finish().unwrap();
955950

956-
let (m, v) = b.finish();
951+
let (meta2, value2) = b.finish();
957952

958-
let v2 = Variant::try_new(&m, &v).unwrap();
953+
let v2 = Variant::try_new(&meta2, &value2).unwrap();
959954
// v2 is not sorted
960955
assert!(!v2.metadata().unwrap().is_sorted());
961956

957+
// object metadata are not the same
958+
assert_ne!(v1.metadata(), v2.metadata());
959+
962960
// objects are still logically equal
963961
assert_eq!(v1, v2);
964962
}
@@ -976,10 +974,6 @@ mod tests {
976974

977975
let (m, v) = b.finish();
978976

979-
let variant_metadata = VariantMetadata::new(&m);
980-
981-
dbg!(variant_metadata.iter().collect::<Vec<_>>());
982-
983977
let v1 = Variant::try_new(&m, &v).unwrap();
984978

985979
// Create metadata with an unsorted dictionary (field names are "a", "a", "b")
@@ -998,12 +992,7 @@ mod tests {
998992
let m = VariantMetadata::try_new(&metadata_bytes).unwrap();
999993
assert!(!m.is_sorted());
1000994

1001-
dbg!(m.iter().collect::<Vec<_>>());
1002995
let v2 = Variant::new_with_metadata(m, &v);
1003-
1004-
dbg!(v1.as_object().unwrap().iter().collect::<Vec<_>>());
1005-
dbg!(v2.as_object().unwrap().iter().collect::<Vec<_>>());
1006-
1007996
assert_eq!(v1, v2);
1008997
}
1009998
}

0 commit comments

Comments
 (0)