Skip to content

Commit b67b6c7

Browse files
Impl PartialEq for VariantObject
1 parent 03a837e commit b67b6c7

File tree

3 files changed

+280
-33
lines changed

3 files changed

+280
-33
lines changed

parquet-variant/src/builder.rs

Lines changed: 79 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
// under the License.
1717
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
1818
use crate::{
19-
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantMetadata,
19+
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList,
20+
VariantMetadata, VariantObject,
2021
};
2122
use arrow_schema::ArrowError;
2223
use indexmap::{IndexMap, IndexSet};
23-
use std::collections::{HashMap, HashSet};
24+
use std::collections::HashSet;
2425

2526
const BASIC_TYPE_BITS: u8 = 2;
2627
const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
@@ -216,6 +217,57 @@ impl ValueBuffer {
216217
self.append_slice(value.as_bytes());
217218
}
218219

220+
/// Re-encodes every field of `obj` into this buffer.
///
/// A fresh object builder is opened on this buffer, each `(field name, value)`
/// pair yielded by `obj.iter()` is inserted into it, and the builder is then
/// finished.
///
/// # Panics
///
/// Panics if finishing the object builder returns an error.
fn append_object(&mut self, metadata_builder: &mut MetadataBuilder, obj: VariantObject) {
    let mut builder = self.new_object(metadata_builder);

    obj.iter().for_each(|(name, field)| {
        builder.insert(name, field);
    });

    builder.finish().unwrap();
}
229+
230+
fn try_append_object(
231+
&mut self,
232+
metadata_builder: &mut MetadataBuilder,
233+
obj: VariantObject,
234+
) -> Result<(), ArrowError> {
235+
let mut object_builder = self.new_object(metadata_builder);
236+
237+
for res in obj.iter_try() {
238+
let (field_name, value) = res?;
239+
object_builder.try_insert(field_name, value)?;
240+
}
241+
242+
object_builder.finish()?;
243+
244+
Ok(())
245+
}
246+
247+
/// Re-encodes every element of `list` into this buffer.
///
/// Opens a list builder on this buffer, appends each value yielded by
/// `list.iter()` in order, then finishes the builder.
fn append_list(&mut self, metadata_builder: &mut MetadataBuilder, list: VariantList) {
    let mut elements = self.new_list(metadata_builder);

    list.iter().for_each(|item| {
        elements.append_value(item);
    });

    elements.finish();
}
254+
255+
fn try_append_list(
256+
&mut self,
257+
metadata_builder: &mut MetadataBuilder,
258+
list: VariantList,
259+
) -> Result<(), ArrowError> {
260+
let mut list_builder = self.new_list(metadata_builder);
261+
for res in list.iter_try() {
262+
let value = res?;
263+
list_builder.try_append_value(value)?;
264+
}
265+
266+
list_builder.finish();
267+
268+
Ok(())
269+
}
270+
219271
fn offset(&self) -> usize {
220272
self.0.len()
221273
}
@@ -252,9 +304,31 @@ impl ValueBuffer {
252304
variant: Variant<'m, 'd>,
253305
metadata_builder: &mut MetadataBuilder,
254306
) {
255-
self.try_append_variant(variant, metadata_builder).unwrap();
307+
match variant {
308+
Variant::Null => self.append_null(),
309+
Variant::BooleanTrue => self.append_bool(true),
310+
Variant::BooleanFalse => self.append_bool(false),
311+
Variant::Int8(v) => self.append_int8(v),
312+
Variant::Int16(v) => self.append_int16(v),
313+
Variant::Int32(v) => self.append_int32(v),
314+
Variant::Int64(v) => self.append_int64(v),
315+
Variant::Date(v) => self.append_date(v),
316+
Variant::TimestampMicros(v) => self.append_timestamp_micros(v),
317+
Variant::TimestampNtzMicros(v) => self.append_timestamp_ntz_micros(v),
318+
Variant::Decimal4(decimal4) => self.append_decimal4(decimal4),
319+
Variant::Decimal8(decimal8) => self.append_decimal8(decimal8),
320+
Variant::Decimal16(decimal16) => self.append_decimal16(decimal16),
321+
Variant::Float(v) => self.append_float(v),
322+
Variant::Double(v) => self.append_double(v),
323+
Variant::Binary(v) => self.append_binary(v),
324+
Variant::String(s) => self.append_string(s),
325+
Variant::ShortString(s) => self.append_short_string(s),
326+
Variant::Object(obj) => self.append_object(metadata_builder, obj),
327+
Variant::List(list) => self.append_list(metadata_builder, list),
328+
}
256329
}
257330

331+
/// Appends a variant to the buffer
258332
fn try_append_variant<'m, 'd>(
259333
&mut self,
260334
variant: Variant<'m, 'd>,
@@ -279,35 +353,8 @@ impl ValueBuffer {
279353
Variant::Binary(v) => self.append_binary(v),
280354
Variant::String(s) => self.append_string(s),
281355
Variant::ShortString(s) => self.append_short_string(s),
282-
Variant::Object(obj) => {
283-
let metadata_field_names = metadata_builder
284-
.field_names
285-
.iter()
286-
.enumerate()
287-
.map(|(i, f)| (f.clone(), i))
288-
.collect::<HashMap<_, _>>();
289-
290-
let mut object_builder = self.new_object(metadata_builder);
291-
292-
// first add all object fields that exist in metadata builder
293-
let mut object_fields = obj.iter().collect::<Vec<_>>();
294-
295-
object_fields
296-
.sort_by_key(|(field_name, _)| metadata_field_names.get(field_name as &str));
297-
298-
for (field_name, value) in object_fields {
299-
object_builder.insert(field_name, value);
300-
}
301-
302-
object_builder.finish()?;
303-
}
304-
Variant::List(list) => {
305-
let mut list_builder = self.new_list(metadata_builder);
306-
for value in list.iter() {
307-
list_builder.append_value(value);
308-
}
309-
list_builder.finish();
310-
}
356+
Variant::Object(obj) => self.try_append_object(metadata_builder, obj)?,
357+
Variant::List(list) => self.try_append_list(metadata_builder, list)?,
311358
}
312359

313360
Ok(())

parquet-variant/src/variant/metadata.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,7 @@ impl std::ops::Index<usize> for VariantMetadata<'_> {
346346

347347
#[cfg(test)]
348348
mod tests {
349+
349350
use super::*;
350351

351352
/// `"cat"`, `"dog"` – valid metadata

parquet-variant/src/variant/object.rs

Lines changed: 200 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
// KIND, either express or implied. See the License for the
1515
// specific language governing permissions and limitations
1616
// under the License.
17+
1718
use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
1819
use crate::utils::{
1920
first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by,
2021
};
2122
use crate::variant::{Variant, VariantMetadata};
23+
use std::collections::HashMap;
2224

2325
use arrow_schema::ArrowError;
2426

@@ -114,7 +116,7 @@ impl VariantObjectHeader {
114116
///
115117
/// [valid]: VariantMetadata#Validation
116118
/// [Variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-data-for-object-basic_type2
117-
#[derive(Debug, Clone, PartialEq)]
119+
#[derive(Debug, Clone)]
118120
pub struct VariantObject<'m, 'v> {
119121
pub metadata: VariantMetadata<'m>,
120122
pub value: &'v [u8],
@@ -387,6 +389,31 @@ impl<'m, 'v> VariantObject<'m, 'v> {
387389
}
388390
}
389391

392+
impl<'m, 'v> PartialEq for VariantObject<'m, 'v> {
393+
fn eq(&self, other: &Self) -> bool {
394+
let mut is_equal = self.metadata == other.metadata
395+
&& self.header == other.header
396+
&& self.num_elements == other.num_elements
397+
&& self.first_field_offset_byte == other.first_field_offset_byte
398+
&& self.first_value_byte == other.first_value_byte
399+
&& self.validated == other.validated;
400+
401+
// value validation
402+
let other_fields: HashMap<&str, Variant> = HashMap::from_iter(other.iter());
403+
404+
for (field_name, variant) in self.iter() {
405+
match other_fields.get(field_name as &str) {
406+
Some(other_variant) => {
407+
is_equal = is_equal && variant == *other_variant;
408+
}
409+
None => return false,
410+
}
411+
}
412+
413+
is_equal
414+
}
415+
}
416+
390417
#[cfg(test)]
391418
mod tests {
392419
use crate::VariantBuilder;
@@ -718,4 +745,176 @@ mod tests {
718745
test_variant_object_with_large_data(16777216 + 1, OffsetSizeBytes::Four);
719746
// 2^24
720747
}
748+
749+
// Two variants decoded from the very same metadata/value buffers must
// compare equal.
#[test]
fn test_objects_with_same_fields_are_equal() {
    let (metadata, value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        // Deliberately inserted out of alphabetical order.
        fields.insert("b", ());
        fields.insert("c", ());
        fields.insert("a", ());

        fields.finish().unwrap();
        builder.finish()
    };

    let lhs = Variant::try_new(&metadata, &value).unwrap();
    let rhs = Variant::try_new(&metadata, &value).unwrap();

    assert_eq!(lhs, rhs);
}
767+
768+
// Objects built independently, with identical fields inserted in the same
// order, must compare equal.
#[test]
fn test_same_objects_with_different_builder_are_equal() {
    let (first_metadata, first_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("a", ());
        fields.insert("b", false);

        fields.finish().unwrap();
        builder.finish()
    };
    let first = Variant::try_new(&first_metadata, &first_value).unwrap();

    let (second_metadata, second_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("a", ());
        fields.insert("b", false);

        fields.finish().unwrap();
        builder.finish()
    };
    let second = Variant::try_new(&second_metadata, &second_value).unwrap();

    assert_eq!(first, second);
}
794+
795+
// Same top-level field names, but "b" holds a double in one object and a
// nested object in the other: the two must not compare equal.
#[test]
fn test_objects_with_different_values_are_not_equal() {
    let (first_metadata, first_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("a", ());
        fields.insert("b", 4.3);

        fields.finish().unwrap();
        builder.finish()
    };
    let first = Variant::try_new(&first_metadata, &first_value).unwrap();

    // Second object: same field names, but "b" is a nested object.
    let (second_metadata, second_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("a", ());
        let mut nested = fields.new_object("b");
        nested.insert("c", 3.3);
        nested.finish().unwrap();
        fields.finish().unwrap();

        builder.finish()
    };
    let second = Variant::try_new(&second_metadata, &second_value).unwrap();

    assert_ne!(first, second);
}
824+
825+
// Objects whose field names differ must not compare equal.
#[test]
fn test_objects_with_different_field_names_are_not_equal() {
    let (first_metadata, first_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("a", ());
        fields.insert("b", 4.3);

        fields.finish().unwrap();
        builder.finish()
    };
    let first = Variant::try_new(&first_metadata, &first_value).unwrap();

    // Second object: different field names (and a different double value).
    let (second_metadata, second_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("aardvark", ());
        fields.insert("barracuda", 3.3);

        fields.finish().unwrap();
        builder.finish()
    };
    let second = Variant::try_new(&second_metadata, &second_value).unwrap();

    assert_ne!(first, second);
}
853+
854+
// Two objects with the same fields compare equal even when the fields were
// inserted in a different order.
#[test]
fn test_objects_with_different_insertion_order_are_equal() {
    let (first_metadata, first_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("b", false);
        fields.insert("a", ());

        fields.finish().unwrap();
        builder.finish()
    };
    let first = Variant::try_new(&first_metadata, &first_value).unwrap();
    assert!(!first.metadata().unwrap().is_sorted());

    // Second builder is pre-filled with the field names "b" then "a",
    // while the fields themselves are inserted in the order a, b.
    let (second_metadata, second_value) = {
        let mut builder = VariantBuilder::new().with_field_names(["b", "a"].into_iter());
        let mut fields = builder.new_object();

        fields.insert("a", ());
        fields.insert("b", false);

        fields.finish().unwrap();
        builder.finish()
    };
    let second = Variant::try_new(&second_metadata, &second_value).unwrap();

    // The second variant should also have an unsorted dictionary.
    assert!(!second.metadata().unwrap().is_sorted());

    assert_eq!(first, second);
}
888+
889+
// Objects holding the same fields and values but whose metadata differs
// (one sorted, one not) must not compare equal.
#[test]
fn test_objects_with_differing_metadata_are_not_equal() {
    let (first_metadata, first_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("a", ());
        fields.insert("b", 4.3);

        fields.finish().unwrap();
        builder.finish()
    };
    let first = Variant::try_new(&first_metadata, &first_value).unwrap();
    assert!(first.metadata().unwrap().is_sorted());

    // Second object: same fields, opposite insertion order.
    let (second_metadata, second_value) = {
        let mut builder = VariantBuilder::new();
        let mut fields = builder.new_object();

        fields.insert("b", 4.3);
        fields.insert("a", ());

        fields.finish().unwrap();
        builder.finish()
    };
    let second = Variant::try_new(&second_metadata, &second_value).unwrap();
    assert!(!second.metadata().unwrap().is_sorted());

    assert_ne!(first, second);
}
721920
}

0 commit comments

Comments
 (0)