Skip to content

Commit a961b17

Browse files
Impl PartialEq for VariantObject
1 parent 03a837e commit a961b17

File tree

3 files changed

+109
-33
lines changed

3 files changed

+109
-33
lines changed

parquet-variant/src/builder.rs

Lines changed: 79 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
// under the License.
1717
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
1818
use crate::{
19-
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantMetadata,
19+
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList,
20+
VariantMetadata, VariantObject,
2021
};
2122
use arrow_schema::ArrowError;
2223
use indexmap::{IndexMap, IndexSet};
23-
use std::collections::{HashMap, HashSet};
24+
use std::collections::HashSet;
2425

2526
const BASIC_TYPE_BITS: u8 = 2;
2627
const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
@@ -216,6 +217,57 @@ impl ValueBuffer {
216217
self.append_slice(value.as_bytes());
217218
}
218219

220+
/// Copies every field of `obj` into a nested object started on this buffer.
///
/// This is the infallible counterpart of `try_append_object`: it walks the
/// object with `iter` and `insert`, and unwraps the result of `finish`, so it
/// panics if finishing the nested object reports an error.
fn append_object(&mut self, metadata_builder: &mut MetadataBuilder, obj: VariantObject) {
    let mut nested = self.new_object(metadata_builder);

    obj.iter().for_each(|(key, field_value)| {
        nested.insert(key, field_value);
    });

    nested.finish().unwrap();
}
229+
230+
fn try_append_object(
231+
&mut self,
232+
metadata_builder: &mut MetadataBuilder,
233+
obj: VariantObject,
234+
) -> Result<(), ArrowError> {
235+
let mut object_builder = self.new_object(metadata_builder);
236+
237+
for res in obj.iter_try() {
238+
let (field_name, value) = res?;
239+
object_builder.try_insert(field_name, value)?;
240+
}
241+
242+
object_builder.finish()?;
243+
244+
Ok(())
245+
}
246+
247+
/// Copies every element of `list` into a nested list started on this buffer.
///
/// This is the infallible counterpart of `try_append_list`; elements are
/// read with `iter` and appended with `append_value`.
fn append_list(&mut self, metadata_builder: &mut MetadataBuilder, list: VariantList) {
    let mut nested = self.new_list(metadata_builder);

    list.iter().for_each(|element| {
        nested.append_value(element);
    });

    nested.finish();
}
254+
255+
fn try_append_list(
256+
&mut self,
257+
metadata_builder: &mut MetadataBuilder,
258+
list: VariantList,
259+
) -> Result<(), ArrowError> {
260+
let mut list_builder = self.new_list(metadata_builder);
261+
for res in list.iter_try() {
262+
let value = res?;
263+
list_builder.try_append_value(value)?;
264+
}
265+
266+
list_builder.finish();
267+
268+
Ok(())
269+
}
270+
219271
/// Returns the current length of the inner buffer (`self.0`).
fn offset(&self) -> usize {
220272
self.0.len()
221273
}
@@ -252,9 +304,31 @@ impl ValueBuffer {
252304
variant: Variant<'m, 'd>,
253305
metadata_builder: &mut MetadataBuilder,
254306
) {
255-
self.try_append_variant(variant, metadata_builder).unwrap();
307+
match variant {
308+
Variant::Null => self.append_null(),
309+
Variant::BooleanTrue => self.append_bool(true),
310+
Variant::BooleanFalse => self.append_bool(false),
311+
Variant::Int8(v) => self.append_int8(v),
312+
Variant::Int16(v) => self.append_int16(v),
313+
Variant::Int32(v) => self.append_int32(v),
314+
Variant::Int64(v) => self.append_int64(v),
315+
Variant::Date(v) => self.append_date(v),
316+
Variant::TimestampMicros(v) => self.append_timestamp_micros(v),
317+
Variant::TimestampNtzMicros(v) => self.append_timestamp_ntz_micros(v),
318+
Variant::Decimal4(decimal4) => self.append_decimal4(decimal4),
319+
Variant::Decimal8(decimal8) => self.append_decimal8(decimal8),
320+
Variant::Decimal16(decimal16) => self.append_decimal16(decimal16),
321+
Variant::Float(v) => self.append_float(v),
322+
Variant::Double(v) => self.append_double(v),
323+
Variant::Binary(v) => self.append_binary(v),
324+
Variant::String(s) => self.append_string(s),
325+
Variant::ShortString(s) => self.append_short_string(s),
326+
Variant::Object(obj) => self.append_object(metadata_builder, obj),
327+
Variant::List(list) => self.append_list(metadata_builder, list),
328+
}
256329
}
257330

331+
/// Appends a variant to the buffer, propagating any error raised while appending a nested object or list
258332
fn try_append_variant<'m, 'd>(
259333
&mut self,
260334
variant: Variant<'m, 'd>,
@@ -279,35 +353,8 @@ impl ValueBuffer {
279353
Variant::Binary(v) => self.append_binary(v),
280354
Variant::String(s) => self.append_string(s),
281355
Variant::ShortString(s) => self.append_short_string(s),
282-
Variant::Object(obj) => {
283-
let metadata_field_names = metadata_builder
284-
.field_names
285-
.iter()
286-
.enumerate()
287-
.map(|(i, f)| (f.clone(), i))
288-
.collect::<HashMap<_, _>>();
289-
290-
let mut object_builder = self.new_object(metadata_builder);
291-
292-
// first add all object fields that exist in metadata builder
293-
let mut object_fields = obj.iter().collect::<Vec<_>>();
294-
295-
object_fields
296-
.sort_by_key(|(field_name, _)| metadata_field_names.get(field_name as &str));
297-
298-
for (field_name, value) in object_fields {
299-
object_builder.insert(field_name, value);
300-
}
301-
302-
object_builder.finish()?;
303-
}
304-
Variant::List(list) => {
305-
let mut list_builder = self.new_list(metadata_builder);
306-
for value in list.iter() {
307-
list_builder.append_value(value);
308-
}
309-
list_builder.finish();
310-
}
356+
Variant::Object(obj) => self.try_append_object(metadata_builder, obj)?,
357+
Variant::List(list) => self.try_append_list(metadata_builder, list)?,
311358
}
312359

313360
Ok(())

parquet-variant/src/variant/metadata.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,8 @@ impl std::ops::Index<usize> for VariantMetadata<'_> {
346346

347347
#[cfg(test)]
348348
mod tests {
349+
use crate::{Variant, VariantBuilder};
350+
349351
use super::*;
350352

351353
/// `"cat"`, `"dog"` – valid metadata

parquet-variant/src/variant/object.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
// KIND, either express or implied. See the License for the
1515
// specific language governing permissions and limitations
1616
// under the License.
17+
1718
use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
1819
use crate::utils::{
1920
first_byte_from_slice, overflow_error, slice_from_slice, try_binary_search_range_by,
2021
};
2122
use crate::variant::{Variant, VariantMetadata};
23+
use std::collections::HashMap;
2224

2325
use arrow_schema::ArrowError;
2426

@@ -114,7 +116,7 @@ impl VariantObjectHeader {
114116
///
115117
/// [valid]: VariantMetadata#Validation
116118
/// [Variant spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#value-data-for-object-basic_type2
117-
#[derive(Debug, Clone, PartialEq)]
119+
#[derive(Debug, Clone)]
118120
pub struct VariantObject<'m, 'v> {
119121
pub metadata: VariantMetadata<'m>,
120122
pub value: &'v [u8],
@@ -387,6 +389,31 @@ impl<'m, 'v> VariantObject<'m, 'v> {
387389
}
388390
}
389391

392+
impl<'m, 'v> PartialEq for VariantObject<'m, 'v> {
393+
fn eq(&self, other: &Self) -> bool {
394+
let mut is_equal = self.metadata == other.metadata
395+
&& self.header == other.header
396+
&& self.num_elements == other.num_elements
397+
&& self.first_field_offset_byte == other.first_field_offset_byte
398+
&& self.first_value_byte == other.first_value_byte
399+
&& self.validated == other.validated;
400+
401+
// value validation
402+
let other_fields: HashMap<&str, Variant> = HashMap::from_iter(other.iter());
403+
404+
for (field_name, variant) in self.iter() {
405+
match other_fields.get(field_name as &str) {
406+
Some(other_variant) => {
407+
is_equal = is_equal && variant == *other_variant;
408+
}
409+
None => return false,
410+
}
411+
}
412+
413+
is_equal
414+
}
415+
}
416+
390417
#[cfg(test)]
391418
mod tests {
392419
use crate::VariantBuilder;

0 commit comments

Comments
 (0)