Skip to content

Commit 29010e4

Browse files
Impl PartialEq for VariantObject
1 parent 03a837e commit 29010e4

File tree

3 files changed

+310
-34
lines changed

3 files changed

+310
-34
lines changed

parquet-variant/src/builder.rs

Lines changed: 79 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,12 @@
1616
// under the License.
1717
use crate::decoder::{VariantBasicType, VariantPrimitiveType};
1818
use crate::{
19-
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantMetadata,
19+
ShortString, Variant, VariantDecimal16, VariantDecimal4, VariantDecimal8, VariantList,
20+
VariantMetadata, VariantObject,
2021
};
2122
use arrow_schema::ArrowError;
2223
use indexmap::{IndexMap, IndexSet};
23-
use std::collections::{HashMap, HashSet};
24+
use std::collections::HashSet;
2425

2526
const BASIC_TYPE_BITS: u8 = 2;
2627
const UNIX_EPOCH_DATE: chrono::NaiveDate = chrono::NaiveDate::from_ymd_opt(1970, 1, 1).unwrap();
@@ -216,6 +217,57 @@ impl ValueBuffer {
216217
self.append_slice(value.as_bytes());
217218
}
218219

220+
/// Re-encodes `obj` into this buffer as a nested object.
///
/// A fresh object builder is opened on top of this buffer, every
/// `(field name, value)` pair yielded by `obj.iter()` is inserted into it,
/// and the builder is then finished.
///
/// # Panics
///
/// Panics if finishing the object builder returns an error.
fn append_object(&mut self, metadata_builder: &mut MetadataBuilder, obj: VariantObject) {
    let mut builder = self.new_object(metadata_builder);

    obj.iter().for_each(|(name, value)| {
        builder.insert(name, value);
    });

    builder.finish().unwrap();
}
229+
230+
fn try_append_object(
231+
&mut self,
232+
metadata_builder: &mut MetadataBuilder,
233+
obj: VariantObject,
234+
) -> Result<(), ArrowError> {
235+
let mut object_builder = self.new_object(metadata_builder);
236+
237+
for res in obj.iter_try() {
238+
let (field_name, value) = res?;
239+
object_builder.try_insert(field_name, value)?;
240+
}
241+
242+
object_builder.finish()?;
243+
244+
Ok(())
245+
}
246+
247+
/// Re-encodes `list` into this buffer as a nested list.
///
/// A fresh list builder is opened on top of this buffer, each element
/// yielded by `list.iter()` is appended to it, and the builder is finished.
fn append_list(&mut self, metadata_builder: &mut MetadataBuilder, list: VariantList) {
    let mut builder = self.new_list(metadata_builder);

    list.iter().for_each(|element| {
        builder.append_value(element);
    });

    builder.finish();
}
254+
255+
fn try_append_list(
256+
&mut self,
257+
metadata_builder: &mut MetadataBuilder,
258+
list: VariantList,
259+
) -> Result<(), ArrowError> {
260+
let mut list_builder = self.new_list(metadata_builder);
261+
for res in list.iter_try() {
262+
let value = res?;
263+
list_builder.try_append_value(value)?;
264+
}
265+
266+
list_builder.finish();
267+
268+
Ok(())
269+
}
270+
219271
fn offset(&self) -> usize {
220272
self.0.len()
221273
}
@@ -252,9 +304,31 @@ impl ValueBuffer {
252304
variant: Variant<'m, 'd>,
253305
metadata_builder: &mut MetadataBuilder,
254306
) {
255-
self.try_append_variant(variant, metadata_builder).unwrap();
307+
match variant {
308+
Variant::Null => self.append_null(),
309+
Variant::BooleanTrue => self.append_bool(true),
310+
Variant::BooleanFalse => self.append_bool(false),
311+
Variant::Int8(v) => self.append_int8(v),
312+
Variant::Int16(v) => self.append_int16(v),
313+
Variant::Int32(v) => self.append_int32(v),
314+
Variant::Int64(v) => self.append_int64(v),
315+
Variant::Date(v) => self.append_date(v),
316+
Variant::TimestampMicros(v) => self.append_timestamp_micros(v),
317+
Variant::TimestampNtzMicros(v) => self.append_timestamp_ntz_micros(v),
318+
Variant::Decimal4(decimal4) => self.append_decimal4(decimal4),
319+
Variant::Decimal8(decimal8) => self.append_decimal8(decimal8),
320+
Variant::Decimal16(decimal16) => self.append_decimal16(decimal16),
321+
Variant::Float(v) => self.append_float(v),
322+
Variant::Double(v) => self.append_double(v),
323+
Variant::Binary(v) => self.append_binary(v),
324+
Variant::String(s) => self.append_string(s),
325+
Variant::ShortString(s) => self.append_short_string(s),
326+
Variant::Object(obj) => self.append_object(metadata_builder, obj),
327+
Variant::List(list) => self.append_list(metadata_builder, list),
328+
}
256329
}
257330

331+
/// Appends a variant to the buffer
258332
fn try_append_variant<'m, 'd>(
259333
&mut self,
260334
variant: Variant<'m, 'd>,
@@ -279,35 +353,8 @@ impl ValueBuffer {
279353
Variant::Binary(v) => self.append_binary(v),
280354
Variant::String(s) => self.append_string(s),
281355
Variant::ShortString(s) => self.append_short_string(s),
282-
Variant::Object(obj) => {
283-
let metadata_field_names = metadata_builder
284-
.field_names
285-
.iter()
286-
.enumerate()
287-
.map(|(i, f)| (f.clone(), i))
288-
.collect::<HashMap<_, _>>();
289-
290-
let mut object_builder = self.new_object(metadata_builder);
291-
292-
// first add all object fields that exist in metadata builder
293-
let mut object_fields = obj.iter().collect::<Vec<_>>();
294-
295-
object_fields
296-
.sort_by_key(|(field_name, _)| metadata_field_names.get(field_name as &str));
297-
298-
for (field_name, value) in object_fields {
299-
object_builder.insert(field_name, value);
300-
}
301-
302-
object_builder.finish()?;
303-
}
304-
Variant::List(list) => {
305-
let mut list_builder = self.new_list(metadata_builder);
306-
for value in list.iter() {
307-
list_builder.append_value(value);
308-
}
309-
list_builder.finish();
310-
}
356+
Variant::Object(obj) => self.try_append_object(metadata_builder, obj)?,
357+
Variant::List(list) => self.try_append_list(metadata_builder, list)?,
311358
}
312359

313360
Ok(())

parquet-variant/src/variant/metadata.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
use std::collections::HashSet;
19+
1820
use crate::decoder::{map_bytes_to_offsets, OffsetSizeBytes};
1921
use crate::utils::{first_byte_from_slice, overflow_error, slice_from_slice, string_from_slice};
2022

@@ -125,7 +127,7 @@ impl VariantMetadataHeader {
125127
///
126128
/// [`Variant`]: crate::Variant
127129
/// [Variant Spec]: https://github.com/apache/parquet-format/blob/master/VariantEncoding.md#metadata-encoding
128-
#[derive(Debug, Clone, PartialEq)]
130+
#[derive(Debug, Clone)]
129131
pub struct VariantMetadata<'m> {
130132
pub(crate) bytes: &'m [u8],
131133
header: VariantMetadataHeader,
@@ -332,6 +334,22 @@ impl<'m> VariantMetadata<'m> {
332334
}
333335
}
334336

337+
impl<'m> PartialEq for VariantMetadata<'m> {
338+
fn eq(&self, other: &Self) -> bool {
339+
let mut is_equal = self.is_empty() == other.is_empty()
340+
&& self.is_fully_validated() == other.is_fully_validated()
341+
&& self.first_value_byte == other.first_value_byte
342+
&& self.validated == other.validated;
343+
344+
let field_names: HashSet<&'m str> = HashSet::from_iter(self.iter());
345+
let other_field_names: HashSet<&'m str> = HashSet::from_iter(other.iter());
346+
347+
is_equal = is_equal && field_names == other_field_names;
348+
349+
is_equal
350+
}
351+
}
352+
335353
/// Retrieves the ith dictionary entry, panicking if the index is out of bounds. Accessing
336354
/// [unvalidated] input could also panic if the underlying bytes are invalid.
337355
///
@@ -346,6 +364,7 @@ impl std::ops::Index<usize> for VariantMetadata<'_> {
346364

347365
#[cfg(test)]
348366
mod tests {
367+
349368
use super::*;
350369

351370
/// `"cat"`, `"dog"` – valid metadata

0 commit comments

Comments
 (0)