@@ -148,7 +148,7 @@ impl<'a> TryFrom<&Schema<'a>> for AvroField {
148
148
match schema {
149
149
Schema :: Complex ( ComplexType :: Record ( r) ) => {
150
150
let mut resolver = Resolver :: default ( ) ;
151
- let data_type = make_data_type ( schema, None , & mut resolver, false ) ?;
151
+ let data_type = make_data_type ( schema, None , & mut resolver, false , false ) ?;
152
152
Ok ( AvroField {
153
153
data_type,
154
154
name : r. name . to_string ( ) ,
@@ -161,6 +161,60 @@ impl<'a> TryFrom<&Schema<'a>> for AvroField {
161
161
}
162
162
}
163
163
164
+ /// Builder for an [`AvroField`]
165
+ #[ derive( Debug ) ]
166
+ pub struct AvroFieldBuilder < ' a > {
167
+ schema : & ' a Schema < ' a > ,
168
+ use_utf8view : bool ,
169
+ strict_mode : bool ,
170
+ }
171
+
172
+ impl < ' a > AvroFieldBuilder < ' a > {
173
+ /// Creates a new [`AvroFieldBuilder`]
174
+ pub fn new ( schema : & ' a Schema < ' a > ) -> Self {
175
+ Self {
176
+ schema,
177
+ use_utf8view : false ,
178
+ strict_mode : false ,
179
+ }
180
+ }
181
+
182
+ /// Enable or disable Utf8View support
183
+ pub fn with_utf8view ( mut self , use_utf8view : bool ) -> Self {
184
+ self . use_utf8view = use_utf8view;
185
+ self
186
+ }
187
+
188
+ /// Enable or disable strict mode.
189
+ pub fn with_strict_mode ( mut self , strict_mode : bool ) -> Self {
190
+ self . strict_mode = strict_mode;
191
+ self
192
+ }
193
+
194
+ /// Build an [`AvroField`] from the builder
195
+ pub fn build ( self ) -> Result < AvroField , ArrowError > {
196
+ match self . schema {
197
+ Schema :: Complex ( ComplexType :: Record ( r) ) => {
198
+ let mut resolver = Resolver :: default ( ) ;
199
+ let data_type = make_data_type (
200
+ self . schema ,
201
+ None ,
202
+ & mut resolver,
203
+ self . use_utf8view ,
204
+ self . strict_mode ,
205
+ ) ?;
206
+ Ok ( AvroField {
207
+ name : r. name . to_string ( ) ,
208
+ data_type,
209
+ } )
210
+ }
211
+ _ => Err ( ArrowError :: ParseError ( format ! (
212
+ "Expected a Record schema to build an AvroField, but got {:?}" ,
213
+ self . schema
214
+ ) ) ) ,
215
+ }
216
+ }
217
+ }
164
218
/// An Avro encoding
165
219
///
166
220
/// <https://avro.apache.org/docs/1.11.1/specification/#encodings>
@@ -409,6 +463,7 @@ fn make_data_type<'a>(
409
463
namespace : Option < & ' a str > ,
410
464
resolver : & mut Resolver < ' a > ,
411
465
use_utf8view : bool ,
466
+ strict_mode : bool ,
412
467
) -> Result < AvroDataType , ArrowError > {
413
468
match schema {
414
469
Schema :: TypeName ( TypeName :: Primitive ( p) ) => {
@@ -428,12 +483,20 @@ fn make_data_type<'a>(
428
483
. position ( |x| x == & Schema :: TypeName ( TypeName :: Primitive ( PrimitiveType :: Null ) ) ) ;
429
484
match ( f. len ( ) == 2 , null) {
430
485
( true , Some ( 0 ) ) => {
431
- let mut field = make_data_type ( & f[ 1 ] , namespace, resolver, use_utf8view) ?;
486
+ let mut field =
487
+ make_data_type ( & f[ 1 ] , namespace, resolver, use_utf8view, strict_mode) ?;
432
488
field. nullability = Some ( Nullability :: NullFirst ) ;
433
489
Ok ( field)
434
490
}
435
491
( true , Some ( 1 ) ) => {
436
- let mut field = make_data_type ( & f[ 0 ] , namespace, resolver, use_utf8view) ?;
492
+ if strict_mode {
493
+ return Err ( ArrowError :: SchemaError (
494
+ "Found Avro union of the form ['T','null'], which is disallowed in strict_mode"
495
+ . to_string ( ) ,
496
+ ) ) ;
497
+ }
498
+ let mut field =
499
+ make_data_type ( & f[ 0 ] , namespace, resolver, use_utf8view, strict_mode) ?;
437
500
field. nullability = Some ( Nullability :: NullSecond ) ;
438
501
Ok ( field)
439
502
}
@@ -456,6 +519,7 @@ fn make_data_type<'a>(
456
519
namespace,
457
520
resolver,
458
521
use_utf8view,
522
+ strict_mode,
459
523
) ?,
460
524
} )
461
525
} )
@@ -469,8 +533,13 @@ fn make_data_type<'a>(
469
533
Ok ( field)
470
534
}
471
535
ComplexType :: Array ( a) => {
472
- let mut field =
473
- make_data_type ( a. items . as_ref ( ) , namespace, resolver, use_utf8view) ?;
536
+ let mut field = make_data_type (
537
+ a. items . as_ref ( ) ,
538
+ namespace,
539
+ resolver,
540
+ use_utf8view,
541
+ strict_mode,
542
+ ) ?;
474
543
Ok ( AvroDataType {
475
544
nullability : None ,
476
545
metadata : a. attributes . field_metadata ( ) ,
@@ -535,7 +604,8 @@ fn make_data_type<'a>(
535
604
Ok ( field)
536
605
}
537
606
ComplexType :: Map ( m) => {
538
- let val = make_data_type ( & m. values , namespace, resolver, use_utf8view) ?;
607
+ let val =
608
+ make_data_type ( & m. values , namespace, resolver, use_utf8view, strict_mode) ?;
539
609
Ok ( AvroDataType {
540
610
nullability : None ,
541
611
metadata : m. attributes . field_metadata ( ) ,
@@ -549,6 +619,7 @@ fn make_data_type<'a>(
549
619
namespace,
550
620
resolver,
551
621
use_utf8view,
622
+ strict_mode,
552
623
) ?;
553
624
554
625
// https://avro.apache.org/docs/1.11.1/specification/#logical-types
@@ -630,7 +701,7 @@ mod tests {
630
701
let schema = create_schema_with_logical_type ( PrimitiveType :: Int , "date" ) ;
631
702
632
703
let mut resolver = Resolver :: default ( ) ;
633
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
704
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
634
705
635
706
assert ! ( matches!( result. codec, Codec :: Date32 ) ) ;
636
707
}
@@ -640,7 +711,7 @@ mod tests {
640
711
let schema = create_schema_with_logical_type ( PrimitiveType :: Int , "time-millis" ) ;
641
712
642
713
let mut resolver = Resolver :: default ( ) ;
643
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
714
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
644
715
645
716
assert ! ( matches!( result. codec, Codec :: TimeMillis ) ) ;
646
717
}
@@ -650,7 +721,7 @@ mod tests {
650
721
let schema = create_schema_with_logical_type ( PrimitiveType :: Long , "time-micros" ) ;
651
722
652
723
let mut resolver = Resolver :: default ( ) ;
653
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
724
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
654
725
655
726
assert ! ( matches!( result. codec, Codec :: TimeMicros ) ) ;
656
727
}
@@ -660,7 +731,7 @@ mod tests {
660
731
let schema = create_schema_with_logical_type ( PrimitiveType :: Long , "timestamp-millis" ) ;
661
732
662
733
let mut resolver = Resolver :: default ( ) ;
663
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
734
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
664
735
665
736
assert ! ( matches!( result. codec, Codec :: TimestampMillis ( true ) ) ) ;
666
737
}
@@ -670,7 +741,7 @@ mod tests {
670
741
let schema = create_schema_with_logical_type ( PrimitiveType :: Long , "timestamp-micros" ) ;
671
742
672
743
let mut resolver = Resolver :: default ( ) ;
673
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
744
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
674
745
675
746
assert ! ( matches!( result. codec, Codec :: TimestampMicros ( true ) ) ) ;
676
747
}
@@ -680,7 +751,7 @@ mod tests {
680
751
let schema = create_schema_with_logical_type ( PrimitiveType :: Long , "local-timestamp-millis" ) ;
681
752
682
753
let mut resolver = Resolver :: default ( ) ;
683
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
754
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
684
755
685
756
assert ! ( matches!( result. codec, Codec :: TimestampMillis ( false ) ) ) ;
686
757
}
@@ -690,7 +761,7 @@ mod tests {
690
761
let schema = create_schema_with_logical_type ( PrimitiveType :: Long , "local-timestamp-micros" ) ;
691
762
692
763
let mut resolver = Resolver :: default ( ) ;
693
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
764
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
694
765
695
766
assert ! ( matches!( result. codec, Codec :: TimestampMicros ( false ) ) ) ;
696
767
}
@@ -745,7 +816,7 @@ mod tests {
745
816
let schema = create_schema_with_logical_type ( PrimitiveType :: Int , "custom-type" ) ;
746
817
747
818
let mut resolver = Resolver :: default ( ) ;
748
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
819
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
749
820
750
821
assert_eq ! (
751
822
result. metadata. get( "logicalType" ) ,
@@ -758,7 +829,7 @@ mod tests {
758
829
let schema = Schema :: TypeName ( TypeName :: Primitive ( PrimitiveType :: String ) ) ;
759
830
760
831
let mut resolver = Resolver :: default ( ) ;
761
- let result = make_data_type ( & schema, None , & mut resolver, true ) . unwrap ( ) ;
832
+ let result = make_data_type ( & schema, None , & mut resolver, true , false ) . unwrap ( ) ;
762
833
763
834
assert ! ( matches!( result. codec, Codec :: Utf8View ) ) ;
764
835
}
@@ -768,7 +839,7 @@ mod tests {
768
839
let schema = Schema :: TypeName ( TypeName :: Primitive ( PrimitiveType :: String ) ) ;
769
840
770
841
let mut resolver = Resolver :: default ( ) ;
771
- let result = make_data_type ( & schema, None , & mut resolver, false ) . unwrap ( ) ;
842
+ let result = make_data_type ( & schema, None , & mut resolver, false , false ) . unwrap ( ) ;
772
843
773
844
assert ! ( matches!( result. codec, Codec :: Utf8 ) ) ;
774
845
}
@@ -796,7 +867,7 @@ mod tests {
796
867
let schema = Schema :: Complex ( ComplexType :: Record ( record) ) ;
797
868
798
869
let mut resolver = Resolver :: default ( ) ;
799
- let result = make_data_type ( & schema, None , & mut resolver, true ) . unwrap ( ) ;
870
+ let result = make_data_type ( & schema, None , & mut resolver, true , false ) . unwrap ( ) ;
800
871
801
872
if let Codec :: Struct ( fields) = & result. codec {
802
873
let first_field_codec = & fields[ 0 ] . data_type ( ) . codec ;
@@ -805,4 +876,25 @@ mod tests {
805
876
panic ! ( "Expected Struct codec" ) ;
806
877
}
807
878
}
879
+
880
/// Strict mode must refuse a `['T','null']` union (null in second position)
/// and report it as a `SchemaError` carrying the expected message.
#[test]
fn test_union_with_strict_mode() {
    // Union with the null member listed second.
    let null_second_union = Schema::Union(vec![
        Schema::TypeName(TypeName::Primitive(PrimitiveType::String)),
        Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)),
    ]);

    let mut resolver = Resolver::default();
    // Last argument enables strict mode; Utf8View stays off.
    let result = make_data_type(&null_second_union, None, &mut resolver, false, true);

    assert!(result.is_err());
    if let Err(ArrowError::SchemaError(msg)) = result {
        assert!(msg.contains(
            "Found Avro union of the form ['T','null'], which is disallowed in strict_mode"
        ));
    } else {
        panic!("Expected SchemaError");
    }
}
808
900
}
0 commit comments