@@ -167,9 +167,10 @@ impl VariantParser {
167
167
pub fn parse_short_string_header ( header_byte : u8 ) -> Result < ShortStringHeader , ArrowError > {
168
168
let length = ( header_byte >> 2 ) as usize ;
169
169
170
- if length > 13 {
170
+ // Short strings can be up to 63 bytes (6-bit length field: 0-63)
171
+ if length > 63 {
171
172
return Err ( ArrowError :: InvalidArgumentError ( format ! (
172
- "Short string length {} exceeds maximum of 13 " ,
173
+ "Short string length {} exceeds maximum of 63 " ,
173
174
length
174
175
) ) ) ;
175
176
}
@@ -307,22 +308,23 @@ impl VariantParser {
307
308
}
308
309
309
310
/// Get the data length for a primitive type
310
- pub fn get_primitive_data_length ( primitive_type : & PrimitiveType ) -> usize {
311
+ /// Returns Some(len) for fixed-length types, None for variable-length types
312
+ pub fn get_primitive_data_length ( primitive_type : & PrimitiveType ) -> Option < usize > {
311
313
match primitive_type {
312
- PrimitiveType :: Null | PrimitiveType :: True | PrimitiveType :: False => 0 ,
313
- PrimitiveType :: Int8 => 1 ,
314
- PrimitiveType :: Int16 => 2 ,
314
+ PrimitiveType :: Null | PrimitiveType :: True | PrimitiveType :: False => Some ( 0 ) ,
315
+ PrimitiveType :: Int8 => Some ( 1 ) ,
316
+ PrimitiveType :: Int16 => Some ( 2 ) ,
315
317
PrimitiveType :: Int32
316
318
| PrimitiveType :: Float
317
319
| PrimitiveType :: Decimal4
318
- | PrimitiveType :: Date => 4 ,
320
+ | PrimitiveType :: Date => Some ( 4 ) ,
319
321
PrimitiveType :: Int64
320
322
| PrimitiveType :: Double
321
323
| PrimitiveType :: Decimal8
322
324
| PrimitiveType :: TimestampNtz
323
- | PrimitiveType :: TimestampLtz => 8 ,
324
- PrimitiveType :: Decimal16 => 16 ,
325
- PrimitiveType :: Binary | PrimitiveType :: String => 0 , // Variable length, need to read from data
325
+ | PrimitiveType :: TimestampLtz => Some ( 8 ) ,
326
+ PrimitiveType :: Decimal16 => Some ( 16 ) ,
327
+ PrimitiveType :: Binary | PrimitiveType :: String => None , // Variable length, need to read from data
326
328
}
327
329
}
328
330
@@ -357,43 +359,41 @@ impl VariantParser {
357
359
let primitive_type = Self :: parse_primitive_header ( value_bytes[ 0 ] ) ?;
358
360
let data_length = Self :: get_primitive_data_length ( & primitive_type) ;
359
361
360
- if data_length == 0 {
361
- // Handle variable length types and null/boolean
362
- match primitive_type {
363
- PrimitiveType :: Null | PrimitiveType :: True | PrimitiveType :: False => Ok ( & [ ] ) ,
364
- PrimitiveType :: Binary | PrimitiveType :: String => {
365
- // These require reading length from the data
366
- if value_bytes. len ( ) < 5 {
367
- return Err ( ArrowError :: InvalidArgumentError (
368
- "Not enough bytes for variable length primitive" . to_string ( ) ,
369
- ) ) ;
370
- }
371
- let length = u32:: from_le_bytes ( [
372
- value_bytes[ 1 ] ,
373
- value_bytes[ 2 ] ,
374
- value_bytes[ 3 ] ,
375
- value_bytes[ 4 ] ,
376
- ] ) as usize ;
377
- if value_bytes. len ( ) < 5 + length {
378
- return Err ( ArrowError :: InvalidArgumentError (
379
- "Variable length primitive data exceeds available bytes" . to_string ( ) ,
380
- ) ) ;
381
- }
382
- Ok ( & value_bytes[ 5 ..5 + length] )
362
+ match data_length {
363
+ Some ( 0 ) => {
364
+ // Fixed-length 0-byte types (null/true/false)
365
+ Ok ( & [ ] )
366
+ }
367
+ Some ( len) => {
368
+ // Fixed-length types with len bytes
369
+ if value_bytes. len ( ) < 1 + len {
370
+ return Err ( ArrowError :: InvalidArgumentError ( format ! (
371
+ "Fixed length primitive data length {} exceeds available bytes" ,
372
+ len
373
+ ) ) ) ;
383
374
}
384
- _ => Err ( ArrowError :: InvalidArgumentError ( format ! (
385
- "Unhandled primitive type: {:?}" ,
386
- primitive_type
387
- ) ) ) ,
375
+ Ok ( & value_bytes[ 1 ..1 + len] )
388
376
}
389
- } else {
390
- if value_bytes. len ( ) < 1 + data_length {
391
- return Err ( ArrowError :: InvalidArgumentError ( format ! (
392
- "Primitive data length {} exceeds available bytes" ,
393
- data_length
394
- ) ) ) ;
377
+ None => {
378
+ // Variable-length types (binary/string) - read length from data
379
+ if value_bytes. len ( ) < 5 {
380
+ return Err ( ArrowError :: InvalidArgumentError (
381
+ "Not enough bytes for variable length primitive" . to_string ( ) ,
382
+ ) ) ;
383
+ }
384
+ let length = u32:: from_le_bytes ( [
385
+ value_bytes[ 1 ] ,
386
+ value_bytes[ 2 ] ,
387
+ value_bytes[ 3 ] ,
388
+ value_bytes[ 4 ] ,
389
+ ] ) as usize ;
390
+ if value_bytes. len ( ) < 5 + length {
391
+ return Err ( ArrowError :: InvalidArgumentError (
392
+ "Variable length primitive data exceeds available bytes" . to_string ( ) ,
393
+ ) ) ;
394
+ }
395
+ Ok ( & value_bytes[ 5 ..5 + length] )
395
396
}
396
- Ok ( & value_bytes[ 1 ..1 + data_length] )
397
397
}
398
398
}
399
399
@@ -500,14 +500,17 @@ mod tests {
500
500
ShortStringHeader { length: 5 }
501
501
) ;
502
502
503
- // Test 13 -length short string (maximum)
503
+ // Test 63-byte short string (maximum for the 6-bit length field)
504
504
assert_eq ! (
505
- VariantParser :: parse_short_string_header( 0b00110101 ) . unwrap( ) ,
506
- ShortStringHeader { length: 13 }
505
+ VariantParser :: parse_short_string_header( 0b11111101 ) . unwrap( ) ,
506
+ ShortStringHeader { length: 63 }
507
507
) ;
508
508
509
- // Test invalid length > 13
510
- assert ! ( VariantParser :: parse_short_string_header( 0b00111001 ) . is_err( ) ) ;
509
+ // Test that all values 0-63 are valid
510
+ for length in 0 ..=63 {
511
+ let header_byte = ( length << 2 ) | 1 ; // short string type
512
+ assert ! ( VariantParser :: parse_short_string_header( header_byte as u8 ) . is_ok( ) ) ;
513
+ }
511
514
}
512
515
513
516
#[ test]
@@ -564,50 +567,55 @@ mod tests {
564
567
565
568
#[ test]
566
569
fn test_get_primitive_data_length ( ) {
570
+ // Test fixed-length 0-byte types
567
571
assert_eq ! (
568
572
VariantParser :: get_primitive_data_length( & PrimitiveType :: Null ) ,
569
- 0
573
+ Some ( 0 )
570
574
) ;
571
575
assert_eq ! (
572
576
VariantParser :: get_primitive_data_length( & PrimitiveType :: True ) ,
573
- 0
577
+ Some ( 0 )
574
578
) ;
575
579
assert_eq ! (
576
580
VariantParser :: get_primitive_data_length( & PrimitiveType :: False ) ,
577
- 0
581
+ Some ( 0 )
578
582
) ;
583
+
584
+ // Test fixed-length types with specific byte counts
579
585
assert_eq ! (
580
586
VariantParser :: get_primitive_data_length( & PrimitiveType :: Int8 ) ,
581
- 1
587
+ Some ( 1 )
582
588
) ;
583
589
assert_eq ! (
584
590
VariantParser :: get_primitive_data_length( & PrimitiveType :: Int16 ) ,
585
- 2
591
+ Some ( 2 )
586
592
) ;
587
593
assert_eq ! (
588
594
VariantParser :: get_primitive_data_length( & PrimitiveType :: Int32 ) ,
589
- 4
595
+ Some ( 4 )
590
596
) ;
591
597
assert_eq ! (
592
598
VariantParser :: get_primitive_data_length( & PrimitiveType :: Int64 ) ,
593
- 8
599
+ Some ( 8 )
594
600
) ;
595
601
assert_eq ! (
596
602
VariantParser :: get_primitive_data_length( & PrimitiveType :: Double ) ,
597
- 8
603
+ Some ( 8 )
598
604
) ;
599
605
assert_eq ! (
600
606
VariantParser :: get_primitive_data_length( & PrimitiveType :: Decimal16 ) ,
601
- 16
607
+ Some ( 16 )
602
608
) ;
609
+
610
+ // Test variable-length types (should return None)
603
611
assert_eq ! (
604
612
VariantParser :: get_primitive_data_length( & PrimitiveType :: Binary ) ,
605
- 0
606
- ) ; // Variable length
613
+ None
614
+ ) ;
607
615
assert_eq ! (
608
616
VariantParser :: get_primitive_data_length( & PrimitiveType :: String ) ,
609
- 0
610
- ) ; // Variable length
617
+ None
618
+ ) ;
611
619
}
612
620
613
621
#[ test]
0 commit comments