diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs
index 07058657b..2bd1dc1ee 100644
--- a/crates/iceberg/src/arrow/schema.rs
+++ b/crates/iceberg/src/arrow/schema.rs
@@ -378,7 +378,18 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
             DataType::Int8 | DataType::Int16 | DataType::Int32 => {
                 Ok(Type::Primitive(PrimitiveType::Int))
             }
+            // Widen unsigned integers to the next larger signed type to prevent overflow
+            // (following the Python implementation): uint8/uint16 → int32, uint32 → int64.
+            DataType::UInt8 | DataType::UInt16 => Ok(Type::Primitive(PrimitiveType::Int)),
+            DataType::UInt32 => Ok(Type::Primitive(PrimitiveType::Long)),
             DataType::Int64 => Ok(Type::Primitive(PrimitiveType::Long)),
+            DataType::UInt64 => {
+                // Block uint64 - no safe casting option
+                Err(Error::new(
+                    ErrorKind::DataInvalid,
+                    "UInt64 is not supported. Use Int64 for values ≤ 9,223,372,036,854,775,807 or Decimal(20,0) for full uint64 range.",
+                ))
+            }
             DataType::Float32 => Ok(Type::Primitive(PrimitiveType::Float)),
             DataType::Float64 => Ok(Type::Primitive(PrimitiveType::Double)),
             DataType::Decimal128(p, s) => Type::decimal(*p as u32, *s as u32).map_err(|e| {
@@ -1717,6 +1728,49 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_unsigned_type_casting() {
+        // Converts a one-column Arrow schema whose only field has the given type.
+        let convert = |data_type: DataType| {
+            let arrow_field = Field::new("test", data_type, false).with_metadata(
+                HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string())]),
+            );
+            arrow_schema_to_schema(&ArrowSchema::new(vec![arrow_field]))
+        };
+
+        // Test UInt8/UInt16 → Int32 casting (both widths are exercised)
+        for data_type in [DataType::UInt8, DataType::UInt16] {
+            let iceberg_schema = convert(data_type).unwrap();
+            let iceberg_field = iceberg_schema.as_struct().fields().first().unwrap();
+            assert!(matches!(
+                iceberg_field.field_type.as_ref(),
+                Type::Primitive(PrimitiveType::Int)
+            ));
+        }
+
+        // Test UInt32 → Int64 casting
+        {
+            let iceberg_schema = convert(DataType::UInt32).unwrap();
+            let iceberg_field = iceberg_schema.as_struct().fields().first().unwrap();
+            assert!(matches!(
+                iceberg_field.field_type.as_ref(),
+                Type::Primitive(PrimitiveType::Long)
+            ));
+        }
+
+        // Test UInt64 blocking
+        {
+            let result = convert(DataType::UInt64);
+            assert!(result.is_err());
+            assert!(
+                result
+                    .unwrap_err()
+                    .to_string()
+                    .contains("UInt64 is not supported")
+            );
+        }
+    }
+
     #[test]
     fn test_datum_conversion() {
         {