71 changes: 71 additions & 0 deletions crates/iceberg/src/arrow/schema.rs
@@ -378,7 +378,24 @@ impl ArrowSchemaVisitor for ArrowSchemaConverter {
DataType::Int8 | DataType::Int16 | DataType::Int32 => {
Ok(Type::Primitive(PrimitiveType::Int))
}
// Cast unsigned types based on bit width (following Python implementation)
Contributor:

Suggested change:
- // Cast unsigned types based on bit width (following Python implementation)
+ // Cast unsigned types based on bit width to allow for no data loss

I'm not sure Python compatibility is a direct goal here?

Contributor Author:

Will incorporate this.

DataType::UInt8 | DataType::UInt16 | DataType::UInt32 => {
// Cast to next larger signed type to prevent overflow
let bit_width = p.primitive_width().unwrap_or(0) * 8; // Convert bytes to bits
Contributor:

This seems superfluous; can't you just match on the data types and map them directly?

DataType::UInt8 | DataType::UInt16 => Ok(Type::Primitive(PrimitiveType::Int)),
DataType::UInt32 => Ok(Type::Primitive(PrimitiveType::Long)),

Contributor Author:

Yes, I'll simplify this logic.
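For reference, a minimal sketch of what that direct mapping could look like as a standalone helper (hypothetical, not the committed code; it assumes the Type, PrimitiveType, Error, and ErrorKind imports already present in schema.rs):

// Hypothetical helper: widen unsigned Arrow integer types to Iceberg primitives
// by matching directly on the data type instead of computing a bit width.
fn widen_unsigned(dt: &DataType) -> Result<Type, Error> {
    match dt {
        DataType::UInt8 | DataType::UInt16 => Ok(Type::Primitive(PrimitiveType::Int)),
        DataType::UInt32 => Ok(Type::Primitive(PrimitiveType::Long)),
        DataType::UInt64 => Err(Error::new(
            ErrorKind::DataInvalid,
            "UInt64 is not supported",
        )),
        other => Err(Error::new(
            ErrorKind::DataInvalid,
            format!("Not an unsigned integer type: {other}"),
        )),
    }
}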

match bit_width {
8 | 16 => Ok(Type::Primitive(PrimitiveType::Int)), // uint8/16 → int32
32 => Ok(Type::Primitive(PrimitiveType::Long)), // uint32 → int64
_ => Ok(Type::Primitive(PrimitiveType::Int)), // fallback
}
}
DataType::Int64 => Ok(Type::Primitive(PrimitiveType::Long)),
DataType::UInt64 => {
// Block uint64 - no safe casting option
Err(Error::new(
ErrorKind::DataInvalid,
"UInt64 is not supported. Use Int64 for values ≤ 9,223,372,036,854,775,807 or Decimal(20,0) for full uint64 range.",
))
}
DataType::Float32 => Ok(Type::Primitive(PrimitiveType::Float)),
DataType::Float64 => Ok(Type::Primitive(PrimitiveType::Double)),
DataType::Decimal128(p, s) => Type::decimal(*p as u32, *s as u32).map_err(|e| {
@@ -1717,6 +1734,60 @@ mod tests {
}
}

#[test]
Contributor:

This probably isn't the right module, but it would be nice to have a test that actually exercises writing these types and then reading them back again?

Contributor Author:

I implemented an integration test for the unsigned-type roundtrip, but discovered that ParquetWriter also requires modification to handle the unsigned data conversion. The issue stems from a type mismatch between schema and data: schema conversion (arrow_schema_to_schema) transforms the metadata but leaves the actual data unchanged, so Arrow validation fails at write time because of the mismatch.
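For context, a minimal sketch of the data-side cast that would make a record batch line up with the converted (signed) schema; it assumes arrow-rs's cast kernel (arrow_cast::cast), and the column name "id" and the surrounding function are illustrative only:

use std::sync::Arc;

use arrow_array::{ArrayRef, RecordBatch, UInt32Array};
use arrow_cast::cast;
use arrow_schema::{ArrowError, DataType, Field, Schema};

// Widen an unsigned column to the signed type the converted schema expects,
// then rebuild the batch against that signed schema before writing.
fn widen_batch() -> Result<RecordBatch, ArrowError> {
    let unsigned: ArrayRef = Arc::new(UInt32Array::from(vec![1_u32, 2, 3]));
    let signed = cast(&unsigned, &DataType::Int64)?; // lossless: every u32 fits in i64
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)]));
    RecordBatch::try_new(schema, vec![signed])
}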

@CTTY (Contributor), Aug 22, 2025:

I think writing record batches that contain unsigned types is out of the scope of the original issue and can be tricky:

  • ParquetWriter uses AsyncArrowWriter under the hood.
  • AsyncArrowWriter uses an arrow schema converted from the Iceberg table schema.
  • When converting an Iceberg schema to an arrow schema, the arrow schema won't have any unsigned types (and I don't think it makes sense to add them unless there is a valid use case).
  • Because of the schema mismatch between the record batches and that arrow schema, the arrow writer will fail.

Contributor:

Thanks. From the original issue the scope seems ambiguous. This change makes it possible to create a schema from arrow with unsigned types, which might be helpful by itself, but I imagine the next thing the user would want to do is actually write the data?

It seems fine to check this in separately as long as there is a clean failure for the unsigned types (i.e. we don't silently lose data).
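A minimal sketch of what such a clean failure could look like, as a hypothetical pre-write guard over a batch's arrow schema (not part of this PR; the helper name and the String error are placeholders):

use arrow_schema::{DataType, Schema};

// Hypothetical guard: reject batches that still carry unsigned integer columns
// with a descriptive error instead of a generic schema-mismatch failure.
fn reject_unsigned_columns(schema: &Schema) -> Result<(), String> {
    for field in schema.fields() {
        if matches!(
            field.data_type(),
            DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64
        ) {
            return Err(format!(
                "column '{}' has unsigned type {}; cast it to a signed type before writing",
                field.name(),
                field.data_type()
            ));
        }
    }
    Ok(())
}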

fn test_unsigned_type_casting() {
// Test UInt32 → Int64 casting
Contributor:

Is it possible to parameterize at least the non-error cases with expected input/output to avoid boilerplate?

Contributor Author:

Done.
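For reference, one hypothetical parameterized shape for the non-error cases (names and helpers as in the existing test; whether this matches what was committed is an assumption):

// Table-drive the lossless widenings: (arrow input type, expected Iceberg primitive).
let cases = [
    (DataType::UInt8, PrimitiveType::Int),
    (DataType::UInt16, PrimitiveType::Int),
    (DataType::UInt32, PrimitiveType::Long),
];
for (arrow_type, expected) in cases {
    let arrow_field = Field::new("test", arrow_type, false).with_metadata(HashMap::from([
        (PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string()),
    ]));
    let arrow_schema = ArrowSchema::new(vec![arrow_field]);
    let iceberg_schema = arrow_schema_to_schema(&arrow_schema).unwrap();
    let iceberg_field = iceberg_schema.as_struct().fields().first().unwrap();
    // assumes Type/PrimitiveType implement PartialEq, as used elsewhere in the crate
    assert_eq!(
        iceberg_field.field_type.as_ref(),
        &Type::Primitive(expected)
    );
}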

{
let arrow_field = Field::new("test", DataType::UInt32, false).with_metadata(
HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string())]),
);
let arrow_schema = ArrowSchema::new(vec![arrow_field]);

let iceberg_schema = arrow_schema_to_schema(&arrow_schema).unwrap();

// Verify UInt32 was cast to Long (int64)
let iceberg_field = iceberg_schema.as_struct().fields().first().unwrap();
assert!(matches!(
iceberg_field.field_type.as_ref(),
Type::Primitive(PrimitiveType::Long)
));
}

// Test UInt8/UInt16 → Int32 casting
{
let arrow_field = Field::new("test", DataType::UInt8, false).with_metadata(
Contributor:

nit: it doesn't look like this is testing UInt16?

Contributor Author:

I have added this scenario.

HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string())]),
);
let arrow_schema = ArrowSchema::new(vec![arrow_field]);

let iceberg_schema = arrow_schema_to_schema(&arrow_schema).unwrap();

// Verify UInt8 was cast to Int (int32)
let iceberg_field = iceberg_schema.as_struct().fields().first().unwrap();
assert!(matches!(
iceberg_field.field_type.as_ref(),
Type::Primitive(PrimitiveType::Int)
));
}

// Test UInt64 blocking
{
let arrow_field = Field::new("test", DataType::UInt64, false).with_metadata(
HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "1".to_string())]),
);
let arrow_schema = ArrowSchema::new(vec![arrow_field]);

let result = arrow_schema_to_schema(&arrow_schema);
assert!(result.is_err());
assert!(
result
.unwrap_err()
.to_string()
.contains("UInt64 is not supported")
);
}
Contributor:

nit: I think the brackets here and L1755 are excessive.

}

#[test]
fn test_datum_conversion() {
{