From f4cdab32b360fc93ea3bb79993c8f14fbb187c68 Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Wed, 13 Aug 2025 10:20:12 -0700 Subject: [PATCH 1/8] Add glue catalog loader impl --- crates/catalog/glue/src/catalog.rs | 82 ++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index fb4bd36b8d..ec3f6c5927 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -25,10 +25,7 @@ use iceberg::io::{ }; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; -use iceberg::{ - Catalog, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, - TableCreation, TableIdent, -}; +use iceberg::{Catalog, CatalogBuilder, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent}; use typed_builder::TypedBuilder; use crate::error::{from_aws_build_error, from_aws_sdk_error}; @@ -40,14 +37,89 @@ use crate::{ AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN, with_catalog_id, }; +/// Glue catalog URI +pub const GLUE_CATALOG_PROP_URI: &str = "uri"; +/// Glue catalog id +pub const GLUE_CATALOG_PROP_CATALOG_ID: &str = "catalog_id"; +/// Glue catalog warehouse location +pub const GLUE_CATALOG_PROP_WAREHOUSE: &str = "warehouse"; + +/// Builder for [`GlueCatalog`]. +#[derive(Debug)] +pub struct GlueCatalogBuilder(GlueCatalogConfig); + +impl Default for GlueCatalogBuilder { + fn default() -> Self { + Self(GlueCatalogConfig { + name: None, + uri: None, + catalog_id: None, + warehouse: "".to_string(), + props: HashMap::new(), + }) + } +} + +impl CatalogBuilder for GlueCatalogBuilder { + type C = GlueCatalog; + + fn load( + mut self, + name: impl Into, + props: HashMap, + ) -> impl Future> + Send { + self.0.name = Some(name.into()); + + if props.contains_key(GLUE_CATALOG_PROP_URI) { + self.0.uri = props.get(GLUE_CATALOG_PROP_URI).cloned() + } + + if props.contains_key(GLUE_CATALOG_PROP_CATALOG_ID) { + self.0.catalog_id = props.get(GLUE_CATALOG_PROP_CATALOG_ID).cloned() + } + + if props.contains_key(GLUE_CATALOG_PROP_WAREHOUSE) { + self.0.warehouse = props + .get(GLUE_CATALOG_PROP_WAREHOUSE) + .cloned() + .unwrap_or_default(); + } + + // Collect other remaining properties + self.0.props = props + .into_iter() + .filter(|(k, _)| k != GLUE_CATALOG_PROP_URI && + k != GLUE_CATALOG_PROP_CATALOG_ID && + k != GLUE_CATALOG_PROP_WAREHOUSE) + .collect(); + + async move { + if self.0.name.is_none() { + return Err(Error::new(ErrorKind::DataInvalid, "Catalog name is required")); + } + if self.0.warehouse.is_empty() { + return Err(Error::new(ErrorKind::DataInvalid, "Catalog warehouse is required")); + } + + GlueCatalog::new(self.0).await + } + } +} + #[derive(Debug, TypedBuilder)] /// Glue Catalog configuration pub struct GlueCatalogConfig { + #[builder(default, setter(strip_option))] + name: Option, + #[builder(default, setter(strip_option(fallback = uri_opt)))] uri: Option, + #[builder(default, setter(strip_option(fallback = catalog_id_opt)))] catalog_id: Option, + warehouse: String, + #[builder(default)] props: HashMap, } @@ -71,7 +143,7 @@ impl Debug for GlueCatalog { impl GlueCatalog { /// Create a new glue catalog - pub async fn new(config: GlueCatalogConfig) -> Result { + async fn new(config: GlueCatalogConfig) -> Result { let sdk_config = create_sdk_config(&config.props, config.uri.as_ref()).await; let mut file_io_props = config.props.clone(); if !file_io_props.contains_key(S3_ACCESS_KEY_ID) { From f11200ea40423a29c88689b177d42722d9e6b528 Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Wed, 13 Aug 2025 11:17:39 -0700 Subject: [PATCH 2/8] Fix tests & cargo toml --- Cargo.lock | 1 + Cargo.toml | 1 + crates/catalog/glue/src/catalog.rs | 23 +++++++++++---- .../catalog/glue/tests/glue_catalog_test.rs | 28 +++++++++++++------ crates/catalog/loader/Cargo.toml | 5 ++-- crates/catalog/loader/src/lib.rs | 1 + 6 files changed, 43 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a0b74a8786..17319c3c2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3613,6 +3613,7 @@ version = "0.6.0" dependencies = [ "async-trait", "iceberg", + "iceberg-catalog-glue", "iceberg-catalog-rest", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index a585be7d77..4f03a2021b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ hive_metastore = "0.1" http = "1.2" iceberg = { version = "0.6.0", path = "./crates/iceberg" } iceberg-catalog-rest = { version = "0.6.0", path = "./crates/catalog/rest" } +iceberg-catalog-glue = { version = "0.6.0", path = "./crates/catalog/glue" } iceberg-datafusion = { version = "0.6.0", path = "./crates/integrations/datafusion" } indicatif = "0.17" itertools = "0.13" diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index ec3f6c5927..4167467db6 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -25,7 +25,10 @@ use iceberg::io::{ }; use iceberg::spec::{TableMetadata, TableMetadataBuilder}; use iceberg::table::Table; -use iceberg::{Catalog, CatalogBuilder, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent}; +use iceberg::{ + Catalog, CatalogBuilder, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, + TableCommit, TableCreation, TableIdent, +}; use typed_builder::TypedBuilder; use crate::error::{from_aws_build_error, from_aws_sdk_error}; @@ -88,17 +91,25 @@ impl CatalogBuilder for GlueCatalogBuilder { // Collect other remaining properties self.0.props = props .into_iter() - .filter(|(k, _)| k != GLUE_CATALOG_PROP_URI && - k != GLUE_CATALOG_PROP_CATALOG_ID && - k != GLUE_CATALOG_PROP_WAREHOUSE) + .filter(|(k, _)| { + k != GLUE_CATALOG_PROP_URI + && k != GLUE_CATALOG_PROP_CATALOG_ID + && k != GLUE_CATALOG_PROP_WAREHOUSE + }) .collect(); async move { if self.0.name.is_none() { - return Err(Error::new(ErrorKind::DataInvalid, "Catalog name is required")); + return Err(Error::new( + ErrorKind::DataInvalid, + "Catalog name is required", + )); } if self.0.warehouse.is_empty() { - return Err(Error::new(ErrorKind::DataInvalid, "Catalog warehouse is required")); + return Err(Error::new( + ErrorKind::DataInvalid, + "Catalog warehouse is required", + )); } GlueCatalog::new(self.0).await diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs index bec9494fe9..c4fb8fe03e 100644 --- a/crates/catalog/glue/tests/glue_catalog_test.rs +++ b/crates/catalog/glue/tests/glue_catalog_test.rs @@ -24,9 +24,12 @@ use std::sync::RwLock; use ctor::{ctor, dtor}; use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY}; use iceberg::spec::{NestedField, PrimitiveType, Schema, Type}; -use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCreation, TableIdent}; +use iceberg::{ + Catalog, CatalogBuilder, Namespace, NamespaceIdent, Result, TableCreation, TableIdent, +}; use iceberg_catalog_glue::{ - AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, GlueCatalog, GlueCatalogConfig, + AWS_ACCESS_KEY_ID, AWS_REGION_NAME, AWS_SECRET_ACCESS_KEY, GLUE_CATALOG_PROP_URI, + GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalog, GlueCatalogBuilder, }; use iceberg_test_utils::docker::DockerCompose; use iceberg_test_utils::{normalize_test_name, set_up}; @@ -89,13 +92,22 @@ async fn get_catalog() -> GlueCatalog { (S3_REGION.to_string(), "us-east-1".to_string()), ]); - let config = GlueCatalogConfig::builder() - .uri(format!("http://{}", glue_socket_addr)) - .warehouse("s3a://warehouse/hive".to_string()) - .props(props.clone()) - .build(); + let mut glue_props = HashMap::from([ + ( + GLUE_CATALOG_PROP_URI.to_string(), + format!("http://{}", glue_socket_addr), + ), + ( + GLUE_CATALOG_PROP_WAREHOUSE.to_string(), + "s3a://warehouse/hive".to_string(), + ), + ]); + glue_props.extend(props.clone()); - GlueCatalog::new(config).await.unwrap() + GlueCatalogBuilder::default() + .load("glue", glue_props) + .await + .unwrap() } async fn set_test_namespace(catalog: &GlueCatalog, namespace: &NamespaceIdent) -> Result<()> { diff --git a/crates/catalog/loader/Cargo.toml b/crates/catalog/loader/Cargo.toml index d29edad051..136847d971 100644 --- a/crates/catalog/loader/Cargo.toml +++ b/crates/catalog/loader/Cargo.toml @@ -30,6 +30,7 @@ repository = { workspace = true } [dependencies] iceberg = { workspace = true } -iceberg-catalog-rest = {workspace = true} +iceberg-catalog-rest = { workspace = true } +iceberg-catalog-glue = { workspace = true } tokio = { workspace = true } -async-trait = {workspace = true} +async-trait = { workspace = true } diff --git a/crates/catalog/loader/src/lib.rs b/crates/catalog/loader/src/lib.rs index e5fce46822..5a7eaf0ca3 100644 --- a/crates/catalog/loader/src/lib.rs +++ b/crates/catalog/loader/src/lib.rs @@ -46,6 +46,7 @@ impl BoxedCatalogBuilder for T { pub fn load(r#type: &str) -> Result> { match r#type { "rest" => Ok(Box::new(RestCatalogBuilder::default()) as Box), + // "glue" => Ok(Box::new(GlueCatalogBuilder::default()) as Box), _ => Err(Error::new( ErrorKind::FeatureUnsupported, format!("Unsupported catalog type: {}", r#type), From f67ce6c013bfe3f64291af4417b562f39c145e58 Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Wed, 13 Aug 2025 11:24:52 -0700 Subject: [PATCH 3/8] Add loader --- crates/catalog/loader/src/lib.rs | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/crates/catalog/loader/src/lib.rs b/crates/catalog/loader/src/lib.rs index 5a7eaf0ca3..c1e88b0cea 100644 --- a/crates/catalog/loader/src/lib.rs +++ b/crates/catalog/loader/src/lib.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use async_trait::async_trait; use iceberg::{Catalog, CatalogBuilder, Error, ErrorKind, Result}; +use iceberg_catalog_glue::GlueCatalogBuilder; use iceberg_catalog_rest::RestCatalogBuilder; #[async_trait] @@ -46,7 +47,7 @@ impl BoxedCatalogBuilder for T { pub fn load(r#type: &str) -> Result> { match r#type { "rest" => Ok(Box::new(RestCatalogBuilder::default()) as Box), - // "glue" => Ok(Box::new(GlueCatalogBuilder::default()) as Box), + "glue" => Ok(Box::new(GlueCatalogBuilder::default()) as Box), _ => Err(Error::new( ErrorKind::FeatureUnsupported, format!("Unsupported catalog type: {}", r#type), @@ -58,12 +59,12 @@ pub fn load(r#type: &str) -> Result> { mod tests { use std::collections::HashMap; - use iceberg_catalog_rest::REST_CATALOG_PROP_URI; - use crate::load; #[tokio::test] async fn test_load_rest_catalog() { + use iceberg_catalog_rest::REST_CATALOG_PROP_URI; + let catalog_loader = load("rest").unwrap(); let catalog = catalog_loader .load( @@ -80,4 +81,25 @@ mod tests { assert!(catalog.is_ok()); } + + #[tokio::test] + async fn test_load_glue_catalog() { + use iceberg_catalog_glue::GLUE_CATALOG_PROP_WAREHOUSE; + + let catalog_loader = load("glue").unwrap(); + let catalog = catalog_loader + .load( + "glue".to_string(), + HashMap::from([ + ( + GLUE_CATALOG_PROP_WAREHOUSE.to_string(), + "s3://test".to_string(), + ), + ("key".to_string(), "value".to_string()), + ]), + ) + .await; + + assert!(catalog.is_ok()); + } } From 99da1616e0d784cbe8c6442a67d8791e0904f4fb Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Wed, 13 Aug 2025 14:24:23 -0700 Subject: [PATCH 4/8] Add doc --- crates/catalog/glue/src/lib.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/crates/catalog/glue/src/lib.rs b/crates/catalog/glue/src/lib.rs index 2376573358..2e5379bb1b 100644 --- a/crates/catalog/glue/src/lib.rs +++ b/crates/catalog/glue/src/lib.rs @@ -16,6 +16,33 @@ // under the License. //! Iceberg Glue Catalog implementation. +//! +//! To build a glue catalog with configurations +//! # Example +//! +//! ```rust, no_run +//! use std::collections::HashMap; +//! +//! use iceberg::CatalogBuilder; +//! use iceberg_catalog_glue::GlueCatalogBuilder; +//! +//! #[tokio::main] +//! async fn main() { +//! use iceberg_catalog_glue::GLUE_CATALOG_PROP_WAREHOUSE; +//! let catalog = GlueCatalogBuilder::default() +//! .load( +//! "glue", +//! HashMap::from([ +//! ( +//! GLUE_CATALOG_PROP_WAREHOUSE.to_string(), +//! "s3://warehouse".to_string(), +//! ), +//! ]), +//! ) +//! .await +//! .unwrap(); +//! } +//! ``` #![deny(missing_docs)] From 245eb16eb30e14c2f58f141dcce4bbec95581d7a Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Wed, 13 Aug 2025 14:25:30 -0700 Subject: [PATCH 5/8] Fix formatting --- crates/catalog/glue/src/lib.rs | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/crates/catalog/glue/src/lib.rs b/crates/catalog/glue/src/lib.rs index 2e5379bb1b..1b9efe3770 100644 --- a/crates/catalog/glue/src/lib.rs +++ b/crates/catalog/glue/src/lib.rs @@ -24,20 +24,17 @@ //! use std::collections::HashMap; //! //! use iceberg::CatalogBuilder; -//! use iceberg_catalog_glue::GlueCatalogBuilder; +//! use iceberg_catalog_glue::{GLUE_CATALOG_PROP_WAREHOUSE, GlueCatalogBuilder}; //! //! #[tokio::main] //! async fn main() { -//! use iceberg_catalog_glue::GLUE_CATALOG_PROP_WAREHOUSE; -//! let catalog = GlueCatalogBuilder::default() +//! let catalog = GlueCatalogBuilder::default() //! .load( //! "glue", -//! HashMap::from([ -//! ( -//! GLUE_CATALOG_PROP_WAREHOUSE.to_string(), -//! "s3://warehouse".to_string(), -//! ), -//! ]), +//! HashMap::from([( +//! GLUE_CATALOG_PROP_WAREHOUSE.to_string(), +//! "s3://warehouse".to_string(), +//! )]), //! ) //! .await //! .unwrap(); From ac69dd42351d9f3222137457b38663642f4e4b1b Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Thu, 14 Aug 2025 16:18:13 -0700 Subject: [PATCH 6/8] Make glue config private --- crates/catalog/glue/src/catalog.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index 4167467db6..4fc91270bb 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -29,7 +29,6 @@ use iceberg::{ Catalog, CatalogBuilder, Error, ErrorKind, MetadataLocation, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent, }; -use typed_builder::TypedBuilder; use crate::error::{from_aws_build_error, from_aws_sdk_error}; use crate::utils::{ @@ -117,9 +116,9 @@ impl CatalogBuilder for GlueCatalogBuilder { } } -#[derive(Debug, TypedBuilder)] +#[derive(Debug)] /// Glue Catalog configuration -pub struct GlueCatalogConfig { +pub(crate) struct GlueCatalogConfig { #[builder(default, setter(strip_option))] name: Option, From 99e30171a8b858a779a6b8044407dc99a1b59034 Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Thu, 14 Aug 2025 16:22:19 -0700 Subject: [PATCH 7/8] Remove builders as not needed --- crates/catalog/glue/src/catalog.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/crates/catalog/glue/src/catalog.rs b/crates/catalog/glue/src/catalog.rs index 4fc91270bb..c7584b596c 100644 --- a/crates/catalog/glue/src/catalog.rs +++ b/crates/catalog/glue/src/catalog.rs @@ -119,18 +119,10 @@ impl CatalogBuilder for GlueCatalogBuilder { #[derive(Debug)] /// Glue Catalog configuration pub(crate) struct GlueCatalogConfig { - #[builder(default, setter(strip_option))] name: Option, - - #[builder(default, setter(strip_option(fallback = uri_opt)))] uri: Option, - - #[builder(default, setter(strip_option(fallback = catalog_id_opt)))] catalog_id: Option, - warehouse: String, - - #[builder(default)] props: HashMap, } From 33be4970cab5a0efd1359ed52ef82b919b4f2f54 Mon Sep 17 00:00:00 2001 From: Leon Lin Date: Thu, 14 Aug 2025 17:17:19 -0700 Subject: [PATCH 8/8] Remove type builder --- crates/catalog/glue/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/catalog/glue/Cargo.toml b/crates/catalog/glue/Cargo.toml index 613160e468..b6126021f5 100644 --- a/crates/catalog/glue/Cargo.toml +++ b/crates/catalog/glue/Cargo.toml @@ -37,7 +37,6 @@ iceberg = { workspace = true } serde_json = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } -typed-builder = { workspace = true } [dev-dependencies] ctor = { workspace = true }