From f13c0a44ce1d5e8c133e3e5c1376c649dedebf55 Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Wed, 13 Aug 2025 09:13:41 +0200 Subject: [PATCH 1/2] Python client: make S3 role-ARN optional and add missing endpoint-internal property --- client/python/cli/command/__init__.py | 1 + client/python/cli/command/catalogs.py | 17 +++++++++-------- client/python/cli/constants.py | 4 +++- client/python/cli/options/option_tree.py | 1 + 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/client/python/cli/command/__init__.py b/client/python/cli/command/__init__.py index 659a9b9e2c..53216f3162 100644 --- a/client/python/cli/command/__init__.py +++ b/client/python/cli/command/__init__.py @@ -66,6 +66,7 @@ def options_get(key, f=lambda x: x): iceberg_remote_catalog_name=options_get(Arguments.ICEBERG_REMOTE_CATALOG_NAME), remove_properties=[] if remove_properties is None else remove_properties, endpoint=options_get(Arguments.ENDPOINT), + endpoint_internal=options_get(Arguments.ENDPOINT_INTERNAL), sts_endpoint=options_get(Arguments.STS_ENDPOINT), path_style_access=options_get(Arguments.PATH_STYLE_ACCESS), catalog_connection_type=options_get(Arguments.CATALOG_CONNECTION_TYPE), diff --git a/client/python/cli/command/catalogs.py b/client/python/cli/command/catalogs.py index 3708bb5d63..2f1edba61b 100644 --- a/client/python/cli/command/catalogs.py +++ b/client/python/cli/command/catalogs.py @@ -65,6 +65,7 @@ class CatalogsCommand(Command): hadoop_warehouse: str iceberg_remote_catalog_name: str endpoint: str + endpoint_internal: str sts_endpoint: str path_style_access: bool catalog_connection_type: str @@ -121,18 +122,17 @@ def validate(self): f" {Argument.to_flag_name(Arguments.CATALOG_SERVICE_IDENTITY_IAM_ARN)}") if self.storage_type == StorageType.S3.value: - if not self.role_arn: - raise Exception( - f"Missing required argument for storage type 's3':" - f" {Argument.to_flag_name(Arguments.ROLE_ARN)}" - ) if self._has_azure_storage_info() or self._has_gcs_storage_info(): 
raise Exception( f"Storage type 's3' supports the storage credentials" f" {Argument.to_flag_name(Arguments.ROLE_ARN)}," f" {Argument.to_flag_name(Arguments.REGION)}," - f" {Argument.to_flag_name(Arguments.EXTERNAL_ID)}, and" - f" {Argument.to_flag_name(Arguments.USER_ARN)}" + f" {Argument.to_flag_name(Arguments.EXTERNAL_ID)}," + f" {Argument.to_flag_name(Arguments.USER_ARN)}," + f" {Argument.to_flag_name(Arguments.ENDPOINT)}," + f" {Argument.to_flag_name(Arguments.ENDPOINT_INTERNAL)}," + f" {Argument.to_flag_name(Arguments.STS_ENDPOINT)}, and" + f" {Argument.to_flag_name(Arguments.PATH_STYLE_ACCESS)}" ) elif self.storage_type == StorageType.AZURE.value: if not self.tenant_id: @@ -164,7 +164,7 @@ def validate(self): ) def _has_aws_storage_info(self): - return self.role_arn or self.external_id or self.user_arn or self.region or self.endpoint or self.sts_endpoint or self.path_style_access + return self.role_arn or self.external_id or self.user_arn or self.region or self.endpoint or self.endpoint_internal or self.sts_endpoint or self.path_style_access def _has_azure_storage_info(self): return self.tenant_id or self.multi_tenant_app_name or self.consent_url @@ -183,6 +183,7 @@ def _build_storage_config_info(self): user_arn=self.user_arn, region=self.region, endpoint=self.endpoint, + endpoint_internal=self.endpoint_internal, sts_endpoint=self.sts_endpoint, path_style_access=self.path_style_access, ) diff --git a/client/python/cli/constants.py b/client/python/cli/constants.py index d3027009a4..1a6ec4c424 100644 --- a/client/python/cli/constants.py +++ b/client/python/cli/constants.py @@ -168,6 +168,7 @@ class Arguments: HADOOP_WAREHOUSE = "hadoop_warehouse" ICEBERG_REMOTE_CATALOG_NAME = "iceberg_remote_catalog_name" ENDPOINT = "endpoint" + ENDPOINT_INTERNAL = "endpoint_internal" STS_ENDPOINT = "sts_endpoint" PATH_STYLE_ACCESS = "path_style_access" CATALOG_CONNECTION_TYPE = "catalog_connection_type" @@ -223,11 +224,12 @@ class Create: "Multiple locations can be provided by 
 specifying this option more than once." ) - ROLE_ARN = "(Required for S3) A role ARN to use when connecting to S3" + ROLE_ARN = "(Only for S3) A role ARN to use when connecting to S3" EXTERNAL_ID = "(Only for S3) The external ID to use when connecting to S3" REGION = "(Only for S3) The region to use when connecting to S3" USER_ARN = "(Only for S3) A user ARN to use when connecting to S3" ENDPOINT = "(Only for S3) The S3 endpoint to use when connecting to S3" + ENDPOINT_INTERNAL = "(Only for S3) The S3 endpoint used by Polaris when connecting to S3, if different from the one that clients use" STS_ENDPOINT = ( "(Only for S3) The STS endpoint to use when connecting to STS" ) diff --git a/client/python/cli/options/option_tree.py b/client/python/cli/options/option_tree.py index 7b10a64ea6..5b95741f23 100644 --- a/client/python/cli/options/option_tree.py +++ b/client/python/cli/options/option_tree.py @@ -117,6 +117,7 @@ def get_tree() -> List[Option]: choices=[st.value for st in StorageType]), Argument(Arguments.DEFAULT_BASE_LOCATION, str, Hints.Catalogs.Create.DEFAULT_BASE_LOCATION), Argument(Arguments.ENDPOINT, str, Hints.Catalogs.Create.ENDPOINT), + Argument(Arguments.ENDPOINT_INTERNAL, str, Hints.Catalogs.Create.ENDPOINT_INTERNAL), Argument(Arguments.STS_ENDPOINT, str, Hints.Catalogs.Create.STS_ENDPOINT), Argument(Arguments.PATH_STYLE_ACCESS, bool, Hints.Catalogs.Create.PATH_STYLE_ACCESS), Argument(Arguments.ALLOWED_LOCATION, str, Hints.Catalogs.Create.ALLOWED_LOCATION, From 36b7ec65f47beddf462d436ef8101d16fe21bda3 Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Mon, 18 Aug 2025 14:11:44 +0200 Subject: [PATCH 2/2] review --- client/python/cli/command/catalogs.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/client/python/cli/command/catalogs.py b/client/python/cli/command/catalogs.py index 2f1edba61b..688064a611 100644 --- a/client/python/cli/command/catalogs.py +++ b/client/python/cli/command/catalogs.py @@ -124,7 +124,7 @@ def 
validate(self): if self.storage_type == StorageType.S3.value: if self._has_azure_storage_info() or self._has_gcs_storage_info(): raise Exception( - f"Storage type 's3' supports the storage credentials" + f"Storage type 's3' supports the options" f" {Argument.to_flag_name(Arguments.ROLE_ARN)}," f" {Argument.to_flag_name(Arguments.REGION)}," f" {Argument.to_flag_name(Arguments.EXTERNAL_ID)}," @@ -142,7 +142,7 @@ def validate(self): ) if self._has_aws_storage_info() or self._has_gcs_storage_info(): raise Exception( - "Storage type 'azure' supports the storage credentials" + "Storage type 'azure' supports the options" f" {Argument.to_flag_name(Arguments.TENANT_ID)}," f" {Argument.to_flag_name(Arguments.MULTI_TENANT_APP_NAME)}, and" f" {Argument.to_flag_name(Arguments.CONSENT_URL)}" @@ -160,7 +160,7 @@ def validate(self): or self._has_gcs_storage_info() ): raise Exception( - "Storage type 'file' does not support any storage credentials" + "Storage type 'file' does not support any additional options" ) def _has_aws_storage_info(self):