Python client: make S3 role-ARN optional and add missing endpoint-internal property #2339

Open · wants to merge 1 commit into main
1 change: 1 addition & 0 deletions client/python/cli/command/__init__.py
@@ -66,6 +66,7 @@ def options_get(key, f=lambda x: x):
iceberg_remote_catalog_name=options_get(Arguments.ICEBERG_REMOTE_CATALOG_NAME),
remove_properties=[] if remove_properties is None else remove_properties,
endpoint=options_get(Arguments.ENDPOINT),
endpoint_internal=options_get(Arguments.ENDPOINT_INTERNAL),
sts_endpoint=options_get(Arguments.STS_ENDPOINT),
path_style_access=options_get(Arguments.PATH_STYLE_ACCESS),
catalog_connection_type=options_get(Arguments.CATALOG_CONNECTION_TYPE),
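
For context, a minimal sketch of the options_get pattern used in the hunk above (an assumption for illustration, not the project's actual implementation): it looks up a parsed CLI option by key and applies an optional transform, so the new endpoint_internal argument simply resolves to None when the flag is not supplied.

def make_options_get(parsed_options: dict):
    # Hypothetical stand-in for the real options_get in command/__init__.py,
    # which is assumed to pull values out of the parsed CLI options.
    def options_get(key, f=lambda x: x):
        value = parsed_options.get(key)
        return f(value) if value is not None else None
    return options_get

options_get = make_options_get({"endpoint_internal": "http://polaris-minio:9000"})
print(options_get("endpoint_internal"))  # http://polaris-minio:9000
print(options_get("role_arn"))           # None; the role ARN is now optional for S3
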
17 changes: 9 additions & 8 deletions client/python/cli/command/catalogs.py
@@ -65,6 +65,7 @@ class CatalogsCommand(Command):
hadoop_warehouse: str
iceberg_remote_catalog_name: str
endpoint: str
endpoint_internal: str
sts_endpoint: str
path_style_access: bool
catalog_connection_type: str
@@ -121,18 +122,17 @@ def validate(self):
f" {Argument.to_flag_name(Arguments.CATALOG_SERVICE_IDENTITY_IAM_ARN)}")

if self.storage_type == StorageType.S3.value:
if not self.role_arn:
raise Exception(
f"Missing required argument for storage type 's3':"
f" {Argument.to_flag_name(Arguments.ROLE_ARN)}"
)
if self._has_azure_storage_info() or self._has_gcs_storage_info():
raise Exception(
f"Storage type 's3' supports the storage credentials"
f" {Argument.to_flag_name(Arguments.ROLE_ARN)},"
f" {Argument.to_flag_name(Arguments.REGION)},"
f" {Argument.to_flag_name(Arguments.EXTERNAL_ID)}, and"
f" {Argument.to_flag_name(Arguments.USER_ARN)}"
f" {Argument.to_flag_name(Arguments.EXTERNAL_ID)},"
f" {Argument.to_flag_name(Arguments.USER_ARN)},"
f" {Argument.to_flag_name(Arguments.ENDPOINT)},"
f" {Argument.to_flag_name(Arguments.ENDPOINT_INTERNAL)},"
f" {Argument.to_flag_name(Arguments.STS_ENDPOINT)}, and"
f" {Argument.to_flag_name(Arguments.PATH_STYLE_ACCESS)}"
Review comment (Contributor):
Perhaps the message on line 127 ("Storage type 's3' supports the storage credentials") needs to be adjusted now that we have non-credential options?

Review comment (Contributor):
+1, this is intended to be the list of all supported arguments for the s3 storage type.
)
elif self.storage_type == StorageType.AZURE.value:
if not self.tenant_id:
@@ -164,7 +164,7 @@ def validate(self):
)

def _has_aws_storage_info(self):
return self.role_arn or self.external_id or self.user_arn or self.region or self.endpoint or self.sts_endpoint or self.path_style_access
return self.role_arn or self.external_id or self.user_arn or self.region or self.endpoint or self.endpoint_internal or self.sts_endpoint or self.path_style_access

def _has_azure_storage_info(self):
return self.tenant_id or self.multi_tenant_app_name or self.consent_url
@@ -183,6 +183,7 @@ def _build_storage_config_info(self):
user_arn=self.user_arn,
region=self.region,
endpoint=self.endpoint,
endpoint_internal=self.endpoint_internal,
sts_endpoint=self.sts_endpoint,
path_style_access=self.path_style_access,
)
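
Taken together, the catalogs.py changes drop the hard requirement on a role ARN for S3 and extend the error message to list every supported S3 argument. A simplified, standalone sketch of the resulting validation behavior (field names follow the diff above; the real logic lives in CatalogsCommand.validate and formats flags via Argument.to_flag_name, with kebab-case flag names assumed here):

S3_ARGUMENTS = (
    "role_arn", "region", "external_id", "user_arn",
    "endpoint", "endpoint_internal", "sts_endpoint", "path_style_access",
)

def validate_s3(options: dict) -> None:
    # Simplified: reject Azure-style options combined with storage type 's3',
    # but no longer insist on role_arn being present.
    azure_fields = ("tenant_id", "multi_tenant_app_name", "consent_url")
    if any(options.get(field) for field in azure_fields):
        supported = ", ".join(f"--{name.replace('_', '-')}" for name in S3_ARGUMENTS)
        raise Exception(f"Storage type 's3' supports the arguments: {supported}")

# No role ARN, but an internal endpoint: accepted after this change.
validate_s3({"endpoint": "https://s3.example.com", "endpoint_internal": "http://minio:9000"})
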
4 changes: 3 additions & 1 deletion client/python/cli/constants.py
@@ -168,6 +168,7 @@ class Arguments:
HADOOP_WAREHOUSE = "hadoop_warehouse"
ICEBERG_REMOTE_CATALOG_NAME = "iceberg_remote_catalog_name"
ENDPOINT = "endpoint"
ENDPOINT_INTERNAL = "endpoint_internal"
STS_ENDPOINT = "sts_endpoint"
PATH_STYLE_ACCESS = "path_style_access"
CATALOG_CONNECTION_TYPE = "catalog_connection_type"
@@ -223,11 +224,12 @@ class Create:
"Multiple locations can be provided by specifying this option more than once."
)

ROLE_ARN = "(Required for S3) A role ARN to use when connecting to S3"
ROLE_ARN = "(Only for S3) A role ARN to use when connecting to S3"
EXTERNAL_ID = "(Only for S3) The external ID to use when connecting to S3"
REGION = "(Only for S3) The region to use when connecting to S3"
USER_ARN = "(Only for S3) A user ARN to use when connecting to S3"
ENDPOINT = "(Only for S3) The S3 endpoint to use when connecting to S3"
ENDPOINT_INTERNAL = "(Only for S3) The S3 endpoint used by Polaris when connecting to S3, if different from the one that clients use"
STS_ENDPOINT = (
"(Only for S3) The STS endpoint to use when connecting to STS"
)
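
To illustrate the intent of the new hint, here is a hypothetical S3 storage configuration where the two endpoints differ, for example when clients reach an S3-compatible store through a public hostname while Polaris talks to it over an internal network (all values are made up for illustration and are not project defaults):

s3_storage_config = {
    "storage_type": "S3",
    "endpoint": "https://s3.example.com",       # endpoint handed out to clients
    "endpoint_internal": "http://minio:9000",   # endpoint Polaris itself uses, if different
    "sts_endpoint": "https://sts.example.com",
    "path_style_access": True,
    "role_arn": None,                           # now optional for the s3 storage type
}
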
1 change: 1 addition & 0 deletions client/python/cli/options/option_tree.py
@@ -117,6 +117,7 @@ def get_tree() -> List[Option]:
choices=[st.value for st in StorageType]),
Argument(Arguments.DEFAULT_BASE_LOCATION, str, Hints.Catalogs.Create.DEFAULT_BASE_LOCATION),
Argument(Arguments.ENDPOINT, str, Hints.Catalogs.Create.ENDPOINT),
Argument(Arguments.ENDPOINT_INTERNAL, str, Hints.Catalogs.Create.ENDPOINT_INTERNAL),
Argument(Arguments.STS_ENDPOINT, str, Hints.Catalogs.Create.STS_ENDPOINT),
Argument(Arguments.PATH_STYLE_ACCESS, bool, Hints.Catalogs.Create.PATH_STYLE_ACCESS),
Argument(Arguments.ALLOWED_LOCATION, str, Hints.Catalogs.Create.ALLOWED_LOCATION,
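
Assuming Argument.to_flag_name follows the usual snake_case-to-kebab-case convention (an assumption; the helper's implementation is not shown in this diff), the new option tree entry would surface on the command line as an --endpoint-internal flag next to the existing S3 flags:

def to_flag_name(argument_name: str) -> str:
    # Hypothetical reimplementation for illustration only.
    return "--" + argument_name.replace("_", "-")

for name in ("endpoint", "endpoint_internal", "sts_endpoint", "path_style_access"):
    print(to_flag_name(name))
# --endpoint
# --endpoint-internal
# --sts-endpoint
# --path-style-access
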