Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion regtests/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ ENV LANGUAGE='en_US:en'

USER root
RUN apt update
RUN apt-get install -y diffutils wget curl python3.10-venv
RUN apt-get install -y diffutils wget curl python3.10-venv jq
RUN mkdir -p /home/spark && \
chown -R spark /home/spark && \
mkdir -p /tmp/polaris-regtests && \
Expand Down
9 changes: 9 additions & 0 deletions regtests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ project, just run:
env POLARIS_HOST=localhost ./regtests/run.sh
```

The catalog federation tests only succeed when the following settings are present in
`application.properties`:

```
polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES"=["S3","GCS","AZURE", "FILE"]
polaris.features."ENABLE_CATALOG_FEDERATION"=true
polaris.features."ALLOW_OVERLAPPING_CATALOG_URLS"=true
```

To run the tests in verbose mode, with test stdout printing to console, set the `VERBOSE`
environment variable to `1`; you can also choose to run only a subset of tests by specifying the
test directories as arguments to `run.sh`. For example, to run only the `t_spark_sql` tests in
Expand Down
2 changes: 2 additions & 0 deletions regtests/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ services:
polaris.features."DROP_WITH_PURGE_ENABLED": "true"
polaris.features."ALLOW_INSECURE_STORAGE_TYPES": "true"
polaris.features."SUPPORTED_CATALOG_STORAGE_TYPES": "[\"FILE\",\"S3\",\"GCS\",\"AZURE\"]"
polaris.features."ALLOW_OVERLAPPING_CATALOG_URLS": "true"
polaris.features."ENABLE_CATALOG_FEDERATION": "true"
polaris.readiness.ignore-severe-issues: "true"
volumes:
- ./credentials:/tmp/credentials/
Expand Down
61 changes: 61 additions & 0 deletions regtests/t_catalog_federation/ref/catalog_federation.sh.ref
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
=== Setting up Catalog Federation Test ===
Creating new principal...
Creating local catalog...
Create local catalog response code: 201
Setting up permissions...
Grant TABLE_WRITE_DATA to catalog_admin response code: 201
Assign catalog_admin to service_admin response code: 201
Assign service_admin to new-user response code: 201
Creating external catalog (passthrough facade)...
Create external catalog response code: 201
Setting up permissions for external catalog...
Grant TABLE_WRITE_DATA to external catalog_admin response code: 201
Assign catalog_admin to service_admin for external catalog response code: 201
Catalogs created successfully

=== Starting federation test ===
=== Creating data via LOCAL catalog ===
spark-sql ()> use polaris;
spark-sql ()> create namespace if not exists ns1;
spark-sql ()> create table if not exists ns1.test_table (id int, name string);
spark-sql ()> insert into ns1.test_table values (1, 'Alice');
spark-sql ()> insert into ns1.test_table values (2, 'Bob');
spark-sql ()> create namespace if not exists ns2;
spark-sql ()> create table if not exists ns2.test_table (id int, name string);
spark-sql ()> insert into ns2.test_table values (1, 'Apache Spark');
spark-sql ()> insert into ns2.test_table values (2, 'Apache Iceberg');
spark-sql ()>
=== Accessing data via EXTERNAL catalog ===
spark-sql ()> use polaris;
spark-sql ()> show namespaces;
ns1
ns2
spark-sql ()> select * from ns1.test_table order by id;
1 Alice
2 Bob
spark-sql ()> insert into ns1.test_table values (3, 'Charlie');
spark-sql ()> select * from ns2.test_table order by id;
1 Apache Spark
2 Apache Iceberg
spark-sql ()> insert into ns2.test_table values (3, 'Apache Polaris');
spark-sql ()>
=== Verifying federation via LOCAL catalog ===
spark-sql ()> use polaris;
spark-sql ()> select * from ns1.test_table order by id;
1 Alice
2 Bob
3 Charlie
spark-sql ()> select * from ns2.test_table order by id;
1 Apache Spark
2 Apache Iceberg
3 Apache Polaris
spark-sql ()> drop table ns1.test_table;
spark-sql ()> drop table ns2.test_table;
spark-sql ()> drop namespace ns1;
spark-sql ()> drop namespace ns2;
spark-sql ()>
=== Cleaning up catalogs and principal ===
Delete external catalog response code: 204
Delete local catalog response code: 204
Delete principal response code: 204
Catalog federation test completed successfully!
196 changes: 196 additions & 0 deletions regtests/t_catalog_federation/src/catalog_federation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# This test creates an INTERNAL catalog and an EXTERNAL catalog with passthrough facade
# to demonstrate true catalog federation.

set -e

# Bearer token taken from REGTEST_ROOT_BEARER_TOKEN; it authenticates every
# management API call and Spark session in this script.
SPARK_BEARER_TOKEN="${REGTEST_ROOT_BEARER_TOKEN}"

echo "=== Setting up Catalog Federation Test ==="

# Step 1: Create a new principal
# The credentials returned for "new-user" are later used as the OAuth client of
# the external (federated) catalog.
echo "Creating new principal..."
PRINCIPAL_RESPONSE=$(curl -s -X POST \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principals" \
  -d '{"principal": {"name": "new-user"}}')

# `// empty` turns a missing/null field into an empty string instead of the
# literal text "null", so a failed creation can be detected below.
NEW_CLIENT_ID=$(jq -r '.credentials.clientId // empty' <<< "${PRINCIPAL_RESPONSE}")
NEW_CLIENT_SECRET=$(jq -r '.credentials.clientSecret // empty' <<< "${PRINCIPAL_RESPONSE}")

# Fail fast: without credentials the external catalog would be created with an
# unusable OAuth client and the test would only fail much later.
if [[ -z "${NEW_CLIENT_ID}" || -z "${NEW_CLIENT_SECRET}" ]]; then
  echo "Failed to obtain credentials for principal 'new-user' (does it already exist from an earlier run?)" >&2
  exit 1
fi

# Step 2: Create local catalog
# Creates the INTERNAL catalog "test-catalog-local" on FILE storage under
# file:///tmp/warehouse. --write-out appends the HTTP status after any response
# body, so RESPONSE_CODE is a bare status code only when the body is empty.
echo "Creating local catalog..."
RESPONSE_CODE=$(curl -s -X POST \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs" \
  -d '{
    "type": "INTERNAL",
    "name": "test-catalog-local",
    "properties": {
      "default-base-location": "file:///tmp/warehouse"
    },
    "storageConfigInfo": {
      "storageType": "FILE",
      "allowedLocations": ["file:///tmp/warehouse"]
    }
  }' \
  --write-out "%{http_code}")
echo "Create local catalog response code: $RESPONSE_CODE"



# Step 3: Grant permissions
# Three PUT calls against the management API; each prints the HTTP status that
# curl reports via --write-out.
echo "Setting up permissions..."

# Grant TABLE_WRITE_DATA privilege to catalog_admin for local catalog
RESPONSE_CODE=$(curl -s -X PUT \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-local/catalog-roles/catalog_admin/grants" \
  -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' \
  --write-out "%{http_code}")
echo "Grant TABLE_WRITE_DATA to catalog_admin response code: $RESPONSE_CODE"

# Assign catalog_admin to service_admin
RESPONSE_CODE=$(curl -s -X PUT \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/test-catalog-local" \
  -d '{"name": "catalog_admin"}' \
  --write-out "%{http_code}")
echo "Assign catalog_admin to service_admin response code: $RESPONSE_CODE"

# Assign service_admin to new-user
RESPONSE_CODE=$(curl -s -X PUT \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principals/new-user/principal-roles" \
  -d '{"name": "service_admin"}' \
  --write-out "%{http_code}")
echo "Assign service_admin to new-user response code: $RESPONSE_CODE"

# Step 4: Create external catalog
# The EXTERNAL catalog points back at this same Polaris instance over the
# Iceberg REST API (remoteCatalogName = test-catalog-local) and authenticates
# with the OAuth credentials of the "new-user" principal.
echo "Creating external catalog (passthrough facade)..."

# Build the payload with jq so that the interpolated values (host, client id,
# client secret) are always JSON-escaped correctly, whatever characters they
# contain.
external_catalog_payload=$(jq -n \
  --arg uri "http://${POLARIS_HOST:-localhost}:8181/api/catalog" \
  --arg token_uri "http://${POLARIS_HOST:-localhost}:8181/api/catalog/v1/oauth/tokens" \
  --arg client_id "${NEW_CLIENT_ID}" \
  --arg client_secret "${NEW_CLIENT_SECRET}" \
  '{
    "type": "EXTERNAL",
    "name": "test-catalog-external",
    "connectionConfigInfo": {
      "connectionType": "ICEBERG_REST",
      "uri": $uri,
      "remoteCatalogName": "test-catalog-local",
      "authenticationParameters": {
        "authenticationType": "OAUTH",
        "tokenUri": $token_uri,
        "clientId": $client_id,
        "clientSecret": $client_secret,
        "scopes": ["PRINCIPAL_ROLE:ALL"]
      }
    },
    "properties": {
      "default-base-location": "file:///tmp/warehouse"
    },
    "storageConfigInfo": {
      "storageType": "FILE",
      "allowedLocations": ["file:///tmp/warehouse"]
    }
  }')

RESPONSE_CODE=$(curl -s -X POST \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs" \
  -d "${external_catalog_payload}" \
  --write-out "%{http_code}")
echo "Create external catalog response code: $RESPONSE_CODE"

# Step 5: Grant permissions for external catalog
# Repeats the catalog-level grant and role assignment for test-catalog-external;
# each call prints the HTTP status that curl reports via --write-out.
echo "Setting up permissions for external catalog..."

# Grant TABLE_WRITE_DATA privilege to catalog_admin role for test-catalog-external
RESPONSE_CODE=$(curl -s -X PUT \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-external/catalog-roles/catalog_admin/grants" \
  -d '{"type": "catalog", "privilege": "TABLE_WRITE_DATA"}' \
  --write-out "%{http_code}")
echo "Grant TABLE_WRITE_DATA to external catalog_admin response code: $RESPONSE_CODE"

# Assign catalog_admin role to service_admin principal-role for test-catalog-external
RESPONSE_CODE=$(curl -s -X PUT \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principal-roles/service_admin/catalog-roles/test-catalog-external" \
  -d '{"name": "catalog_admin"}' \
  --write-out "%{http_code}")
echo "Assign catalog_admin to service_admin for external catalog response code: $RESPONSE_CODE"

echo "Catalogs created successfully"

echo ""
echo "=== Starting federation test ==="

# Test data operations via local catalog
# Creates two namespaces, each with a two-row table, through test-catalog-local.
echo "=== Creating data via LOCAL catalog ==="

# Spark settings kept in an array so that each option stays a single, correctly
# quoted argument.
local_spark_conf=(
  --conf "spark.sql.catalog.polaris.token=${SPARK_BEARER_TOKEN}"
  --conf "spark.sql.catalog.polaris.warehouse=test-catalog-local"
  --conf "spark.sql.defaultCatalog=polaris"
  --conf "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
)
cat << EOF | "${SPARK_HOME}/bin/spark-sql" -S "${local_spark_conf[@]}"
use polaris;
create namespace if not exists ns1;
create table if not exists ns1.test_table (id int, name string);
insert into ns1.test_table values (1, 'Alice');
insert into ns1.test_table values (2, 'Bob');
create namespace if not exists ns2;
create table if not exists ns2.test_table (id int, name string);
insert into ns2.test_table values (1, 'Apache Spark');
insert into ns2.test_table values (2, 'Apache Iceberg');
EOF

echo ""
echo "=== Accessing data via EXTERNAL catalog ==="

# Same Spark session settings as for the local catalog, except that the
# warehouse is the federated catalog test-catalog-external. Reads the rows
# written through the local catalog and appends one row to each table.
external_spark_conf=(
  --conf "spark.sql.catalog.polaris.token=${SPARK_BEARER_TOKEN}"
  --conf "spark.sql.catalog.polaris.warehouse=test-catalog-external"
  --conf "spark.sql.defaultCatalog=polaris"
  --conf "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
)
cat << EOF | "${SPARK_HOME}/bin/spark-sql" -S "${external_spark_conf[@]}"
use polaris;
show namespaces;
select * from ns1.test_table order by id;
insert into ns1.test_table values (3, 'Charlie');
select * from ns2.test_table order by id;
insert into ns2.test_table values (3, 'Apache Polaris');
EOF

# Blank separator line followed by the banner for the verification phase.
printf '\n'
printf '%s\n' "=== Verifying federation via LOCAL catalog ==="
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we do some verification of RBAC as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will update this test once #2223 is merged. For now, we can only do catalog-level RBACs for federated catalogs.

# Reads both tables back through test-catalog-local (the rows inserted via the
# external catalog must be visible here), then drops the tables and namespaces.
verify_spark_conf=(
  --conf "spark.sql.catalog.polaris.token=${SPARK_BEARER_TOKEN}"
  --conf "spark.sql.catalog.polaris.warehouse=test-catalog-local"
  --conf "spark.sql.defaultCatalog=polaris"
  --conf "spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions"
)
cat << EOF | "${SPARK_HOME}/bin/spark-sql" -S "${verify_spark_conf[@]}"
use polaris;
select * from ns1.test_table order by id;
select * from ns2.test_table order by id;
drop table ns1.test_table;
drop table ns2.test_table;
drop namespace ns1;
drop namespace ns2;
EOF

echo ""
echo "=== Cleaning up catalogs and principal ==="
# Clean up catalogs
# The external catalog is deleted before the local catalog it federates to.
# -s matches the other curl calls in this script and keeps curl's progress
# meter off stderr.
RESPONSE_CODE=$(curl -s -X DELETE \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-external" \
  --write-out "%{http_code}")
echo "Delete external catalog response code: $RESPONSE_CODE"

RESPONSE_CODE=$(curl -s -X DELETE \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/catalogs/test-catalog-local" \
  --write-out "%{http_code}")
echo "Delete local catalog response code: $RESPONSE_CODE"

# Clean up principal
RESPONSE_CODE=$(curl -s -X DELETE \
  -H "Authorization: Bearer ${SPARK_BEARER_TOKEN}" \
  -H 'Accept: application/json' \
  -H 'Content-Type: application/json' \
  "http://${POLARIS_HOST:-localhost}:8181/api/management/v1/principals/new-user" \
  --write-out "%{http_code}")
echo "Delete principal response code: $RESPONSE_CODE"

echo "Catalog federation test completed successfully!"