Skip to content

Commit 7f50667

Browse files
Support HMS Federation (#2355)
Supports federating to HiveCatalog using the Iceberg REST library. All hive dependencies are added in an independent module, i.e., `polaris-extensions-federation-hive` and can be removed/converted to a compile time flag if necessary. Similar to HadoopCatalog, HMS federation support is currently restricted to `IMPLICIT` auth. The underlying authentication can be any form that Hive supports, however Polaris will not store and manage any of these credentials. Again, similar to HadoopCatalog, this version supports federating to a single Hive instance. This PR relies on Polaris discovering the `hive-site.xml` file to get the configuration options from the classpath (including `HADOOP_CONF_DIR`). The spec change has been discussed in the [dev mailing list](https://lists.apache.org/thread/5qktjv6rzd8pghcl6f4oohko798o2p2g), followed by a discussion in the Polaris community sync on Aug 7, 2025. Testing: Modified the regression test to locally test that Hive federation works as expected. The next step would be to add a regression test once the change is baked into the Polaris docker image (for CI builds). This PR primarily builds on #1305 and #1466. Thank you @dennishuo and @eric-maynard for helping out with this!
1 parent 22e0c1c commit 7f50667

File tree

10 files changed

+301
-0
lines changed

10 files changed

+301
-0
lines changed

extensions/federation/hive/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
<!--
2+
Licensed to the Apache Software Foundation (ASF) under one
3+
or more contributor license agreements. See the NOTICE file
4+
distributed with this work for additional information
5+
regarding copyright ownership. The ASF licenses this file
6+
to you under the Apache License, Version 2.0 (the
7+
"License"); you may not use this file except in compliance
8+
with the License. You may obtain a copy of the License at
9+
10+
http://www.apache.org/licenses/LICENSE-2.0
11+
12+
Unless required by applicable law or agreed to in writing,
13+
software distributed under the License is distributed on an
14+
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
KIND, either express or implied. See the License for the
16+
specific language governing permissions and limitations
17+
under the License.
18+
-->
19+
### Using the `HiveFederatedCatalogFactory`
20+
21+
This `HiveFederatedCatalogFactory` module is an independent compilation unit and will be built into the Polaris binary only when the following flag is set in the gradle.properties file:
22+
```
23+
NonRESTCatalogs=HIVE,<alternates>
24+
```
25+
26+
The other option is to pass it as a Gradle project property on the command line:
27+
```
28+
./gradlew build -PNonRESTCatalogs=HIVE
29+
```
30+
31+
Without this flag, the Hive factory won't be compiled into Polaris and therefore Polaris will not load the class at runtime, throwing an unsupported exception for federated catalog calls.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
plugins {
21+
id("polaris-client")
22+
alias(libs.plugins.jandex)
23+
}
24+
25+
dependencies {
26+
// Polaris dependencies
27+
implementation(project(":polaris-core"))
28+
29+
implementation(platform(libs.iceberg.bom))
30+
implementation("org.apache.iceberg:iceberg-api")
31+
implementation("org.apache.iceberg:iceberg-core")
32+
implementation("org.apache.iceberg:iceberg-common")
33+
// Use iceberg-hive-metastore but exclude conflicting hive dependencies
34+
implementation("org.apache.iceberg:iceberg-hive-metastore") { exclude(group = "org.apache.hive") }
35+
// Add our own Hive dependencies (version pinned by `hive` in gradle/libs.versions.toml)
36+
implementation(libs.hive.metastore) {
37+
exclude("org.slf4j", "slf4j-reload4j")
38+
exclude("org.slf4j", "slf4j-log4j12")
39+
exclude("ch.qos.reload4j", "reload4j")
40+
exclude("log4j", "log4j")
41+
exclude("org.apache.zookeeper", "zookeeper")
42+
}
43+
44+
// Hadoop dependencies
45+
implementation(libs.hadoop.common) {
46+
exclude("org.slf4j", "slf4j-reload4j")
47+
exclude("org.slf4j", "slf4j-log4j12")
48+
exclude("ch.qos.reload4j", "reload4j")
49+
exclude("log4j", "log4j")
50+
exclude("org.apache.zookeeper", "zookeeper")
51+
exclude("org.apache.hadoop.thirdparty", "hadoop-shaded-protobuf_3_25")
52+
exclude("com.github.pjfanning", "jersey-json")
53+
exclude("com.sun.jersey", "jersey-core")
54+
exclude("com.sun.jersey", "jersey-server")
55+
exclude("com.sun.jersey", "jersey-servlet")
56+
exclude("io.dropwizard.metrics", "metrics-core")
57+
}
58+
59+
// CDI dependencies for runtime discovery
60+
implementation(libs.jakarta.enterprise.cdi.api)
61+
implementation(libs.smallrye.common.annotation)
62+
63+
// Logging
64+
implementation(libs.slf4j.api)
65+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.polaris.extensions.federation.hive;
20+
21+
import io.smallrye.common.annotation.Identifier;
22+
import jakarta.enterprise.context.ApplicationScoped;
23+
import org.apache.iceberg.catalog.Catalog;
24+
import org.apache.iceberg.hive.HiveCatalog;
25+
import org.apache.polaris.core.catalog.ExternalCatalogFactory;
26+
import org.apache.polaris.core.connection.AuthenticationParametersDpo;
27+
import org.apache.polaris.core.connection.AuthenticationType;
28+
import org.apache.polaris.core.connection.ConnectionConfigInfoDpo;
29+
import org.apache.polaris.core.connection.ConnectionType;
30+
import org.apache.polaris.core.connection.hive.HiveConnectionConfigInfoDpo;
31+
import org.apache.polaris.core.secrets.UserSecretsManager;
32+
import org.slf4j.Logger;
33+
import org.slf4j.LoggerFactory;
34+
35+
/** Factory class for creating a Hive catalog handle based on connection configuration. */
36+
@ApplicationScoped
37+
@Identifier(ConnectionType.HIVE_FACTORY_IDENTIFIER)
38+
public class HiveFederatedCatalogFactory implements ExternalCatalogFactory {
39+
private static final Logger LOGGER = LoggerFactory.getLogger(HiveFederatedCatalogFactory.class);
40+
41+
@Override
42+
public Catalog createCatalog(
43+
ConnectionConfigInfoDpo connectionConfigInfoDpo, UserSecretsManager userSecretsManager) {
44+
// Currently, Polaris supports Hive federation only via IMPLICIT authentication.
45+
// Hence, prior to initializing the configuration, ensure that the catalog uses
46+
// IMPLICIT authentication.
47+
AuthenticationParametersDpo authenticationParametersDpo =
48+
connectionConfigInfoDpo.getAuthenticationParameters();
49+
if (authenticationParametersDpo.getAuthenticationTypeCode()
50+
!= AuthenticationType.IMPLICIT.getCode()) {
51+
throw new IllegalStateException("Hive federation only supports IMPLICIT authentication.");
52+
}
53+
String warehouse = ((HiveConnectionConfigInfoDpo) connectionConfigInfoDpo).getWarehouse();
54+
// Unlike Hadoop, HiveCatalog does not require us to create a Configuration object, the iceberg
55+
// rest library find the default configuration by reading hive-site.xml in the classpath
56+
// (including HADOOP_CONF_DIR classpath).
57+
58+
// TODO: In the future, we could support multiple HiveCatalog instances based on polaris/catalog
59+
// properties.
60+
// A brief set of setps involved (and the options):
61+
// 1. Create a configuration without default properties.
62+
// `Configuration conf = new Configuration(boolean loadDefaults=false);`
63+
// 2a. Specify the hive-site.xml file path in the configuration.
64+
// `conf.addResource(new Path(hiveSiteXmlPath));`
65+
// 2b. Specify individual properties in the configuration.
66+
// `conf.set(property, value);`
67+
// Polaris could support federating to multiple LDAP based Hive metastores. Multiple
68+
// Kerberos instances are not suitable because Kerberos ties a single identity to the server.
69+
HiveCatalog hiveCatalog = new HiveCatalog();
70+
hiveCatalog.initialize(
71+
warehouse, connectionConfigInfoDpo.asIcebergCatalogProperties(userSecretsManager));
72+
return hiveCatalog;
73+
}
74+
}

gradle/libs.versions.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
[versions]
2121
checkstyle = "10.25.0"
2222
hadoop = "3.4.1"
23+
hive = "3.1.3"
2324
iceberg = "1.9.2" # Ensure to update the iceberg version in regtests to keep regtests up-to-date
2425
quarkus = "3.25.0"
2526
immutables = "2.11.3"
@@ -56,6 +57,7 @@ dnsjava = { module = "dnsjava:dnsjava", version = "3.6.3" }
5657
hadoop-client-api = { module = "org.apache.hadoop:hadoop-client-api", version.ref = "hadoop" }
5758
hadoop-client-runtime = { module = "org.apache.hadoop:hadoop-client-runtime", version.ref = "hadoop" }
5859
hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" }
60+
hive-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive" }
5961
hadoop-hdfs-client = { module = "org.apache.hadoop:hadoop-hdfs-client", version.ref = "hadoop" }
6062
hawkular-agent-prometheus-scraper = { module = "org.hawkular.agent:prometheus-scraper", version = "0.23.0.Final" }
6163
immutables-builder = { module = "org.immutables:builder", version.ref = "immutables" }

gradle/projects.main.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ polaris-version=tools/version
4343
polaris-misc-types=tools/misc-types
4444
polaris-persistence-varint=nosql/persistence/varint
4545
polaris-extensions-federation-hadoop=extensions/federation/hadoop
46+
polaris-extensions-federation-hive=extensions/federation/hive
4647

4748
polaris-config-docs-annotations=tools/config-docs/annotations
4849
polaris-config-docs-generator=tools/config-docs/generator

polaris-core/src/main/java/org/apache/polaris/core/connection/ConnectionConfigInfoDpo.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@
3434
import java.util.Map;
3535
import org.apache.polaris.core.admin.model.ConnectionConfigInfo;
3636
import org.apache.polaris.core.admin.model.HadoopConnectionConfigInfo;
37+
import org.apache.polaris.core.admin.model.HiveConnectionConfigInfo;
3738
import org.apache.polaris.core.admin.model.IcebergRestConnectionConfigInfo;
3839
import org.apache.polaris.core.connection.hadoop.HadoopConnectionConfigInfoDpo;
40+
import org.apache.polaris.core.connection.hive.HiveConnectionConfigInfoDpo;
3941
import org.apache.polaris.core.connection.iceberg.IcebergCatalogPropertiesProvider;
4042
import org.apache.polaris.core.connection.iceberg.IcebergRestConnectionConfigInfoDpo;
4143
import org.apache.polaris.core.identity.dpo.ServiceIdentityInfoDpo;
@@ -54,6 +56,7 @@
5456
@JsonSubTypes({
5557
@JsonSubTypes.Type(value = IcebergRestConnectionConfigInfoDpo.class, name = "1"),
5658
@JsonSubTypes.Type(value = HadoopConnectionConfigInfoDpo.class, name = "2"),
59+
@JsonSubTypes.Type(value = HiveConnectionConfigInfoDpo.class, name = "3"),
5760
})
5861
public abstract class ConnectionConfigInfoDpo implements IcebergCatalogPropertiesProvider {
5962
private static final Logger logger = LoggerFactory.getLogger(ConnectionConfigInfoDpo.class);
@@ -144,6 +147,11 @@ public static ConnectionConfigInfoDpo deserialize(final @Nonnull String jsonStr)
144147
protected void validateUri(String uri) {
145148
try {
146149
URI uriObj = URI.create(uri);
150+
// Hive metastore runs a thrift server, so a "thrift" URI is accepted as-is for Hive
// connections instead of being converted with toURL() below.
// Compare constant-first: URI#getScheme() returns null for a scheme-less URI, and the
// resulting NullPointerException would not be caught by the catch clause of this try block.
if (connectionTypeCode == ConnectionType.HIVE.getCode()
    && "thrift".equals(uriObj.getScheme())) {
  return;
}
147155
URL url = uriObj.toURL();
148156
} catch (IllegalArgumentException | MalformedURLException e) {
149157
throw new IllegalArgumentException("Invalid remote URI: " + uri, e);
@@ -187,6 +195,16 @@ public static ConnectionConfigInfoDpo fromConnectionConfigInfoModelWithSecrets(
187195
null /*Service Identity Info*/,
188196
hadoopConfigModel.getWarehouse());
189197
break;
198+
case HIVE:
199+
HiveConnectionConfigInfo hiveConfigModel =
200+
(HiveConnectionConfigInfo) connectionConfigurationModel;
201+
authenticationParameters =
202+
AuthenticationParametersDpo.fromAuthenticationParametersModelWithSecrets(
203+
hiveConfigModel.getAuthenticationParameters(), secretReferences);
204+
config =
205+
new HiveConnectionConfigInfoDpo(
206+
hiveConfigModel.getUri(), authenticationParameters, hiveConfigModel.getWarehouse());
207+
break;
190208
default:
191209
throw new IllegalStateException(
192210
"Unsupported connection type: " + connectionConfigurationModel.getConnectionType());

polaris-core/src/main/java/org/apache/polaris/core/connection/ConnectionType.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@ public enum ConnectionType {
3333
NULL_TYPE(0),
3434
ICEBERG_REST(1),
3535
HADOOP(2),
36+
HIVE(3),
3637
;
3738

3839
public static final String ICEBERG_REST_FACTORY_IDENTIFIER = "iceberg_rest";
3940
public static final String HADOOP_FACTORY_IDENTIFIER = "hadoop";
41+
public static final String HIVE_FACTORY_IDENTIFIER = "hive";
4042

4143
private static final ConnectionType[] REVERSE_MAPPING_ARRAY;
4244

@@ -93,6 +95,8 @@ public String getFactoryIdentifier() {
9395
return ICEBERG_REST_FACTORY_IDENTIFIER;
9496
case HADOOP:
9597
return HADOOP_FACTORY_IDENTIFIER;
98+
case HIVE:
99+
return HIVE_FACTORY_IDENTIFIER;
96100
default:
97101
throw new UnsupportedOperationException(
98102
"No factory identifier for connection type: " + this);
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.polaris.core.connection.hive;
20+
21+
import com.fasterxml.jackson.annotation.JsonProperty;
22+
import com.google.common.base.MoreObjects;
23+
import jakarta.annotation.Nonnull;
24+
import jakarta.annotation.Nullable;
25+
import java.util.HashMap;
26+
import java.util.Map;
27+
import org.apache.iceberg.CatalogProperties;
28+
import org.apache.polaris.core.admin.model.ConnectionConfigInfo;
29+
import org.apache.polaris.core.admin.model.HiveConnectionConfigInfo;
30+
import org.apache.polaris.core.connection.AuthenticationParametersDpo;
31+
import org.apache.polaris.core.connection.ConnectionConfigInfoDpo;
32+
import org.apache.polaris.core.connection.ConnectionType;
33+
import org.apache.polaris.core.secrets.UserSecretsManager;
34+
35+
/**
36+
* The internal persistence-object counterpart to {@link
37+
* org.apache.polaris.core.admin.model.HiveConnectionConfigInfo} defined in the API model.
38+
*/
39+
public class HiveConnectionConfigInfoDpo extends ConnectionConfigInfoDpo {
40+
41+
private final String warehouse;
42+
43+
public HiveConnectionConfigInfoDpo(
44+
@JsonProperty(value = "uri", required = true) @Nonnull String uri,
45+
@JsonProperty(value = "authenticationParameters", required = false) @Nullable
46+
AuthenticationParametersDpo authenticationParameters,
47+
@JsonProperty(value = "warehouse", required = false) @Nullable String warehouse) {
48+
super(ConnectionType.HIVE.getCode(), uri, authenticationParameters);
49+
this.warehouse = warehouse;
50+
}
51+
52+
public String getWarehouse() {
53+
return warehouse;
54+
}
55+
56+
@Override
57+
public String toString() {
58+
return MoreObjects.toStringHelper(this)
59+
.add("connectionTypeCode", getConnectionTypeCode())
60+
.add("uri", getUri())
61+
.add("warehouse", getWarehouse())
62+
.add("authenticationParameters", getAuthenticationParameters().toString())
63+
.toString();
64+
}
65+
66+
@Override
67+
public @Nonnull Map<String, String> asIcebergCatalogProperties(
68+
UserSecretsManager secretsManager) {
69+
HashMap<String, String> properties = new HashMap<>();
70+
properties.put(CatalogProperties.URI, getUri());
71+
if (getWarehouse() != null) {
72+
properties.put(CatalogProperties.WAREHOUSE_LOCATION, getWarehouse());
73+
}
74+
if (getAuthenticationParameters() != null) {
75+
properties.putAll(getAuthenticationParameters().asIcebergCatalogProperties(secretsManager));
76+
}
77+
return properties;
78+
}
79+
80+
@Override
81+
public ConnectionConfigInfo asConnectionConfigInfoModel() {
82+
return HiveConnectionConfigInfo.builder()
83+
.setConnectionType(ConnectionConfigInfo.ConnectionTypeEnum.HIVE)
84+
.setUri(getUri())
85+
.setWarehouse(getWarehouse())
86+
.setAuthenticationParameters(
87+
getAuthenticationParameters().asAuthenticationParametersModel())
88+
.build();
89+
}
90+
}

runtime/server/build.gradle.kts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ dependencies {
5050
runtimeOnly("io.quarkus:quarkus-jdbc-postgresql")
5151
runtimeOnly(project(":polaris-extensions-federation-hadoop"))
5252

53+
if ((project.findProperty("NonRESTCatalogs") as String?)?.contains("HIVE") == true) {
54+
runtimeOnly(project(":polaris-extensions-federation-hive"))
55+
}
56+
5357
// enforce the Quarkus _platform_ here, to get a consistent and validated set of dependencies
5458
implementation(enforcedPlatform(libs.quarkus.bom))
5559
implementation("io.quarkus:quarkus-container-image-docker")

0 commit comments

Comments
 (0)