Skip to content

Commit efa4a18

Browse files
Support HMS Federation
1 parent b2b1453 commit efa4a18

File tree

9 files changed

+257
-0
lines changed

9 files changed

+257
-0
lines changed
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
plugins {
21+
id("polaris-client")
22+
alias(libs.plugins.jandex)
23+
}
24+
25+
dependencies {
26+
// Polaris dependencies
27+
implementation(project(":polaris-core"))
28+
29+
implementation(platform(libs.iceberg.bom))
30+
implementation("org.apache.iceberg:iceberg-api")
31+
implementation("org.apache.iceberg:iceberg-core")
32+
implementation("org.apache.iceberg:iceberg-common")
33+
// Use iceberg-hive-metastore but exclude conflicting hive dependencies
34+
implementation("org.apache.iceberg:iceberg-hive-metastore") {
35+
exclude(group = "org.apache.hive")
36+
}
37+
// Add our own Hive 4.1.0 dependencies
38+
implementation(libs.hive.metastore) {
39+
exclude("org.slf4j", "slf4j-reload4j")
40+
exclude("org.slf4j", "slf4j-log4j12")
41+
exclude("ch.qos.reload4j", "reload4j")
42+
exclude("log4j", "log4j")
43+
exclude("org.apache.zookeeper", "zookeeper")
44+
}
45+
46+
// Hadoop dependencies
47+
implementation(libs.hadoop.common) {
48+
exclude("org.slf4j", "slf4j-reload4j")
49+
exclude("org.slf4j", "slf4j-log4j12")
50+
exclude("ch.qos.reload4j", "reload4j")
51+
exclude("log4j", "log4j")
52+
exclude("org.apache.zookeeper", "zookeeper")
53+
exclude("org.apache.hadoop.thirdparty", "hadoop-shaded-protobuf_3_25")
54+
exclude("com.github.pjfanning", "jersey-json")
55+
exclude("com.sun.jersey", "jersey-core")
56+
exclude("com.sun.jersey", "jersey-server")
57+
exclude("com.sun.jersey", "jersey-servlet")
58+
exclude("io.dropwizard.metrics", "metrics-core")
59+
}
60+
61+
// CDI dependencies for runtime discovery
62+
implementation(libs.jakarta.enterprise.cdi.api)
63+
implementation(libs.smallrye.common.annotation)
64+
65+
// Logging
66+
implementation(libs.slf4j.api)
67+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.polaris.extensions.federation.hive;
20+
21+
import io.smallrye.common.annotation.Identifier;
22+
import jakarta.enterprise.context.ApplicationScoped;
23+
import org.apache.iceberg.catalog.Catalog;
24+
import org.apache.iceberg.hive.HiveCatalog;
25+
import org.apache.polaris.core.catalog.ExternalCatalogFactory;
26+
import org.apache.polaris.core.connection.AuthenticationParametersDpo;
27+
import org.apache.polaris.core.connection.AuthenticationType;
28+
import org.apache.polaris.core.connection.ConnectionConfigInfoDpo;
29+
import org.apache.polaris.core.connection.ConnectionType;
30+
import org.apache.polaris.core.connection.hive.HiveConnectionConfigInfoDpo;
31+
import org.apache.polaris.core.secrets.UserSecretsManager;
32+
import org.slf4j.Logger;
33+
import org.slf4j.LoggerFactory;
34+
35+
/** Factory class for creating a Hive catalog handle based on connection configuration. */
36+
@ApplicationScoped
37+
@Identifier(ConnectionType.HIVE_FACTORY_IDENTIFIER)
38+
public class HiveFederatedCatalogFactory implements ExternalCatalogFactory {
39+
private static final Logger LOGGER = LoggerFactory.getLogger(HiveFederatedCatalogFactory.class);
40+
41+
@Override
42+
public Catalog createCatalog(
43+
ConnectionConfigInfoDpo connectionConfigInfoDpo, UserSecretsManager userSecretsManager) {
44+
// Currently, Polaris supports Hive federation only via IMPLICIT authentication.
45+
// Hence, prior to initializing the configuration, ensure that the catalog uses
46+
// IMPLICIT authentication.
47+
AuthenticationParametersDpo authenticationParametersDpo =
48+
connectionConfigInfoDpo.getAuthenticationParameters();
49+
if (authenticationParametersDpo.getAuthenticationTypeCode()
50+
!= AuthenticationType.IMPLICIT.getCode()) {
51+
throw new IllegalStateException("Hive federation only supports IMPLICIT authentication.");
52+
}
53+
String warehouse = ((HiveConnectionConfigInfoDpo) connectionConfigInfoDpo).getWarehouse();
54+
// Unlike Hadoop, HiveCatalog does not require us to create a Configuration object, the iceberg
55+
// rest library find the default configuration by reading hive-site.xml in the classpath
56+
// (including HADOOP_CONF_DIR classpath).
57+
HiveCatalog hiveCatalog = new HiveCatalog();
58+
hiveCatalog.initialize(
59+
warehouse, connectionConfigInfoDpo.asIcebergCatalogProperties(userSecretsManager));
60+
return hiveCatalog;
61+
}
62+
}

gradle/libs.versions.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
[versions]
2121
checkstyle = "10.25.0"
2222
hadoop = "3.4.1"
23+
hive = "3.1.3"
2324
iceberg = "1.9.2" # Ensure to update the iceberg version in regtests to keep regtests up-to-date
2425
quarkus = "3.25.0"
2526
immutables = "2.11.3"
@@ -56,6 +57,7 @@ dnsjava = { module = "dnsjava:dnsjava", version = "3.6.3" }
5657
hadoop-client-api = { module = "org.apache.hadoop:hadoop-client-api", version.ref = "hadoop" }
5758
hadoop-client-runtime = { module = "org.apache.hadoop:hadoop-client-runtime", version.ref = "hadoop" }
5859
hadoop-common = { module = "org.apache.hadoop:hadoop-common", version.ref = "hadoop" }
60+
hive-metastore = { module = "org.apache.hive:hive-metastore", version.ref = "hive" }
5961
hadoop-hdfs-client = { module = "org.apache.hadoop:hadoop-hdfs-client", version.ref = "hadoop" }
6062
hawkular-agent-prometheus-scraper = { module = "org.hawkular.agent:prometheus-scraper", version = "0.23.0.Final" }
6163
immutables-builder = { module = "org.immutables:builder", version.ref = "immutables" }

gradle/projects.main.properties

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ polaris-version=tools/version
4343
polaris-misc-types=tools/misc-types
4444
polaris-persistence-varint=nosql/persistence/varint
4545
polaris-extensions-federation-hadoop=extensions/federation/hadoop
46+
polaris-extensions-federation-hive=extensions/federation/hive
4647

4748
polaris-config-docs-annotations=tools/config-docs/annotations
4849
polaris-config-docs-generator=tools/config-docs/generator

polaris-core/src/main/java/org/apache/polaris/core/connection/ConnectionConfigInfoDpo.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@
3232
import java.util.Map;
3333
import org.apache.polaris.core.admin.model.ConnectionConfigInfo;
3434
import org.apache.polaris.core.admin.model.HadoopConnectionConfigInfo;
35+
import org.apache.polaris.core.admin.model.HiveConnectionConfigInfo;
3536
import org.apache.polaris.core.admin.model.IcebergRestConnectionConfigInfo;
3637
import org.apache.polaris.core.connection.hadoop.HadoopConnectionConfigInfoDpo;
38+
import org.apache.polaris.core.connection.hive.HiveConnectionConfigInfoDpo;
3739
import org.apache.polaris.core.connection.iceberg.IcebergCatalogPropertiesProvider;
3840
import org.apache.polaris.core.connection.iceberg.IcebergRestConnectionConfigInfoDpo;
3941
import org.apache.polaris.core.secrets.UserSecretReference;
@@ -51,6 +53,7 @@
5153
@JsonSubTypes({
5254
@JsonSubTypes.Type(value = IcebergRestConnectionConfigInfoDpo.class, name = "1"),
5355
@JsonSubTypes.Type(value = HadoopConnectionConfigInfoDpo.class, name = "2"),
56+
@JsonSubTypes.Type(value = HiveConnectionConfigInfoDpo.class, name = "3"),
5457
})
5558
public abstract class ConnectionConfigInfoDpo implements IcebergCatalogPropertiesProvider {
5659
private static final Logger logger = LoggerFactory.getLogger(ConnectionConfigInfoDpo.class);
@@ -125,6 +128,11 @@ public static ConnectionConfigInfoDpo deserialize(final @Nonnull String jsonStr)
125128
protected void validateUri(String uri) {
126129
try {
127130
URI uriObj = URI.create(uri);
131+
if (connectionTypeCode == ConnectionType.HIVE.getCode()
132+
&& uriObj.getScheme().equals("thrift")) {
133+
// Hive metastore runs a thrift server.
134+
return;
135+
}
128136
URL url = uriObj.toURL();
129137
} catch (IllegalArgumentException | MalformedURLException e) {
130138
throw new IllegalArgumentException("Invalid remote URI: " + uri, e);
@@ -166,6 +174,16 @@ public static ConnectionConfigInfoDpo fromConnectionConfigInfoModelWithSecrets(
166174
authenticationParameters,
167175
hadoopConfigModel.getWarehouse());
168176
break;
177+
case HIVE:
178+
HiveConnectionConfigInfo hiveConfigModel =
179+
(HiveConnectionConfigInfo) connectionConfigurationModel;
180+
authenticationParameters =
181+
AuthenticationParametersDpo.fromAuthenticationParametersModelWithSecrets(
182+
hiveConfigModel.getAuthenticationParameters(), secretReferences);
183+
config =
184+
new HiveConnectionConfigInfoDpo(
185+
hiveConfigModel.getUri(), authenticationParameters, hiveConfigModel.getWarehouse());
186+
break;
169187
default:
170188
throw new IllegalStateException(
171189
"Unsupported connection type: " + connectionConfigurationModel.getConnectionType());

polaris-core/src/main/java/org/apache/polaris/core/connection/ConnectionType.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,12 @@ public enum ConnectionType {
3333
NULL_TYPE(0),
3434
ICEBERG_REST(1),
3535
HADOOP(2),
36+
HIVE(3),
3637
;
3738

3839
public static final String ICEBERG_REST_FACTORY_IDENTIFIER = "iceberg_rest";
3940
public static final String HADOOP_FACTORY_IDENTIFIER = "hadoop";
41+
public static final String HIVE_FACTORY_IDENTIFIER = "hive";
4042

4143
private static final ConnectionType[] REVERSE_MAPPING_ARRAY;
4244

@@ -93,6 +95,8 @@ public String getFactoryIdentifier() {
9395
return ICEBERG_REST_FACTORY_IDENTIFIER;
9496
case HADOOP:
9597
return HADOOP_FACTORY_IDENTIFIER;
98+
case HIVE:
99+
return HIVE_FACTORY_IDENTIFIER;
96100
default:
97101
throw new UnsupportedOperationException(
98102
"No factory identifier for connection type: " + this);
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.polaris.core.connection.hive;
20+
21+
import com.fasterxml.jackson.annotation.JsonProperty;
22+
import com.google.common.base.MoreObjects;
23+
import jakarta.annotation.Nonnull;
24+
import jakarta.annotation.Nullable;
25+
import java.util.HashMap;
26+
import java.util.Map;
27+
import org.apache.iceberg.CatalogProperties;
28+
import org.apache.polaris.core.admin.model.ConnectionConfigInfo;
29+
import org.apache.polaris.core.admin.model.HiveConnectionConfigInfo;
30+
import org.apache.polaris.core.connection.AuthenticationParametersDpo;
31+
import org.apache.polaris.core.connection.ConnectionConfigInfoDpo;
32+
import org.apache.polaris.core.connection.ConnectionType;
33+
import org.apache.polaris.core.secrets.UserSecretsManager;
34+
35+
/**
36+
* The internal persistence-object counterpart to {@link
37+
* org.apache.polaris.core.admin.model.HiveConnectionConfigInfo} defined in the API model.
38+
*/
39+
public class HiveConnectionConfigInfoDpo extends ConnectionConfigInfoDpo {
40+
41+
private final String warehouse;
42+
43+
public HiveConnectionConfigInfoDpo(
44+
@JsonProperty(value = "uri", required = true) @Nonnull String uri,
45+
@JsonProperty(value = "authenticationParameters", required = false) @Nullable
46+
AuthenticationParametersDpo authenticationParameters,
47+
@JsonProperty(value = "warehouse", required = false) @Nullable String warehouse) {
48+
super(ConnectionType.HIVE.getCode(), uri, authenticationParameters);
49+
this.warehouse = warehouse;
50+
}
51+
52+
public String getWarehouse() {
53+
return warehouse;
54+
}
55+
56+
@Override
57+
public String toString() {
58+
return MoreObjects.toStringHelper(this)
59+
.add("connectionTypeCode", getConnectionTypeCode())
60+
.add("uri", getUri())
61+
.add("warehouse", getWarehouse())
62+
.add("authenticationParameters", getAuthenticationParameters().toString())
63+
.toString();
64+
}
65+
66+
@Override
67+
public @Nonnull Map<String, String> asIcebergCatalogProperties(
68+
UserSecretsManager secretsManager) {
69+
HashMap<String, String> properties = new HashMap<>();
70+
properties.put(CatalogProperties.URI, getUri());
71+
if (getWarehouse() != null) {
72+
properties.put(CatalogProperties.WAREHOUSE_LOCATION, getWarehouse());
73+
}
74+
if (getAuthenticationParameters() != null) {
75+
properties.putAll(getAuthenticationParameters().asIcebergCatalogProperties(secretsManager));
76+
}
77+
return properties;
78+
}
79+
80+
@Override
81+
public ConnectionConfigInfo asConnectionConfigInfoModel() {
82+
return HiveConnectionConfigInfo.builder()
83+
.setConnectionType(ConnectionConfigInfo.ConnectionTypeEnum.HIVE)
84+
.setUri(getUri())
85+
.setWarehouse(getWarehouse())
86+
.setAuthenticationParameters(
87+
getAuthenticationParameters().asAuthenticationParametersModel())
88+
.build();
89+
}
90+
}

runtime/server/build.gradle.kts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ dependencies {
4949
runtimeOnly(project(":polaris-relational-jdbc"))
5050
runtimeOnly("io.quarkus:quarkus-jdbc-postgresql")
5151
runtimeOnly(project(":polaris-extensions-federation-hadoop"))
52+
runtimeOnly(project(":polaris-extensions-federation-hive"))
5253

5354
// enforce the Quarkus _platform_ here, to get a consistent and validated set of dependencies
5455
implementation(enforcedPlatform(libs.quarkus.bom))

spec/polaris-management-service.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -869,6 +869,7 @@ components:
869869
enum:
870870
- ICEBERG_REST
871871
- HADOOP
872+
- HIVE
872873
description: The type of remote catalog service represented by this connection
873874
uri:
874875
type: string
@@ -884,6 +885,7 @@ components:
884885
mapping:
885886
ICEBERG_REST: "#/components/schemas/IcebergRestConnectionConfigInfo"
886887
HADOOP: "#/components/schemas/HadoopConnectionConfigInfo"
888+
HIVE: "#/components/schemas/HiveConnectionConfigInfo"
887889

888890
IcebergRestConnectionConfigInfo:
889891
type: object
@@ -906,6 +908,16 @@ components:
906908
warehouse:
907909
type: string
908910
description: The file path to where this catalog should store tables
911+
912+
HiveConnectionConfigInfo:
913+
type: object
914+
description: Configuration necessary for connecting to a Hive Catalog
915+
allOf:
916+
- $ref: '#/components/schemas/ConnectionConfigInfo'
917+
properties:
918+
warehouse:
919+
type: string
920+
description: The warehouse location for the hive catalog.
909921

910922
AuthenticationParameters:
911923
type: object

0 commit comments

Comments
 (0)