Skip to content

Commit 4bb1899

Browse files
authored
feat(hive): Build hive-metastore-opa-authorizer from source (#1340)
* hive 4.0.1 is building * move env var definition * added todo * 3.1.3 building and working with patchable * use patchable for all versions * use non-shaded opa-authz jar for 4.x.x * adapted changelog * use local maven jars * mention hive 4.0.0 removal in changelog * set mvn authorizer version * use bash shell for regex operations * use bash in hive docker file * fix jar names
1 parent 576b002 commit 4bb1899

File tree

7 files changed

+126
-14
lines changed

7 files changed

+126
-14
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,21 @@ All notable changes to this project will be documented in this file.
77
### Added
88

99
- superset: Add 6.0.0-rc2 ([#1337]).
10+
- hive: Build [hive-metastore-opa-authorizer](https://github.com/boschglobal/hive-metastore-opa-authorizer) from source and add to image ([#1340]).
1011

1112
### Changed
1213

1314
- airflow: Extend list of providers for 3.0.6 ([#1336])
1415
- airflow: Bump celery version to 5.5.3 for Airflow 3.x ([#1343]).
1516

17+
### Removed
18+
19+
- hive: Remove `4.0.0` ([#1340]).
20+
1621
[#1336]: https://github.com/stackabletech/docker-images/pull/1336
1722
[#1337]: https://github.com/stackabletech/docker-images/pull/1337
1823
[#1343]: https://github.com/stackabletech/docker-images/pull/1343
24+
[#1340]: https://github.com/stackabletech/docker-images/pull/1340
1925

2026
## [25.11.0] - 2025-11-07
2127

hive/Dockerfile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# check=error=true
33

44
FROM local-image/hadoop/hadoop AS hadoop-builder
5+
FROM local-image/hive/hive-metastore-opa-authorizer AS hive-metastore-opa-authorizer-builder
56

67
FROM local-image/java-devel AS hive-builder
78

@@ -44,6 +45,10 @@ ENV NEW_VERSION="${PRODUCT_VERSION}-stackable${RELEASE_VERSION}"
4445
# thus taking a bit (which is annoying while development)
4546
RUN /stackable/patchable --images-repo-root=src checkout hive ${PRODUCT_VERSION} > /tmp/HIVE_SOURCE_DIR
4647

48+
# Use bash for regex matching, otherwise the Docker linter complains:
49+
# hive/Dockerfile:51 SC3014 warning: In POSIX sh, == in place of = is undefined.
50+
SHELL ["/bin/bash", "-c"]
51+
4752
# Make expensive maven build a separate layer for better caching
4853
# Cache mounts are owned by root by default
4954
# We need to explicitly give the uid to use
@@ -184,6 +189,8 @@ COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hive-${PRODU
184189
COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION} /stackable/hadoop-${HADOOP_VERSION}-stackable${RELEASE_VERSION}
185190
COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/*-src.tar.gz /stackable
186191
COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-builder /stackable/jmx /stackable/jmx
192+
COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-metastore-opa-authorizer-builder /stackable/opa-authorizer-bin /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/lib
193+
COPY --chown=${STACKABLE_USER_UID}:0 --from=hive-metastore-opa-authorizer-builder /stackable/opa-authorizer-src /stackable
187194
COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/jmx /stackable/jmx
188195
COPY --chown=${STACKABLE_USER_UID}:0 hive/stackable/bin/start-metastore /stackable/apache-hive-metastore-${PRODUCT_VERSION}-stackable${RELEASE_VERSION}-bin/bin
189196

@@ -230,8 +237,8 @@ EOF
230237

231238
USER ${STACKABLE_USER_UID}
232239

233-
ENV HADOOP_HOME=/stackable/hadoop
234240
ENV HIVE_HOME=/stackable/hive-metastore
241+
ENV HADOOP_HOME=/stackable/hadoop
235242
ENV PATH="${PATH}":/stackable/hadoop/bin:/stackable/hive-metastore/bin
236243

237244
# The following 2 env-vars are required for common hadoop scripts even if the respective libraries are never used.

hive/boil-config.toml

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
java-base = "11"
44
java-devel = "8"
55
"hadoop/hadoop" = "3.3.6"
6+
# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer
7+
"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-3.1.3-hadoop-3.3.6"
68

79
[versions."3.1.3".build-arguments]
810
jmx-exporter-version = "1.3.0"
@@ -11,24 +13,13 @@ aws-java-sdk-bundle-version = "1.12.367"
1113
azure-storage-version = "7.0.1"
1214
azure-keyvault-core-version = "1.0.0"
1315

14-
[versions."4.0.0".local-images]
15-
# Hive 4 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
16-
java-base = "11"
17-
java-devel = "8"
18-
"hadoop/hadoop" = "3.3.6"
19-
20-
[versions."4.0.0".build-arguments]
21-
jmx-exporter-version = "1.3.0"
22-
# Keep consistent with the dependency from Hadoop: https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-aws/3.3.6
23-
aws-java-sdk-bundle-version = "1.12.367"
24-
azure-storage-version = "7.0.1"
25-
azure-keyvault-core-version = "1.0.0"
26-
2716
[versions."4.0.1".local-images]
2817
# Hive 4.0 must be built with Java 8 (according to GitHub README) but seems to run on Java 11
2918
java-base = "11"
3019
java-devel = "8"
3120
"hadoop/hadoop" = "3.3.6"
21+
# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer
22+
"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.0.1-hadoop-3.3.6"
3223

3324
[versions."4.0.1".build-arguments]
3425
jmx-exporter-version = "1.3.0"
@@ -42,6 +33,8 @@ azure-keyvault-core-version = "1.0.0"
4233
java-base = "17"
4334
java-devel = "17"
4435
"hadoop/hadoop" = "3.4.2"
36+
# hive-metastore-opa-authorizer from: https://github.com/boschglobal/hive-metastore-opa-authorizer
37+
"hive/hive-metastore-opa-authorizer" = "v1.0.0-hive-4.1.0-hadoop-3.4.2"
4538

4639
[versions."4.1.0".build-arguments]
4740
jmx-exporter-version = "1.3.0"
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# syntax=docker/dockerfile:1.16.0@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7
2+
# check=error=true
3+
4+
FROM local-image/hadoop/hadoop AS hadoop-builder
5+
6+
FROM local-image/java-devel
7+
8+
ARG AUTHORIZER_VERSION
9+
ARG HIVE_VERSION
10+
ARG HADOOP_HADOOP_VERSION
11+
# Reassign the arg to `HADOOP_VERSION` for better readability.
12+
ENV HADOOP_VERSION=${HADOOP_HADOOP_VERSION}
13+
ARG STACKABLE_USER_UID
14+
# Setting this to anything other than "true" will keep the cache folders around (e.g. for Maven, NPM etc.)
15+
# This can be used to speed up builds when disk space is of no concern.
16+
ARG DELETE_CACHES="true"
17+
18+
USER ${STACKABLE_USER_UID}
19+
WORKDIR /stackable
20+
21+
COPY --chown=${STACKABLE_USER_UID}:0 hive/hive-metastore-opa-authorizer/stackable/patches/patchable.toml /stackable/src/hive/hive-metastore-opa-authorizer/stackable/patches/patchable.toml
22+
COPY --chown=${STACKABLE_USER_UID}:0 hive/hive-metastore-opa-authorizer/stackable/patches/${AUTHORIZER_VERSION} /stackable/src/hive/hive-metastore-opa-authorizer/stackable/patches/${AUTHORIZER_VERSION}
23+
24+
COPY --chown=${STACKABLE_USER_UID}:0 --from=hadoop-builder /stackable/patched-libs /stackable/patched-libs
25+
26+
# Use bash for regex matching, otherwise the Docker linter complains:
27+
# hive/hive-metastore-opa-authorizer/Dockerfile:26 SC3015 warning: In POSIX sh, =~ regex matching is undefined.
28+
# -e aborts the heredoc RUN below on the first failing command (heredoc lines are not &&-chained); -o pipefail propagates pipeline failures.
SHELL ["/bin/bash", "-e", "-o", "pipefail", "-c"]
29+
30+
# Make expensive maven build a separate layer for better caching
31+
# Cache mounts are owned by root by default
32+
# We need to explicitly give the uid to use
33+
RUN --mount=type=cache,id=maven-hive-metastore-opa-authorizer-${AUTHORIZER_VERSION},uid=${STACKABLE_USER_UID},target=/stackable/.m2/repository <<EOF
34+
# for moving nested artifacts out of target folder
35+
mkdir -p /stackable/opa-authorizer-bin
36+
# for moving sources out of target folder
37+
mkdir -p /stackable/opa-authorizer-src
38+
39+
cd "$(/stackable/patchable --images-repo-root=src checkout hive/hive-metastore-opa-authorizer ${AUTHORIZER_VERSION})"
40+
41+
# Create snapshot of the source code including custom patches
42+
tar -czf /stackable/opa-authorizer-src/hive-metastore-opa-authorizer-${AUTHORIZER_VERSION}-hive-${HIVE_VERSION}-hadoop-${HADOOP_VERSION}-src.tar.gz .
43+
44+
# Make Maven aware of custom Stackable libraries
45+
cp -r /stackable/patched-libs/maven/* /stackable/.m2/repository
46+
47+
# Set version in the output jar(s)
48+
mvn versions:set -DnewVersion=${AUTHORIZER_VERSION}
49+
50+
# The if part can be removed once we no longer support Hive 3.x.x
51+
if [[ "${HIVE_VERSION}" =~ ^3 ]]; then
52+
mvn clean package -DskipTests -Dhive.version=${HIVE_VERSION} -Dhadoop.version=${HADOOP_VERSION} -f hms-v3/pom.xml
53+
54+
mv hms-v3/target/com.bosch.bdps.hms3-${HIVE_VERSION}-${HADOOP_VERSION}-${AUTHORIZER_VERSION}.jar /stackable/opa-authorizer-bin
55+
else
56+
mvn clean package -DskipTests -Dhive.version=${HIVE_VERSION} -Dhadoop.version=${HADOOP_VERSION} -f hms-v4/pom.xml
57+
58+
# The hive-metastore-opa-authorizer offers a shaded jar from version 4.x.x. Using the shaded jar leads to problems with schema tool at pod startup.
59+
# mv hms-v4/target/com.bosch.bdps.hms4-${HIVE_VERSION}-${HADOOP_VERSION}-dev.jar /stackable/opa-authorizer-bin
60+
mv hms-v4/target/hms4-${AUTHORIZER_VERSION}.jar /stackable/opa-authorizer-bin
61+
fi
62+
63+
# We're removing these to make the intermediate layer smaller
64+
# This can be necessary even though it's only a builder image because the GitHub Action Runners only have very limited space available
65+
# and we are sometimes running into errors because we're out of space.
66+
# Therefore, we try to clean up all layers as much as possible.
67+
if [ "${DELETE_CACHES}" = "true" ] ; then
68+
rm -rf /stackable/.m2/repository/*
69+
rm -rf /stackable/.npm/*
70+
rm -rf /stackable/.cache/*
71+
rm -rf /stackable/src
72+
fi
73+
74+
# fix permissions
75+
chmod --recursive g=u /stackable/opa-authorizer-bin
76+
EOF
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[versions."v1.0.0-hive-3.1.3-hadoop-3.3.6".local-images]
2+
"java-devel" = "11"
3+
"hadoop/hadoop" = "3.3.6"
4+
5+
[versions."v1.0.0-hive-3.1.3-hadoop-3.3.6".build-arguments]
6+
authorizer-version = "v1.0.0"
7+
hive-version = "3.1.3"
8+
delete-caches = "true"
9+
10+
[versions."v1.0.0-hive-4.0.1-hadoop-3.3.6".local-images]
11+
"java-devel" = "11"
12+
"hadoop/hadoop" = "3.3.6"
13+
14+
[versions."v1.0.0-hive-4.0.1-hadoop-3.3.6".build-arguments]
15+
authorizer-version = "v1.0.0"
16+
hive-version = "4.0.1"
17+
delete-caches = "true"
18+
19+
[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".local-images]
20+
"java-devel" = "17"
21+
"hadoop/hadoop" = "3.4.2"
22+
23+
[versions."v1.0.0-hive-4.1.0-hadoop-3.4.2".build-arguments]
24+
authorizer-version = "v1.0.0"
25+
hive-version = "4.1.0"
26+
delete-caches = "true"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
upstream = "https://github.com/boschglobal/hive-metastore-opa-authorizer"
2+
default-mirror = "https://github.com/stackabletech/hive-metastore-opa-authorizer"
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
mirror = "https://github.com/stackabletech/hive-metastore-opa-authorizer"
2+
base = "1925fee7512d4afba4a9d83c303aa241d0e5412e"

0 commit comments

Comments
 (0)