From 58454f37d27f749199a9cb82f40aadd4e896df7a Mon Sep 17 00:00:00 2001
From: Fokko Driesprong
Date: Tue, 7 Mar 2023 12:32:29 +0100
Subject: [PATCH] Add ORC support

---
Review notes (text in this section is not part of the commit message):
 * The Iceberg Spark runtime is switched from the 1.1.0 release on Maven
   Central to a timestamped 1.2.0-SNAPSHOT build from the Apache snapshots
   repository; the 1.1.0 download is kept as a commented-out line.
 * The new download runs curl with --fail so that an HTTP error response
   aborts the image build instead of being written to the .jar path.
 * admin/password are literal credentials, set both as s3.* catalog
   properties and as catalog-scoped hadoop.fs.s3a.* properties. Presumably
   demo-only values for the MinIO endpoint; confirm before reusing elsewhere.

 spark/Dockerfile          | 3 ++-
 spark/spark-defaults.conf | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/spark/Dockerfile b/spark/Dockerfile
index 53630b8..bffa396 100644
--- a/spark/Dockerfile
+++ b/spark/Dockerfile
@@ -60,7 +60,8 @@ RUN curl https://dlcdn.apache.org/spark/spark-${SPARK_VERSION}/spark-${SPARK_VER
  && rm -rf spark-${SPARK_VERSION}-bin-hadoop3.tgz
 
 # Download iceberg spark runtime
-RUN curl https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.1.0/iceberg-spark-runtime-3.3_2.12-1.1.0.jar -Lo /opt/spark/jars/iceberg-spark-runtime-3.3_2.12-1.1.0.jar
+#RUN curl https://repo1.maven.org/maven2/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.1.0/iceberg-spark-runtime-3.3_2.12-1.1.0.jar -Lo /opt/spark/jars/iceberg-spark-runtime-3.3_2.12-1.1.0.jar
+RUN curl --fail https://repository.apache.org/content/groups/snapshots/org/apache/iceberg/iceberg-spark-runtime-3.3_2.12/1.2.0-SNAPSHOT/iceberg-spark-runtime-3.3_2.12-1.2.0-20230307.001541-120.jar -Lo /opt/spark/jars/iceberg-spark-runtime-3.3_2.12-1.2.0.jar
 
 ENV TABULAR_VERSION=0.50.4
 
diff --git a/spark/spark-defaults.conf b/spark/spark-defaults.conf
index 3d40e64..9ec8240 100755
--- a/spark/spark-defaults.conf
+++ b/spark/spark-defaults.conf
@@ -25,9 +25,15 @@ spark.sql.catalog.demo.catalog-impl org.apache.iceberg.rest.RESTCatalog
 spark.sql.catalog.demo.uri http://rest:8181
 spark.sql.catalog.demo.io-impl org.apache.iceberg.aws.s3.S3FileIO
 spark.sql.catalog.demo.warehouse s3a://warehouse/wh/
+spark.sql.catalog.demo.s3.access-key-id admin
+spark.sql.catalog.demo.s3.secret-access-key password
 spark.sql.catalog.demo.s3.endpoint http://minio:9000
+spark.sql.catalog.demo.hadoop.fs.s3a.access.key admin
+spark.sql.catalog.demo.hadoop.fs.s3a.secret.key password
+spark.sql.catalog.demo.hadoop.fs.s3a.endpoint http://minio:9000
 spark.sql.defaultCatalog demo
 spark.eventLog.enabled true
 spark.eventLog.dir /home/iceberg/spark-events
 spark.history.fs.logDirectory /home/iceberg/spark-events
 spark.sql.catalogImplementation in-memory
+spark.jars.packages org.apache.hadoop:hadoop-aws:3.3.4