/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

String sparkMajorVersion = '4.0'
String scalaVersion = '2.13'

JavaVersion javaVersion = JavaVersion.current()
if (javaVersion != JavaVersion.VERSION_17 && javaVersion != JavaVersion.VERSION_21) {
  throw new GradleException("Spark 4.0 build requires JDK 17 or 21 but was executed with JDK " + javaVersion)
}
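// Note: if the default JVM is not JDK 17 or 21, Gradle can be pointed at a compatible JDK via the
// standard org.gradle.java.home property, for example (the path is illustrative):
//   ./gradlew build -Dorg.gradle.java.home=/path/to/jdk-17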

def sparkProjects = [
  project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}"),
  project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}"),
  project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}"),
]

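// Pin all Jackson artifacts across the Spark 4.0 projects to the single version declared as
// 'jackson215' in the version catalog, so jackson-module-scala, jackson-databind and jackson-core
// cannot drift apart through transitive dependencies.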
configure(sparkProjects) {
  configurations {
    all {
      resolutionStrategy {
        force "com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}:${libs.versions.jackson215.get()}"
        force "com.fasterxml.jackson.core:jackson-databind:${libs.versions.jackson215.get()}"
        force "com.fasterxml.jackson.core:jackson-core:${libs.versions.jackson215.get()}"
      }
    }
  }
}

project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
  apply plugin: 'scala'
  apply plugin: 'com.github.alisiikh.scalastyle'

  sourceSets {
    main {
      scala.srcDirs = ['src/main/scala', 'src/main/java']
      java.srcDirs = []
    }
  }

  dependencies {
    implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
    api project(':iceberg-api')
    annotationProcessor libs.immutables.value
    compileOnly libs.immutables.value
    implementation project(':iceberg-common')
    implementation project(':iceberg-core')
    implementation project(':iceberg-data')
    implementation project(':iceberg-orc')
    implementation project(':iceberg-parquet')
    implementation project(':iceberg-arrow')
    implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")
    implementation("org.apache.datasketches:datasketches-java:${libs.versions.datasketches.get()}")

    compileOnly libs.errorprone.annotations
    compileOnly libs.avro.avro
    compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
      exclude group: 'org.apache.avro', module: 'avro'
      exclude group: 'org.apache.arrow'
      exclude group: 'org.apache.parquet'
      // to make sure netty libs only come from project(':iceberg-arrow')
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
      exclude group: 'org.roaringbitmap'
    }

    compileOnly "org.apache.datafusion:comet-spark-spark3.5_2.13:${libs.versions.comet.get()}"

    implementation libs.parquet.column
    implementation libs.parquet.hadoop

    implementation("${libs.orc.core.get().module}:${libs.versions.orc.get()}:nohive") {
      exclude group: 'org.apache.hadoop'
      exclude group: 'commons-lang'
      // These artifacts are shaded and included in the orc-core fat jar
      exclude group: 'com.google.protobuf', module: 'protobuf-java'
      exclude group: 'org.apache.hive', module: 'hive-storage-api'
    }

    implementation(libs.arrow.vector) {
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
      exclude group: 'com.google.code.findbugs', module: 'jsr305'
    }

    implementation libs.caffeine

    testImplementation(libs.hadoop3.minicluster) {
      exclude group: 'org.apache.avro', module: 'avro'
      // to make sure netty libs only come from project(':iceberg-arrow')
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
    }
    testImplementation project(path: ':iceberg-hive-metastore')
    testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts')
    testImplementation(project(path: ':iceberg-open-api', configuration: 'testFixturesRuntimeElements')) {
      transitive = false
    }
    testImplementation libs.sqlite.jdbc
    testImplementation libs.awaitility
    testImplementation(testFixtures(project(':iceberg-parquet')))
    // runtime dependencies for running REST Catalog based integration test
    testRuntimeOnly libs.jetty.servlet
  }

  test {
    useJUnitPlatform()
  }

  tasks.withType(Test) {
    // Vectorized reads need more memory
    maxHeapSize '3160m'
  }
}

project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}") {
  apply plugin: 'java-library'
  apply plugin: 'scala'
  apply plugin: 'com.github.alisiikh.scalastyle'
  apply plugin: 'antlr'

  configurations {
    /*
     The Gradle ANTLR plugin erroneously adds both the antlr (build-time) and antlr-runtime
     dependencies to the runtime classpath. This bug (https://github.com/gradle/gradle/issues/820)
     persists for backwards compatibility, because older ANTLR versions did not separate build-time
     and runtime dependencies. To end up with only the runtime dependency on the runtime classpath,
     we remove the dependencies the plugin adds here and manually add the runtime dependency back
     to the runtime configuration only (see the runtimeOnly/antlr declarations below).
     */
    implementation {
      extendsFrom = extendsFrom.findAll { it != configurations.antlr }
    }
  }

  dependencies {
    implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")
    implementation libs.roaringbitmap

    compileOnly "org.scala-lang:scala-library"
    compileOnly project(path: ':iceberg-bundled-guava', configuration: 'shadow')
    compileOnly project(':iceberg-api')
    compileOnly project(':iceberg-core')
    compileOnly project(':iceberg-common')
    compileOnly project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
    compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
      exclude group: 'org.apache.avro', module: 'avro'
      exclude group: 'org.apache.arrow'
      exclude group: 'org.apache.parquet'
      // to make sure netty libs only come from project(':iceberg-arrow')
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
      exclude group: 'org.roaringbitmap'
    }
    compileOnly libs.errorprone.annotations

    testImplementation project(path: ':iceberg-data')
    testImplementation project(path: ':iceberg-parquet')
    testImplementation project(path: ':iceberg-hive-metastore')
    testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
    testImplementation project(path: ":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
    testImplementation(project(path: ':iceberg-open-api', configuration: 'testFixturesRuntimeElements')) {
      transitive = false
    }
    // runtime dependencies for running REST Catalog based integration test
    testRuntimeOnly libs.jetty.servlet
    testRuntimeOnly libs.sqlite.jdbc

    testImplementation libs.avro.avro
    testImplementation libs.parquet.hadoop
    testImplementation libs.awaitility
    testImplementation "org.apache.datafusion:comet-spark-spark3.5_2.13:${libs.versions.comet.get()}"
    testImplementation(testFixtures(project(':iceberg-parquet')))

    // Required because we remove antlr plugin dependencies from the compile configuration, see note above
    runtimeOnly libs.antlr.runtime413
    antlr libs.antlr.antlr413
  }

  test {
    useJUnitPlatform()
  }

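  // In generateGrammarSource below, '-visitor' asks ANTLR to generate a parse-tree visitor in
  // addition to the default listener, and '-package' places the generated parser classes into the
  // org.apache.spark.sql.catalyst.parser.extensions package used by the SQL extensions parser.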
  generateGrammarSource {
    maxHeapSize = "64m"
    arguments += ['-visitor', '-package', 'org.apache.spark.sql.catalyst.parser.extensions']
  }
}

project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}") {
  apply plugin: 'com.gradleup.shadow'

  tasks.jar.dependsOn tasks.shadowJar

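  // Integration tests live in their own source set: they compile against the regular project
  // classes (see integrationCompileOnly below) but run against the shaded runtime jar produced by
  // shadowJar, which is what the integrationTest task puts on the classpath.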
  sourceSets {
    integration {
      java.srcDir "$projectDir/src/integration/java"
      resources.srcDir "$projectDir/src/integration/resources"
    }
  }

  configurations {
    implementation {
      exclude group: 'org.apache.spark'
      // included in Spark
      exclude group: 'org.slf4j'
      exclude group: 'org.apache.commons'
      exclude group: 'commons-pool'
      exclude group: 'commons-codec'
      exclude group: 'org.xerial.snappy'
      exclude group: 'javax.xml.bind'
      exclude group: 'javax.annotation'
      exclude group: 'com.github.luben'
      exclude group: 'com.ibm.icu'
      exclude group: 'org.glassfish'
      exclude group: 'org.abego.treelayout'
      exclude group: 'org.antlr'
      exclude group: 'org.scala-lang'
      exclude group: 'org.scala-lang.modules'
    }
  }

  dependencies {
    api project(':iceberg-api')
    implementation project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
    implementation project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}")
    implementation project(':iceberg-aws')
    implementation project(':iceberg-azure')
    implementation(project(':iceberg-aliyun')) {
      exclude group: 'edu.umd.cs.findbugs', module: 'findbugs'
      exclude group: 'org.apache.httpcomponents', module: 'httpclient'
      exclude group: 'commons-logging', module: 'commons-logging'
    }
    implementation project(':iceberg-gcp')
    implementation project(':iceberg-hive-metastore')
    implementation(project(':iceberg-nessie')) {
      exclude group: 'com.google.code.findbugs', module: 'jsr305'
    }
    implementation(project(':iceberg-snowflake')) {
      exclude group: 'net.snowflake', module: 'snowflake-jdbc'
    }

    integrationImplementation "org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}"
    integrationImplementation "org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}"
    integrationImplementation libs.junit.jupiter
    integrationImplementation libs.junit.platform.launcher
    integrationImplementation libs.slf4j.simple
    integrationImplementation libs.assertj.core
    integrationImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    integrationImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
    integrationImplementation project(path: ":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
    integrationImplementation project(path: ":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')

    // runtime dependencies for running Hive Catalog based integration test
    integrationRuntimeOnly project(':iceberg-hive-metastore')
    // runtime dependencies for running REST Catalog based integration test
    integrationRuntimeOnly project(path: ':iceberg-core', configuration: 'testArtifacts')
    integrationRuntimeOnly(project(path: ':iceberg-open-api', configuration: 'testFixturesRuntimeElements')) {
      transitive = false
    }
    integrationRuntimeOnly libs.jetty.servlet
    integrationRuntimeOnly libs.sqlite.jdbc

    // Not allowed on our classpath, only the runtime jar is allowed
    integrationCompileOnly project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}")
    integrationCompileOnly project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
    integrationCompileOnly project(':iceberg-api')
  }

  shadowJar {
    configurations = [project.configurations.runtimeClasspath]

    zip64 true

    // include the LICENSE and NOTICE files for the shaded Jar
    from(projectDir) {
      include 'LICENSE'
      include 'NOTICE'
    }

    // Relocate dependencies to avoid conflicts
    relocate 'com.google.errorprone', 'org.apache.iceberg.shaded.com.google.errorprone'
    relocate 'com.google.flatbuffers', 'org.apache.iceberg.shaded.com.google.flatbuffers'
    relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml'
    relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes'
    relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework'
    relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro'
    relocate 'avro.shaded', 'org.apache.iceberg.shaded.org.apache.avro.shaded'
    relocate 'com.thoughtworks.paranamer', 'org.apache.iceberg.shaded.com.thoughtworks.paranamer'
    relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet'
    relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded'
    relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc'
    relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift'
    relocate 'org.apache.hc.client5', 'org.apache.iceberg.shaded.org.apache.hc.client5'
    relocate 'org.apache.hc.core5', 'org.apache.iceberg.shaded.org.apache.hc.core5'
    // relocate Arrow and related deps to shade Iceberg specific version
    relocate 'io.netty', 'org.apache.iceberg.shaded.io.netty'
    relocate 'org.apache.arrow', 'org.apache.iceberg.shaded.org.apache.arrow'
    relocate 'com.carrotsearch', 'org.apache.iceberg.shaded.com.carrotsearch'
    relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra'
    relocate 'org.roaringbitmap', 'org.apache.iceberg.shaded.org.roaringbitmap'
    relocate 'org.apache.datasketches', 'org.apache.iceberg.shaded.org.apache.datasketches'

    archiveClassifier.set(null)
  }
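  // To spot-check the relocations, list the shaded jar contents, for example (the version in the
  // file name is illustrative):
  //   jar tf build/libs/iceberg-spark-runtime-4.0_2.13-<version>.jar | grep org/apache/iceberg/shaded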

  task integrationTest(type: Test) {
    useJUnitPlatform()
    description = "Test Spark Runtime Jar against Spark ${sparkMajorVersion}"
    group = "verification"
    jvmArgs += project.property('extraJvmArgs')
    testClassesDirs = sourceSets.integration.output.classesDirs
    classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
    inputs.file(shadowJar.archiveFile.get().asFile.path)
  }
  integrationTest.dependsOn shadowJar
  check.dependsOn integrationTest
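  // The integration tests can also be run on their own, for example:
  //   ./gradlew :iceberg-spark:iceberg-spark-runtime-4.0_2.13:integrationTest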

  jar {
    enabled = false
  }
}