Skip to content

Commit 30ed308

Browse files
committed
Spark: Copy back 4.1 as 4.0
1 parent 35a9a27 commit 30ed308

File tree

563 files changed

+137168
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

563 files changed

+137168
-0
lines changed

spark/v4.0/build.gradle

Lines changed: 344 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,344 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
// Version coordinates shared by all Spark 4.0 modules in this build script.
String sparkMajorVersion = '4.0'
String scalaVersion = '2.13'

// Spark 4.0 only runs on JDK 17 or 21 — fail fast with a clear message
// instead of letting compilation die with an obscure error later.
JavaVersion javaVersion = JavaVersion.current()
def supportedJavaVersions = [JavaVersion.VERSION_17, JavaVersion.VERSION_21]
if (!supportedJavaVersions.contains(javaVersion)) {
  throw new GradleException("Spark 4.0 build requires JDK 17 or 21 but was executed with JDK " + javaVersion)
}

// The three modules produced for this Spark/Scala combination:
// the core integration, the SQL extensions, and the shaded runtime jar.
def sparkProjects = [
  project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}"),
  project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}"),
  project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}"),
]
33+
34+
// Pin Jackson on every configuration of every Spark module so transitive
// Spark/Hadoop dependencies cannot drag in a mismatched Jackson release.
configure(sparkProjects) {
  configurations.all {
    resolutionStrategy {
      String jacksonVersion = libs.versions.jackson215.get()
      force("com.fasterxml.jackson.module:jackson-module-scala_${scalaVersion}:${jacksonVersion}",
            "com.fasterxml.jackson.core:jackson-databind:${jacksonVersion}",
            "com.fasterxml.jackson.core:jackson-core:${jacksonVersion}")
    }
  }
}
45+
46+
// Core Spark 4.0 integration module: DataSource V2 implementation, readers/writers,
// and actions. Mixed Scala/Java sources are compiled together by the scala plugin.
project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
  apply plugin: 'scala'
  apply plugin: 'com.github.alisiikh.scalastyle'

  sourceSets {
    main {
      // Route Java sources through the scala compiler's joint compilation so
      // Scala and Java classes can reference each other; clear java.srcDirs to
      // keep the java plugin from compiling them a second time.
      scala.srcDirs = ['src/main/scala', 'src/main/java']
      java.srcDirs = []
    }
  }

  dependencies {
    implementation project(path: ':iceberg-bundled-guava', configuration: 'shadow')
    api project(':iceberg-api')
    annotationProcessor libs.immutables.value
    compileOnly libs.immutables.value
    implementation project(':iceberg-common')
    implementation project(':iceberg-core')
    implementation project(':iceberg-data')
    implementation project(':iceberg-orc')
    implementation project(':iceberg-parquet')
    implementation project(':iceberg-arrow')
    implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")
    implementation("org.apache.datasketches:datasketches-java:${libs.versions.datasketches.get()}")

    compileOnly libs.errorprone.annotations
    compileOnly libs.avro.avro
    // Spark itself is provided by the runtime environment; exclude libraries that
    // Iceberg ships (or shades) itself to avoid classpath conflicts at compile time.
    compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
      exclude group: 'org.apache.avro', module: 'avro'
      exclude group: 'org.apache.arrow'
      exclude group: 'org.apache.parquet'
      // to make sure netty libs only come from project(':iceberg-arrow')
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
      exclude group: 'org.roaringbitmap'
    }

    // NOTE(review): this is a Spark 4.0 module but the Comet artifact targets
    // spark3.5_2.13 — presumably no spark4.0 Comet artifact is published yet;
    // confirm and update the coordinates when one becomes available.
    compileOnly "org.apache.datafusion:comet-spark-spark3.5_2.13:${libs.versions.comet.get()}"

    implementation libs.parquet.column
    implementation libs.parquet.hadoop

    implementation("${libs.orc.core.get().module}:${libs.versions.orc.get()}:nohive") {
      exclude group: 'org.apache.hadoop'
      exclude group: 'commons-lang'
      // These artifacts are shaded and included in the orc-core fat jar
      exclude group: 'com.google.protobuf', module: 'protobuf-java'
      exclude group: 'org.apache.hive', module: 'hive-storage-api'
    }

    implementation(libs.arrow.vector) {
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
      exclude group: 'com.google.code.findbugs', module: 'jsr305'
    }

    implementation libs.caffeine

    testImplementation(libs.hadoop3.minicluster) {
      exclude group: 'org.apache.avro', module: 'avro'
      // to make sure netty libs only come from project(':iceberg-arrow')
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
    }
    testImplementation project(path: ':iceberg-hive-metastore')
    testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts')
    testImplementation (project(path: ':iceberg-open-api', configuration: 'testFixturesRuntimeElements')) {
      // non-transitive: only the test-fixtures jar itself is wanted here
      transitive = false
    }
    testImplementation libs.sqlite.jdbc
    testImplementation libs.awaitility
    testImplementation(testFixtures(project(':iceberg-parquet')))
    // runtime dependencies for running REST Catalog based integration test
    testRuntimeOnly libs.jetty.servlet
  }

  test {
    useJUnitPlatform()
  }

  tasks.withType(Test) {
    // Vectorized reads need more memory
    maxHeapSize '3160m'
  }
}
134+
135+
// Spark SQL extensions module: adds Iceberg DDL/procedure syntax via an ANTLR
// grammar plus catalyst analysis/optimizer rules layered on the core module.
project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}") {
  apply plugin: 'java-library'
  apply plugin: 'scala'
  apply plugin: 'com.github.alisiikh.scalastyle'
  apply plugin: 'antlr'

  configurations {
    /*
     The Gradle Antlr plugin erroneously adds both antlr-build and runtime dependencies to the runtime path. This
     bug https://github.com/gradle/gradle/issues/820 exists because older versions of Antlr do not have separate
     runtime and implementation dependencies and they do not want to break backwards compatibility. So to only end up with
     the runtime dependency on the runtime classpath we remove the dependencies added by the plugin here. Then add
     the runtime dependency back to only the runtime configuration manually.
    */
    implementation {
      extendsFrom = extendsFrom.findAll { it != configurations.antlr }
    }
  }

  dependencies {
    implementation("org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}")
    implementation libs.roaringbitmap

    // Everything below is compileOnly: at runtime these classes are provided by
    // Spark and by the core iceberg-spark module on the user's classpath.
    compileOnly "org.scala-lang:scala-library"
    compileOnly project(path: ':iceberg-bundled-guava', configuration: 'shadow')
    compileOnly project(':iceberg-api')
    compileOnly project(':iceberg-core')
    compileOnly project(':iceberg-common')
    compileOnly project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
    compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
      exclude group: 'org.apache.avro', module: 'avro'
      exclude group: 'org.apache.arrow'
      exclude group: 'org.apache.parquet'
      // to make sure netty libs only come from project(':iceberg-arrow')
      exclude group: 'io.netty', module: 'netty-buffer'
      exclude group: 'io.netty', module: 'netty-common'
      exclude group: 'org.roaringbitmap'
    }
    compileOnly libs.errorprone.annotations

    testImplementation project(path: ':iceberg-data')
    testImplementation project(path: ':iceberg-parquet')
    testImplementation project(path: ':iceberg-hive-metastore')
    testImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-core', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-data', configuration: 'testArtifacts')
    testImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
    testImplementation project(path: ":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
    testImplementation (project(path: ':iceberg-open-api', configuration: 'testFixturesRuntimeElements')) {
      // non-transitive: only the test-fixtures jar itself is wanted here
      transitive = false
    }
    // runtime dependencies for running REST Catalog based integration test
    testRuntimeOnly libs.jetty.servlet
    testRuntimeOnly libs.sqlite.jdbc

    testImplementation libs.avro.avro
    testImplementation libs.parquet.hadoop
    testImplementation libs.awaitility
    // NOTE(review): Comet artifact targets spark3.5_2.13 although this is the
    // Spark 4.0 build — presumably no spark4.0 Comet artifact exists yet; verify.
    testImplementation "org.apache.datafusion:comet-spark-spark3.5_2.13:${libs.versions.comet.get()}"
    testImplementation(testFixtures(project(':iceberg-parquet')))

    // Required because we remove antlr plugin dependencies from the compile configuration, see note above
    runtimeOnly libs.antlr.runtime413
    antlr libs.antlr.antlr413
  }

  test {
    useJUnitPlatform()
  }

  generateGrammarSource {
    maxHeapSize = "64m"
    // -visitor: generate visitor classes; -package: place generated parser in
    // Spark's parser.extensions package so it can extend Spark's parser types.
    arguments += ['-visitor', '-package', 'org.apache.spark.sql.catalyst.parser.extensions']
  }
}
210+
211+
// Runtime module: produces the single shaded (fat) jar users drop onto a Spark
// cluster. Bundles the core module, SQL extensions, and all cloud/catalog
// integrations, relocating shared dependencies to avoid conflicts with Spark's
// own classpath. Also defines an integrationTest task that exercises the shaded
// jar itself rather than the individual module classes.
project(":iceberg-spark:iceberg-spark-runtime-${sparkMajorVersion}_${scalaVersion}") {
  apply plugin: 'com.gradleup.shadow'

  // Publishing/consuming this project should always get the shaded artifact.
  tasks.jar.dependsOn tasks.shadowJar

  sourceSets {
    // Separate source set so integration tests compile against the shaded jar,
    // not against the project's own (unshaded) classes.
    integration {
      java.srcDir "$projectDir/src/integration/java"
      resources.srcDir "$projectDir/src/integration/resources"
    }
  }

  configurations {
    implementation {
      // Spark provides itself at runtime.
      exclude group: 'org.apache.spark'
      // included in Spark
      exclude group: 'org.slf4j'
      exclude group: 'org.apache.commons'
      exclude group: 'commons-pool'
      exclude group: 'commons-codec'
      exclude group: 'org.xerial.snappy'
      exclude group: 'javax.xml.bind'
      exclude group: 'javax.annotation'
      exclude group: 'com.github.luben'
      exclude group: 'com.ibm.icu'
      exclude group: 'org.glassfish'
      exclude group: 'org.abego.treelayout'
      exclude group: 'org.antlr'
      exclude group: 'org.scala-lang'
      exclude group: 'org.scala-lang.modules'
    }
  }

  dependencies {
    api project(':iceberg-api')
    implementation project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
    implementation project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}")
    implementation project(':iceberg-aws')
    implementation project(':iceberg-azure')
    implementation(project(':iceberg-aliyun')) {
      exclude group: 'edu.umd.cs.findbugs', module: 'findbugs'
      exclude group: 'org.apache.httpcomponents', module: 'httpclient'
      exclude group: 'commons-logging', module: 'commons-logging'
    }
    implementation project(':iceberg-gcp')
    implementation project(':iceberg-hive-metastore')
    implementation(project(':iceberg-nessie')) {
      exclude group: 'com.google.code.findbugs', module: 'jsr305'
    }
    implementation (project(':iceberg-snowflake')) {
      // users must supply the Snowflake JDBC driver themselves
      exclude group: 'net.snowflake' , module: 'snowflake-jdbc'
    }

    // Integration tests run against a real Spark distribution plus the shaded jar.
    integrationImplementation "org.scala-lang.modules:scala-collection-compat_${scalaVersion}:${libs.versions.scala.collection.compat.get()}"
    integrationImplementation "org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}"
    integrationImplementation libs.junit.jupiter
    integrationImplementation libs.junit.platform.launcher
    integrationImplementation libs.slf4j.simple
    integrationImplementation libs.assertj.core
    integrationImplementation project(path: ':iceberg-api', configuration: 'testArtifacts')
    integrationImplementation project(path: ':iceberg-hive-metastore', configuration: 'testArtifacts')
    integrationImplementation project(path: ":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')
    integrationImplementation project(path: ":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}", configuration: 'testArtifacts')

    // runtime dependencies for running Hive Catalog based integration test
    integrationRuntimeOnly project(':iceberg-hive-metastore')
    // runtime dependencies for running REST Catalog based integration test
    integrationRuntimeOnly project(path: ':iceberg-core', configuration: 'testArtifacts')
    integrationRuntimeOnly (project(path: ':iceberg-open-api', configuration: 'testFixturesRuntimeElements')) {
      transitive = false
    }
    integrationRuntimeOnly libs.jetty.servlet
    integrationRuntimeOnly libs.sqlite.jdbc

    // Not allowed on our classpath, only the runtime jar is allowed
    integrationCompileOnly project(":iceberg-spark:iceberg-spark-extensions-${sparkMajorVersion}_${scalaVersion}")
    integrationCompileOnly project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}")
    integrationCompileOnly project(':iceberg-api')
  }

  shadowJar {
    configurations = [project.configurations.runtimeClasspath]

    // the shaded jar can exceed the 65535-entry / 4GB classic zip limits
    zip64 true

    // include the LICENSE and NOTICE files for the shaded Jar
    from(projectDir) {
      include 'LICENSE'
      include 'NOTICE'
    }

    // Relocate dependencies to avoid conflicts
    relocate 'com.google.errorprone', 'org.apache.iceberg.shaded.com.google.errorprone'
    relocate 'com.google.flatbuffers', 'org.apache.iceberg.shaded.com.google.flatbuffers'
    relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml'
    relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes'
    relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework'
    relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro'
    relocate 'avro.shaded', 'org.apache.iceberg.shaded.org.apache.avro.shaded'
    relocate 'com.thoughtworks.paranamer', 'org.apache.iceberg.shaded.com.thoughtworks.paranamer'
    relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet'
    relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded'
    relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc'
    relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift'
    relocate 'org.apache.hc.client5', 'org.apache.iceberg.shaded.org.apache.hc.client5'
    relocate 'org.apache.hc.core5', 'org.apache.iceberg.shaded.org.apache.hc.core5'
    // relocate Arrow and related deps to shade Iceberg specific version
    relocate 'io.netty', 'org.apache.iceberg.shaded.io.netty'
    relocate 'org.apache.arrow', 'org.apache.iceberg.shaded.org.apache.arrow'
    relocate 'com.carrotsearch', 'org.apache.iceberg.shaded.com.carrotsearch'
    relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra'
    relocate 'org.roaringbitmap', 'org.apache.iceberg.shaded.org.roaringbitmap'
    relocate 'org.apache.datasketches', 'org.apache.iceberg.shaded.org.apache.datasketches'

    // publish the shaded jar as the main artifact (no classifier)
    archiveClassifier.set(null)
  }

  task integrationTest(type: Test) {
    useJUnitPlatform()
    // Fixed stale copy-paste from the Spark 3.x build: this is the Spark 4.x module.
    description = "Test Spark Runtime Jar against Spark ${sparkMajorVersion}"
    group = "verification"
    jvmArgs += project.property('extraJvmArgs')
    testClassesDirs = sourceSets.integration.output.classesDirs
    // Run against the shaded jar, not the unshaded project classes.
    classpath = sourceSets.integration.runtimeClasspath + files(shadowJar.archiveFile.get().asFile.path)
    inputs.file(shadowJar.archiveFile.get().asFile.path)
  }
  integrationTest.dependsOn shadowJar
  check.dependsOn integrationTest

  // The plain jar is disabled: shadowJar is the only artifact of this module.
  jar {
    enabled = false
  }
}
344+

0 commit comments

Comments
 (0)