Skip to content

Commit 7df730b

Browse files
committed
Spark: Initial support for 4.1-preview2
1 parent 30ed308 commit 7df730b

File tree

14 files changed

+67
-10
lines changed

14 files changed

+67
-10
lines changed

.github/workflows/spark-ci.yml

Lines changed: 5 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -72,17 +72,21 @@ jobs:
7272
strategy:
7373
matrix:
7474
jvm: [11, 17, 21]
75-
spark: ['3.4', '3.5', '4.0']
75+
spark: ['3.4', '3.5', '4.0', '4.1']
7676
scala: ['2.12', '2.13']
7777
exclude:
7878
# Spark 3.5 is the first version not failing on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369)
7979
# Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831)
8080
- jvm: 11
8181
spark: '4.0'
82+
- jvm: 11
83+
spark: '4.1'
8284
- jvm: 21
8385
spark: '3.4'
8486
- spark: '4.0'
8587
scala: '2.12'
88+
- spark: '4.1'
89+
scala: '2.12'
8690
env:
8791
SPARK_LOCAL_IP: localhost
8892
steps:

build.gradle

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -118,6 +118,9 @@ allprojects {
118118
group = "org.apache.iceberg"
119119
version = projectVersion
120120
repositories {
121+
maven {
122+
url "https://repository.apache.org/content/repositories/orgapachespark-1503/"
123+
}
121124
mavenCentral()
122125
mavenLocal()
123126
}

dev/stage-binaries.sh

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -20,7 +20,7 @@
2020

2121
SCALA_VERSION=2.12
2222
FLINK_VERSIONS=1.20,2.0,2.1
23-
SPARK_VERSIONS=3.4,3.5,4.0
23+
SPARK_VERSIONS=3.4,3.5,4.0,4.1
2424
KAFKA_VERSIONS=3
2525

2626
./gradlew -Prelease -DscalaVersion=$SCALA_VERSION -DflinkVersions=$FLINK_VERSIONS -DsparkVersions=$SPARK_VERSIONS -DkafkaVersions=$KAFKA_VERSIONS publishApachePublicationToMavenRepository --no-parallel --no-configuration-cache

gradle.properties

Lines changed: 2 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -18,8 +18,8 @@ jmhJsonOutputPath=build/reports/jmh/results.json
1818
jmhIncludeRegex=.*
1919
systemProp.defaultFlinkVersions=2.1
2020
systemProp.knownFlinkVersions=1.20,2.0,2.1
21-
systemProp.defaultSparkVersions=4.0
22-
systemProp.knownSparkVersions=3.4,3.5,4.0
21+
systemProp.defaultSparkVersions=4.1
22+
systemProp.knownSparkVersions=3.4,3.5,4.0,4.1
2323
systemProp.defaultKafkaVersions=3
2424
systemProp.knownKafkaVersions=3
2525
systemProp.defaultScalaVersion=2.12

gradle/libs.versions.toml

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -85,6 +85,7 @@ snowflake-jdbc = "3.26.1"
8585
spark34 = "3.4.4"
8686
spark35 = "3.5.6"
8787
spark40 = "4.0.1"
88+
spark41 = "4.1.0-preview2"
8889
sqlite-jdbc = "3.50.3.0"
8990
testcontainers = "1.21.3"
9091
tez08 = { strictly = "0.8.4"} # see rich version usage explanation above
@@ -154,6 +155,7 @@ jackson214-bom = { module = "com.fasterxml.jackson:jackson-bom", version.ref =
154155
jackson215-bom = { module = "com.fasterxml.jackson:jackson-bom", version.ref = "jackson215" }
155156
jaxb-api = { module = "javax.xml.bind:jaxb-api", version.ref = "jaxb-api" }
156157
jaxb-runtime = { module = "org.glassfish.jaxb:jaxb-runtime", version.ref = "jaxb-runtime" }
158+
jetbrain-annotations = { module = "org.jetbrains:annotations", version = "17.0.0" }
157159
kafka-clients = { module = "org.apache.kafka:kafka-clients", version.ref = "kafka" }
158160
kafka-connect-api = { module = "org.apache.kafka:connect-api", version.ref = "kafka" }
159161
kafka-connect-json = { module = "org.apache.kafka:connect-json", version.ref = "kafka" }

jmh.gradle

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -53,6 +53,11 @@ if (sparkVersions.contains("4.0")) {
5353
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-4.0_2.13"))
5454
}
5555

56+
if (sparkVersions.contains("4.1")) {
57+
jmhProjects.add(project(":iceberg-spark:iceberg-spark-4.1_2.13"))
58+
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-4.1_2.13"))
59+
}
60+
5661
configure(jmhProjects) {
5762
apply plugin: 'me.champeau.jmh'
5863
apply plugin: 'io.morethan.jmhreport'

settings.gradle

Lines changed: 12 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -175,6 +175,18 @@ if (sparkVersions.contains("4.0")) {
175175
project(":iceberg-spark:spark-runtime-4.0_2.13").name = "iceberg-spark-runtime-4.0_2.13"
176176
}
177177

178+
if (sparkVersions.contains("4.1")) {
179+
include ":iceberg-spark:spark-4.1_2.13"
180+
include ":iceberg-spark:spark-extensions-4.1_2.13"
181+
include ":iceberg-spark:spark-runtime-4.1_2.13"
182+
project(":iceberg-spark:spark-4.1_2.13").projectDir = file('spark/v4.1/spark')
183+
project(":iceberg-spark:spark-4.1_2.13").name = "iceberg-spark-4.1_2.13"
184+
project(":iceberg-spark:spark-extensions-4.1_2.13").projectDir = file('spark/v4.1/spark-extensions')
185+
project(":iceberg-spark:spark-extensions-4.1_2.13").name = "iceberg-spark-extensions-4.1_2.13"
186+
project(":iceberg-spark:spark-runtime-4.1_2.13").projectDir = file('spark/v4.1/spark-runtime')
187+
project(":iceberg-spark:spark-runtime-4.1_2.13").name = "iceberg-spark-runtime-4.1_2.13"
188+
}
189+
178190
if (kafkaVersions.contains("3")) {
179191
include 'kafka-connect'
180192
project(':kafka-connect').name = 'iceberg-kafka-connect'

spark/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -31,3 +31,7 @@ if (sparkVersions.contains("3.5")) {
3131
if (sparkVersions.contains("4.0")) {
3232
apply from: file("$projectDir/v4.0/build.gradle")
3333
}
34+
35+
if (sparkVersions.contains("4.1")) {
36+
apply from: file("$projectDir/v4.1/build.gradle")
37+
}

spark/v4.1/build.gradle

Lines changed: 15 additions & 3 deletions
Original file line number · Diff line number · Diff line change
@@ -17,12 +17,23 @@
1717
* under the License.
1818
*/
1919

20-
String sparkMajorVersion = '4.0'
20+
String sparkMajorVersion = '4.1'
2121
String scalaVersion = '2.13'
2222

2323
JavaVersion javaVersion = JavaVersion.current()
2424
if (javaVersion != JavaVersion.VERSION_17 && javaVersion != JavaVersion.VERSION_21) {
25-
throw new GradleException("Spark 4.0 build requires JDK 17 or 21 but was executed with JDK " + javaVersion)
25+
throw new GradleException("Spark 4.1 build requires JDK 17 or 21 but was executed with JDK " + javaVersion)
26+
}
27+
28+
// Set target to JDK17 for Spark 4.1 to fix following error
29+
// "spark/v4.1/spark/src/main/scala/org/apache/spark/sql/stats/ThetaSketchAgg.scala:52:12: Class java.lang.Record not found"
30+
plugins.withType(ScalaPlugin.class) {
31+
tasks.withType(ScalaCompile.class) {
32+
scalaCompileOptions.keepAliveMode.set(KeepAliveMode.DAEMON)
33+
sourceCompatibility = "17"
34+
targetCompatibility = "17"
35+
scalaCompileOptions.additionalParameters.add("-release:17")
36+
}
2637
}
2738

2839
def sparkProjects = [
@@ -69,8 +80,9 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
6980
implementation("org.apache.datasketches:datasketches-java:${libs.versions.datasketches.get()}")
7081

7182
compileOnly libs.errorprone.annotations
83+
compileOnly libs.jetbrain.annotations
7284
compileOnly libs.avro.avro
73-
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
85+
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark41.get()}") {
7486
exclude group: 'org.apache.avro', module: 'avro'
7587
exclude group: 'org.apache.arrow'
7688
exclude group: 'org.apache.parquet'

spark/v4.1/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala

Lines changed: 0 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -28,7 +28,6 @@ import org.antlr.v4.runtime.tree.TerminalNodeImpl
2828
import org.apache.iceberg.common.DynConstructors
2929
import org.apache.iceberg.spark.ExtendedParser
3030
import org.apache.iceberg.spark.ExtendedParser.RawOrderField
31-
import org.apache.iceberg.spark.procedures.SparkProcedures
3231
import org.apache.spark.sql.AnalysisException
3332
import org.apache.spark.sql.SparkSession
3433
import org.apache.spark.sql.catalyst.FunctionIdentifier

0 commit comments

Comments (0)