Skip to content

Commit 7c1ebb6

Browse files
committed
Spark: Initial support for 4.1-preview1
1 parent 9b9092a commit 7c1ebb6

File tree

11 files changed

+49
-8
lines changed

11 files changed

+49
-8
lines changed

.github/workflows/spark-ci.yml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,17 +72,21 @@ jobs:
7272
strategy:
7373
matrix:
7474
jvm: [11, 17, 21]
75-
spark: ['3.4', '3.5', '4.0']
75+
spark: ['3.4', '3.5', '4.0', '4.1']
7676
scala: ['2.12', '2.13']
7777
exclude:
7878
# Spark 3.5 is the first version not failing on Java 21 (https://issues.apache.org/jira/browse/SPARK-42369)
7979
# Full Java 21 support is coming in Spark 4 (https://issues.apache.org/jira/browse/SPARK-43831)
8080
- jvm: 11
8181
spark: '4.0'
82+
- jvm: 11
83+
spark: '4.1'
8284
- jvm: 21
8385
spark: '3.4'
8486
- spark: '4.0'
8587
scala: '2.12'
88+
- spark: '4.1'
89+
scala: '2.12'
8690
env:
8791
SPARK_LOCAL_IP: localhost
8892
steps:

dev/stage-binaries.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
SCALA_VERSION=2.12
2222
FLINK_VERSIONS=1.19,1.20,2.0
23-
SPARK_VERSIONS=3.4,3.5,4.0
23+
SPARK_VERSIONS=3.4,3.5,4.0,4.1
2424
KAFKA_VERSIONS=3
2525

2626
./gradlew -Prelease -DscalaVersion=$SCALA_VERSION -DflinkVersions=$FLINK_VERSIONS -DsparkVersions=$SPARK_VERSIONS -DkafkaVersions=$KAFKA_VERSIONS publishApachePublicationToMavenRepository --no-parallel --no-configuration-cache

gradle.properties

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ jmhJsonOutputPath=build/reports/jmh/results.json
1818
jmhIncludeRegex=.*
1919
systemProp.defaultFlinkVersions=2.1
2020
systemProp.knownFlinkVersions=1.20,2.0,2.1
21-
systemProp.defaultSparkVersions=4.0
22-
systemProp.knownSparkVersions=3.4,3.5,4.0
21+
systemProp.defaultSparkVersions=4.1
22+
systemProp.knownSparkVersions=3.4,3.5,4.0,4.1
2323
systemProp.defaultKafkaVersions=3
2424
systemProp.knownKafkaVersions=3
2525
systemProp.defaultScalaVersion=2.12

gradle/libs.versions.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ snowflake-jdbc = "3.26.1"
8585
spark34 = "3.4.4"
8686
spark35 = "3.5.6"
8787
spark40 = "4.0.1"
88+
spark41 = "4.1.0-preview1"
8889
sqlite-jdbc = "3.50.3.0"
8990
testcontainers = "1.21.3"
9091
tez08 = { strictly = "0.8.4"} # see rich version usage explanation above

jmh.gradle

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ if (sparkVersions.contains("4.0")) {
5353
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-4.0_2.13"))
5454
}
5555

56+
if (sparkVersions.contains("4.1")) {
57+
jmhProjects.add(project(":iceberg-spark:iceberg-spark-4.1_2.13"))
58+
jmhProjects.add(project(":iceberg-spark:iceberg-spark-extensions-4.1_2.13"))
59+
}
60+
5661
configure(jmhProjects) {
5762
apply plugin: 'me.champeau.jmh'
5863
apply plugin: 'io.morethan.jmhreport'

settings.gradle

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,18 @@ if (sparkVersions.contains("4.0")) {
175175
project(":iceberg-spark:spark-runtime-4.0_2.13").name = "iceberg-spark-runtime-4.0_2.13"
176176
}
177177

178+
if (sparkVersions.contains("4.1")) {
179+
include ":iceberg-spark:spark-4.1_2.13"
180+
include ":iceberg-spark:spark-extensions-4.1_2.13"
181+
include ":iceberg-spark:spark-runtime-4.1_2.13"
182+
project(":iceberg-spark:spark-4.1_2.13").projectDir = file('spark/v4.1/spark')
183+
project(":iceberg-spark:spark-4.1_2.13").name = "iceberg-spark-4.1_2.13"
184+
project(":iceberg-spark:spark-extensions-4.1_2.13").projectDir = file('spark/v4.1/spark-extensions')
185+
project(":iceberg-spark:spark-extensions-4.1_2.13").name = "iceberg-spark-extensions-4.1_2.13"
186+
project(":iceberg-spark:spark-runtime-4.1_2.13").projectDir = file('spark/v4.1/spark-runtime')
187+
project(":iceberg-spark:spark-runtime-4.1_2.13").name = "iceberg-spark-runtime-4.1_2.13"
188+
}
189+
178190
if (kafkaVersions.contains("3")) {
179191
include 'kafka-connect'
180192
project(':kafka-connect').name = 'iceberg-kafka-connect'

spark/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,7 @@ if (sparkVersions.contains("3.5")) {
3131
if (sparkVersions.contains("4.0")) {
3232
apply from: file("$projectDir/v4.0/build.gradle")
3333
}
34+
35+
if (sparkVersions.contains("4.1")) {
36+
apply from: file("$projectDir/v4.1/build.gradle")
37+
}

spark/v4.1/build.gradle

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717
* under the License.
1818
*/
1919

20-
String sparkMajorVersion = '4.0'
20+
String sparkMajorVersion = '4.1'
2121
String scalaVersion = '2.13'
2222

2323
JavaVersion javaVersion = JavaVersion.current()
2424
if (javaVersion != JavaVersion.VERSION_17 && javaVersion != JavaVersion.VERSION_21) {
25-
throw new GradleException("Spark 4.0 build requires JDK 17 or 21 but was executed with JDK " + javaVersion)
25+
throw new GradleException("Spark 4.1 build requires JDK 17 or 21 but was executed with JDK " + javaVersion)
2626
}
2727

2828
def sparkProjects = [
@@ -70,7 +70,7 @@ project(":iceberg-spark:iceberg-spark-${sparkMajorVersion}_${scalaVersion}") {
7070

7171
compileOnly libs.errorprone.annotations
7272
compileOnly libs.avro.avro
73-
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark40.get()}") {
73+
compileOnly("org.apache.spark:spark-hive_${scalaVersion}:${libs.versions.spark41.get()}") {
7474
exclude group: 'org.apache.avro', module: 'avro'
7575
exclude group: 'org.apache.arrow'
7676
exclude group: 'org.apache.parquet'

spark/v4.1/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/parser/extensions/IcebergSparkSqlExtensionsParser.scala

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import org.antlr.v4.runtime.tree.TerminalNodeImpl
2828
import org.apache.iceberg.common.DynConstructors
2929
import org.apache.iceberg.spark.ExtendedParser
3030
import org.apache.iceberg.spark.ExtendedParser.RawOrderField
31-
import org.apache.iceberg.spark.procedures.SparkProcedures
3231
import org.apache.spark.sql.AnalysisException
3332
import org.apache.spark.sql.SparkSession
3433
import org.apache.spark.sql.catalyst.FunctionIdentifier

spark/v4.1/spark/src/main/java/org/apache/iceberg/spark/BaseCatalog.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,17 @@ public UnboundProcedure loadProcedure(Identifier ident) {
6060
throw new RuntimeException("Procedure " + ident + " not found");
6161
}
6262

63+
@Override
64+
public Identifier[] listProcedures(String[] namespace) {
65+
if (isSystemNamespace(namespace)) {
66+
return SparkProcedures.names().stream()
67+
.map(name -> Identifier.of(namespace, name))
68+
.toArray(Identifier[]::new);
69+
} else {
70+
return new Identifier[0];
71+
}
72+
}
73+
6374
@Override
6475
public boolean isFunctionNamespace(String[] namespace) {
6576
// Allow for empty namespace, as Spark's storage partitioned joins look up

0 commit comments

Comments
 (0)