Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,32 @@ RUN curl -sL https://deb.nodesource.com/setup_0.12 | bash - && \
npm install -g bower

# for Apache Spark demos
ENV APACHE_SPARK_VERSION 3.4.4
ENV APACHE_SPARK_VERSION 3.5.7
ARG SCALA_VERSION=2.12

RUN apt-get -y update && \
apt-get -y install software-properties-common

RUN \
echo "===> add webupd8 repository..." && \
echo "deb http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee /etc/apt/sources.list.d/webupd8team-java.list && \
echo "deb-src http://ppa.launchpad.net/webupd8team/java/ubuntu trusty main" | tee -a /etc/apt/sources.list.d/webupd8team-java.list && \
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys EEA14886 && \
apt-get update

RUN echo "===> install Java" && \
echo debconf shared/accepted-oracle-license-v1-1 select true | debconf-set-selections && \
echo debconf shared/accepted-oracle-license-v1-1 seen true | debconf-set-selections && \
DEBIAN_FRONTEND=noninteractive apt-get install -y --force-yes oracle-java8-installer oracle-java8-set-default && \
apt-get clean && \
update-java-alternatives -s java-8-oracle

RUN cd /tmp && \
if [ "$SCALA_VERSION" = "2.13" ]; then APACHE_SPARK_CUSTOM_NAME=hadoop3-scala2.13; else APACHE_SPARK_CUSTOM_NAME=hadoop3; fi && \
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
ln -snf /usr/local/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} /usr/local/spark
RUN JAVA_8=`update-alternatives --list java | grep java-1.8.0-openjdk` || echo $JAVA_8 && \
if [ "x$JAVA_8" = "x" ]; then \
apt-get -y update ; \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java ; \
apt-get clean ; \
rm -rf /var/lib/apt/lists/* ; \
update-ca-certificates -f ; \
JAVA_8=`update-java-alternatives --list | grep java-1.8.0-openjdk | awk '{print $NF}'` ; \
update-java-alternatives --set $JAVA_8 ; \
fi

RUN if [ "$SCALA_VERSION" = "2.13" ]; then APACHE_SPARK_CUSTOM_NAME=hadoop3-scala2.13; else APACHE_SPARK_CUSTOM_NAME=hadoop3; fi && \
SPARK_TGZ_NAME=spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} && \
if [ ! -d "/usr/local/$SPARK_TGZ_NAME" ]; then \
cd /tmp ; \
wget -q https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/${SPARK_TGZ_NAME}.tgz?action=download -O ${SPARK_TGZ_NAME}.tgz ; \
tar -xzf ${SPARK_TGZ_NAME}.tgz -C /usr/local ; \
rm ${SPARK_TGZ_NAME}.tgz ; \
ln -snf /usr/local/$SPARK_TGZ_NAME /usr/local/spark ; \
fi

# R support
RUN apt-get update && \
Expand All @@ -62,7 +62,7 @@ RUN apt-get update && \
rm -rf /var/lib/apt/lists/*

ENV SPARK_HOME /usr/local/spark
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.5-src.zip
ENV PYTHONPATH $SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.9.7-src.zip
ENV PYSPARK_PYTHON /home/main/anaconda2/envs/python3/bin/python
ENV R_LIBS_USER $SPARK_HOME/R/lib

Expand Down
34 changes: 20 additions & 14 deletions Dockerfile.toree-dev
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,30 @@ FROM jupyter/all-spark-notebook
USER root

# Spark dependencies
ARG APACHE_SPARK_VERSION=3.4.4
ARG APACHE_SPARK_VERSION=3.5.7
ARG SCALA_VERSION=2.12

RUN apt-get -y update && \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* && \
update-ca-certificates -f && \
JAVA_8=`update-alternatives --list java | grep java-8-openjdk` && \
update-alternatives --set java $JAVA_8
RUN JAVA_8=`update-alternatives --list java | grep java-1.8.0-openjdk` || echo $JAVA_8 && \
if [ "x$JAVA_8" = "x" ]; then \
apt-get -y update ; \
apt-get install -y --no-install-recommends openjdk-8-jdk ca-certificates-java ; \
apt-get clean ; \
rm -rf /var/lib/apt/lists/* ; \
update-ca-certificates -f ; \
JAVA_8=`update-java-alternatives --list | grep java-1.8.0-openjdk | awk '{print $NF}'` ; \
update-java-alternatives --set $JAVA_8 ; \
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use update-java-alternatives so that other JVM commands, such as javac and jstack, also get updated.

fi

# Installing Spark3
RUN cd /tmp && \
if [ "$SCALA_VERSION" = "2.13" ]; then APACHE_SPARK_CUSTOM_NAME=hadoop3-scala2.13; else APACHE_SPARK_CUSTOM_NAME=hadoop3; fi && \
wget -q https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
tar xzf spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz -C /usr/local && \
rm spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME}.tgz && \
ln -snf /usr/local/spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} /usr/local/spark
RUN if [ "$SCALA_VERSION" = "2.13" ]; then APACHE_SPARK_CUSTOM_NAME=hadoop3-scala2.13; else APACHE_SPARK_CUSTOM_NAME=hadoop3; fi && \
SPARK_TGZ_NAME=spark-${APACHE_SPARK_VERSION}-bin-${APACHE_SPARK_CUSTOM_NAME} && \
if [ ! -d "/usr/local/$SPARK_TGZ_NAME" ]; then \
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Install the specific version of Spark only when the base image does not already include it.

cd /tmp ; \
wget -q https://www.apache.org/dyn/closer.lua/spark/spark-${APACHE_SPARK_VERSION}/${SPARK_TGZ_NAME}.tgz?action=download -O ${SPARK_TGZ_NAME}.tgz ; \
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

closer.lua is recommended by ASF Infra: it prefers downloading the tgz from the dlcdn site and falls back to the archive site if that is unavailable.

https://infra.apache.org/release-download-pages.html#download-scripts

tar -xzf ${SPARK_TGZ_NAME}.tgz -C /usr/local ; \
rm ${SPARK_TGZ_NAME}.tgz ; \
ln -snf /usr/local/$SPARK_TGZ_NAME /usr/local/spark ; \
fi

# Remove other scala kernels
RUN cd /opt/conda/share/jupyter/kernels/ && \
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ IS_SNAPSHOT?=true
SNAPSHOT:=-SNAPSHOT
endif

APACHE_SPARK_VERSION?=3.4.4
APACHE_SPARK_VERSION?=3.5.7
SCALA_VERSION?=2.12
IMAGE?=jupyter/all-spark-notebook:latest
EXAMPLE_IMAGE?=apache/toree-examples
Expand Down
4 changes: 2 additions & 2 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import scala.util.Properties
import sbtassembly.AssemblyOption

lazy val scala212 = "2.12.17"
lazy val scala212 = "2.12.18"
lazy val scala213 = "2.13.8"
lazy val defaultScalaVersion = sys.env.get("SCALA_VERSION") match {
case Some("2.13") => scala213
Expand All @@ -34,7 +34,7 @@ ThisBuild / crossScalaVersions := Seq(scala212, scala213)
ThisBuild / scalaVersion := defaultScalaVersion
ThisBuild / Dependencies.sparkVersion := {
val envVar = "APACHE_SPARK_VERSION"
val defaultVersion = "3.4.4"
val defaultVersion = "3.5.7"

Properties.envOrNone(envVar) match {
case None =>
Expand Down
2 changes: 1 addition & 1 deletion etc/kernel.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
},
"display_name": "Apache Toree (development)",
"env": {
"PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9.5-src.zip",
"PYTHONPATH": "/usr/local/spark/python:/usr/local/spark/python/lib/py4j-0.10.9.7-src.zip",
"SPARK_HOME": "/usr/local/spark",
"CAPTURE_STANDARD_ERR": "true",
"MAX_INTERPRETER_THREADS": "16",
Expand Down
3 changes: 3 additions & 0 deletions plugins/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ Test / fork := true
// Needed for type inspection
libraryDependencies ++= Seq(
Dependencies.scalaReflect.value,
Dependencies.asm,
Dependencies.asmCommons,
Dependencies.asmUtil,
Dependencies.clapper,
Dependencies.slf4jApi
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
package org.apache.toree.plugins

import java.io.File
import org.clapper.classutil.{ClassInfo, ClassFinder}
import org.clapper.classutil.{ClassFinder, ClassInfo}
import org.objectweb.asm.Opcodes
import org.slf4j.LoggerFactory

import scala.annotation.tailrec
Expand Down Expand Up @@ -52,7 +53,7 @@ class PluginSearcher {
*
* @return The new class finder
*/
protected def newClassFinder(): ClassFinder = ClassFinder(classpath)
protected def newClassFinder(): ClassFinder = ClassFinder(classpath, Some(Opcodes.ASM9))
Copy link
Member Author

@pan3793 pan3793 Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some modern libraries, like Jackson, ship multi-release JARs whose main classes are built against Java 8 but which include optional classes compiled with a higher JDK version; we must use a newer ASM library to make them work.

see also bmc/classutil#45


/**
* Creates a new class finder for the given paths.
Expand All @@ -61,7 +62,7 @@ class PluginSearcher {
*
* @return The new class finder
*/
protected def newClassFinder(paths: Seq[File]): ClassFinder = ClassFinder(paths)
protected def newClassFinder(paths: Seq[File]): ClassFinder = ClassFinder(paths, Some(Opcodes.ASM9))

/**
* Loads all class information using the provided class finder.
Expand Down
14 changes: 9 additions & 5 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,24 @@ object Dependencies {
val pekkoSlf4j = "org.apache.pekko" %% "pekko-slf4j" % pekkoVersion // Apache v2
val pekkoTestkit = "org.apache.pekko" %% "pekko-testkit" % pekkoVersion // Apache v2

val clapper = "org.clapper" %% "classutil" % "1.5.1" // BSD 3-clause license, used for detecting plugins
val asmVersion = "9.9"
Copy link
Member Author

@pan3793 pan3793 Oct 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Upgrade ASM libs used by org.clapper:classutil to address

25/10/07 03:10:47 WARN Main$$anon$1: No external magics provided to PluginManager!
Exception in thread "main" java.lang.IllegalArgumentException: Unsupported class file major version 61
	at shadeasm.org.objectweb.asm.ClassReader.<init>(ClassReader.java:195)
	at shadeasm.org.objectweb.asm.ClassReader.<init>(ClassReader.java:176)
	at shadeasm.org.objectweb.asm.ClassReader.<init>(ClassReader.java:162)
	at shadeasm.org.objectweb.asm.ClassReader.<init>(ClassReader.java:283)
	at shadeclapper.org.clapper.classutil.asm.ClassFile$.load(ClassFinderImpl.scala:222)
	at shadeclapper.org.clapper.classutil.ClassFinder.classData(ClassFinder.scala:404)
	at shadeclapper.org.clapper.classutil.ClassFinder.$anonfun$processOpenZip$2(ClassFinder.scala:359)
	at scala.collection.Iterator$$anon$10.next(Iterator.scala:461)
	at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486)
	at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492)
	at scala.collection.Iterator.toStream(Iterator.scala:1417)
	at scala.collection.Iterator.toStream$(Iterator.scala:1416)
	at scala.collection.AbstractIterator.toStream(Iterator.scala:1431)
	at scala.collection.Iterator.$anonfun$toStream$1(Iterator.scala:1417)
	at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1173)
	at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1163)
	at scala.collection.immutable.Stream.$anonfun$$plus$plus$1(Stream.scala:372)
	at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1173)
	at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1163)
	at scala.collection.immutable.StreamIterator.$anonfun$next$1(Stream.scala:1061)
	at scala.collection.immutable.StreamIterator$LazyCell.v$lzycompute(Stream.scala:1050)
	at scala.collection.immutable.StreamIterator$LazyCell.v(Stream.scala:1050)
	at scala.collection.immutable.StreamIterator.hasNext(Stream.scala:1055)
	at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
	at scala.collection.Iterator.foreach(Iterator.scala:943)
	at scala.collection.Iterator.foreach$(Iterator.scala:943)
	at scala.collection.AbstractIterator.foreach(Iterator.scala:1431)
	at scala.collection.generic.Growable.$plus$plus$eq(Growable.scala:62)
	at scala.collection.generic.Growable.$plus$plus$eq$(Growable.scala:53)
	at scala.collection.immutable.Map$MapBuilderImpl.$plus$plus$eq(Map.scala:648)
	at scala.collection.immutable.Map$MapBuilderImpl.$plus$plus$eq(Map.scala:595)
	at scala.collection.TraversableOnce.toMap(TraversableOnce.scala:372)
	at scala.collection.TraversableOnce.toMap$(TraversableOnce.scala:370)
	at scala.collection.AbstractIterator.toMap(Iterator.scala:1431)
	at shadeclapper.org.clapper.classutil.ClassFinder$.classInfoMap(ClassFinder.scala:445)
	at org.apache.toree.plugins.PluginSearcher.loadClassMap(PluginSearcher.scala:80)
	at org.apache.toree.plugins.PluginSearcher.internalClassInfo$lzycompute(PluginSearcher.scala:36)
	at org.apache.toree.plugins.PluginSearcher.internalClassInfo(PluginSearcher.scala:35)
	at org.apache.toree.plugins.PluginSearcher.internal$lzycompute(PluginSearcher.scala:39)
	at org.apache.toree.plugins.PluginSearcher.internal(PluginSearcher.scala:39)
	at org.apache.toree.plugins.PluginManager.internalPlugins$lzycompute(PluginManager.scala:45)
	at org.apache.toree.plugins.PluginManager.internalPlugins(PluginManager.scala:44)
	at org.apache.toree.plugins.PluginManager.initialize(PluginManager.scala:80)
	at org.apache.toree.boot.layer.StandardComponentInitialization.initializePlugins(ComponentInitialization.scala:219)
	at org.apache.toree.boot.layer.StandardComponentInitialization.initializeComponents(ComponentInitialization.scala:83)
	at org.apache.toree.boot.layer.StandardComponentInitialization.initializeComponents$(ComponentInitialization.scala:69)
	at org.apache.toree.Main$$anon$1.initializeComponents(Main.scala:35)
	at org.apache.toree.boot.KernelBootstrap.initialize(KernelBootstrap.scala:102)
	at org.apache.toree.Main$.delayedEndpoint$org$apache$toree$Main$1(Main.scala:35)
	at org.apache.toree.Main$delayedInit$body.apply(Main.scala:24)
	at scala.Function0.apply$mcV$sp(Function0.scala:39)
	at scala.Function0.apply$mcV$sp$(Function0.scala:39)
	at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:17)
	at scala.App.$anonfun$main$1$adapted(App.scala:80)
	at scala.collection.immutable.List.foreach(List.scala:431)
	at scala.App.main(App.scala:80)
	at scala.App.main$(App.scala:78)
	at org.apache.toree.Main$.main(Main.scala:24)
	at org.apache.toree.Main.main(Main.scala)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1034)
	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:199)
	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:222)
	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1125)
	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1134)
	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

val asm = "org.ow2.asm" % "asm" % asmVersion // Apache v2
val asmCommons = "org.ow2.asm" % "asm-commons" % asmVersion // Apache v2
val asmUtil = "org.ow2.asm" % "asm-util" % asmVersion // Apache v2
val clapper = "org.clapper" %% "classutil" % "1.5.1" // Apache v2, used for detecting plugins
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/bmc/classutil

Version 1.5.0 and on are licensed under the Apache License, version 2.0.


val commonsExec = "org.apache.commons" % "commons-exec" % "1.3" // Apache v2

val config = "com.typesafe" % "config" % "1.4.3" // Apache v2

val coursierVersion = "2.0.0"
val coursierVersion = "2.0.16"
val coursier = "io.get-coursier" %% "coursier" % coursierVersion // Apache v2
val coursierCache = "io.get-coursier" %% "coursier-cache" % coursierVersion // Apache v2

val ivy = "org.apache.ivy" % "ivy" % "2.5.1" // Apache v2

// use the same jackson version in test than the one provided at runtime by Spark 3.4.x
val jacksonDatabind = "com.fasterxml.jackson.core" % "jackson-databind" % "2.14.2" // Apache v2
// use the same jackson version in test than the one provided at runtime by Spark 3.5.x
val jacksonDatabind = "com.fasterxml.jackson.core" % "jackson-databind" % "2.15.2" // Apache v2

val jeroMq = "org.zeromq" % "jeromq" % "0.5.3" // MPL v2

Expand All @@ -57,7 +61,7 @@ object Dependencies {
val scalaTestMockito = "org.scalatestplus" %% "mockito-4-11" % "3.2.16.0" // Apache v2
val mockitoInline = "org.mockito" % "mockito-inline" % "4.11.0" // MIT

val slf4jApi = "org.slf4j" % "slf4j-api" % "2.0.6" // MIT
val slf4jApi = "org.slf4j" % "slf4j-api" % "2.0.7" // MIT

val sparkVersion = settingKey[String]("Version of Apache Spark to use in Toree") // defined in root build
val sparkCore = Def.setting{ "org.apache.spark" %% "spark-core" % sparkVersion.value } // Apache v2
Expand Down