From 93763c0da61a5f5a1480a51783b800e1038cd5d8 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 17 Feb 2025 17:34:58 +0800 Subject: [PATCH 1/5] [SPARK-51243][CORE][ML] Configurable allow native BLAS --- .../scala/org/apache/spark/SparkContext.scala | 15 +++++++++++++++ .../spark/launcher/SparkSubmitCommandBuilder.java | 5 +++++ .../scala/org/apache/spark/ml/linalg/BLAS.scala | 7 +++++-- .../org/apache/spark/deploy/yarn/Client.scala | 4 ++++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index ce62f8f5d5473..e613fad661744 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -432,6 +432,7 @@ class SparkContext(config: SparkConf) extends Logging { SparkContext.supplementJavaModuleOptions(_conf) SparkContext.supplementJavaIPv6Options(_conf) + SparkContext.supplementBlasOptions(_conf) _driverLogger = DriverLogger(_conf) @@ -3435,6 +3436,20 @@ object SparkContext extends Logging { supplement(DRIVER_JAVA_OPTIONS) supplement(EXECUTOR_JAVA_OPTIONS) } + + private def supplementBlasOptions(conf: SparkConf): Unit = { + conf.getOption("spark.ml.allowNativeBlas").foreach { allowNativeBlas => + def supplement(key: OptionalConfigEntry[String]): Unit = { + val v = conf.get(key) match { + case Some(opts) => s"-Dspark.ml.allowNativeBlas=$allowNativeBlas $opts" + case None => s"-Dspark.ml.allowNativeBlas=$allowNativeBlas" + } + conf.set(key.key, v) + } + supplement(DRIVER_JAVA_OPTIONS) + supplement(EXECUTOR_JAVA_OPTIONS) + } + } } /** diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index bdbb954dbe087..d58484821b538 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ 
b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -348,6 +348,11 @@ private List buildSparkSubmitCommand(Map env) config.get(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH)); } + if (config.containsKey("spark.ml.allowNativeBlas")) { + String preferNativeBlas = config.get("spark.ml.allowNativeBlas"); + addOptionString(cmd, "-Dspark.ml.allowNativeBlas=" + preferNativeBlas); + } + // SPARK-36796: Always add some JVM runtime default options to submit command addOptionString(cmd, JavaModuleOptions.defaultModuleOptions()); addOptionString(cmd, "-Dderby.connection.requireAuthentication=false"); diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala index d07eb890dc325..ea3adacc27796 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/BLAS.scala @@ -39,8 +39,11 @@ private[spark] object BLAS extends Serializable { // For level-3 routines, we use the native BLAS. 
private[spark] def nativeBLAS: NetlibBLAS = { if (_nativeBLAS == null) { - _nativeBLAS = - try { NetlibNativeBLAS.getInstance } catch { case _: Throwable => javaBLAS } + _nativeBLAS = System.getProperty("spark.ml.allowNativeBlas", "true") match { + case "true" => + try { NetlibNativeBLAS.getInstance } catch { case _: Throwable => javaBLAS } + case _ => javaBLAS + } } _nativeBLAS } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index e742fd620f0c4..ca349fdef16f2 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -1050,6 +1050,10 @@ private[spark] class Client( javaOpts += s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6}" + sparkConf.getOption("spark.ml.allowNativeBlas").foreach { allowNativeBlas => + javaOpts += s"-Dspark.ml.allowNativeBlas=$allowNativeBlas" + } + // SPARK-37106: To start AM with Java 17, `JavaModuleOptions.defaultModuleOptions` // is added by default. 
javaOpts += JavaModuleOptions.defaultModuleOptions() From 0ad50a635186b5bf448d481c044096e8552c1488 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Tue, 18 Feb 2025 15:22:17 +0800 Subject: [PATCH 2/5] Address comments --- .../org/apache/spark/util/SparkEnvUtils.scala | 4 ++ .../scala/org/apache/spark/SparkContext.scala | 46 +++++++------------ docs/ml-linalg-guide.md | 2 + .../launcher/SparkSubmitCommandBuilder.java | 4 +- mllib-local/pom.xml | 5 ++ .../org/apache/spark/ml/linalg/BLAS.scala | 14 ++++-- 6 files changed, 39 insertions(+), 36 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala index b54e6ee5d7309..abb57ae75b125 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala @@ -27,6 +27,10 @@ private[spark] trait SparkEnvUtils { System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null } + /** + * Whether to allow using the native BLAS library if available.
+ */ + val allowNativeBlas = "true".equals(System.getProperty("spark.ml.allowNativeBlas", "true")) } object SparkEnvUtils extends SparkEnvUtils diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index e613fad661744..8e47de9eb4675 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -3409,46 +3409,34 @@ object SparkContext extends Logging { } } + private def supplement( + conf: SparkConf, key: OptionalConfigEntry[String], javaOpts: String): Unit = { + val v = conf.get(key) match { + case Some(opts) => s"$javaOpts $opts" + case None => javaOpts + } + conf.set(key.key, v) + } + /** * SPARK-36796: This is a helper function to supplement some JVM runtime options to * `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions`. */ private def supplementJavaModuleOptions(conf: SparkConf): Unit = { - def supplement(key: OptionalConfigEntry[String]): Unit = { - val v = conf.get(key) match { - case Some(opts) => s"${JavaModuleOptions.defaultModuleOptions()} $opts" - case None => JavaModuleOptions.defaultModuleOptions() - } - conf.set(key.key, v) - } - supplement(DRIVER_JAVA_OPTIONS) - supplement(EXECUTOR_JAVA_OPTIONS) + supplement(conf, DRIVER_JAVA_OPTIONS, JavaModuleOptions.defaultModuleOptions()) + supplement(conf, EXECUTOR_JAVA_OPTIONS, JavaModuleOptions.defaultModuleOptions()) } private def supplementJavaIPv6Options(conf: SparkConf): Unit = { - def supplement(key: OptionalConfigEntry[String]): Unit = { - val v = conf.get(key) match { - case Some(opts) => s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6} $opts" - case None => s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6}" - } - conf.set(key.key, v) - } - supplement(DRIVER_JAVA_OPTIONS) - supplement(EXECUTOR_JAVA_OPTIONS) + val opts = s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6}" + supplement(conf, DRIVER_JAVA_OPTIONS, opts) + 
supplement(conf, EXECUTOR_JAVA_OPTIONS, opts) } private def supplementBlasOptions(conf: SparkConf): Unit = { - conf.getOption("spark.ml.allowNativeBlas").foreach { allowNativeBlas => - def supplement(key: OptionalConfigEntry[String]): Unit = { - val v = conf.get(key) match { - case Some(opts) => s"-Dspark.ml.allowNativeBlas=$allowNativeBlas $opts" - case None => s"-Dspark.ml.allowNativeBlas=$allowNativeBlas" - } - conf.set(key.key, v) - } - supplement(DRIVER_JAVA_OPTIONS) - supplement(EXECUTOR_JAVA_OPTIONS) - } + val opts = s"-Dspark.ml.allowNativeBlas=${Utils.allowNativeBlas}" + supplement(conf, DRIVER_JAVA_OPTIONS, opts) + supplement(conf, EXECUTOR_JAVA_OPTIONS, opts) } } diff --git a/docs/ml-linalg-guide.md b/docs/ml-linalg-guide.md index 6e91d81f49760..d9702f881379d 100644 --- a/docs/ml-linalg-guide.md +++ b/docs/ml-linalg-guide.md @@ -76,6 +76,8 @@ You can also point `dev.ludovic.netlib` to specific libraries names and paths. F If native libraries are not properly configured in the system, the Java implementation (javaBLAS) will be used as fallback option. +You can also set `spark.ml.allowNativeBlas` to `false` to disable native BLAS and always use the Java implementation. + ## Spark Configuration The default behavior of multi-threading in either Intel MKL or OpenBLAS may not be optimal with Spark's execution model [^1]. 
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java index d58484821b538..992e6870a0fbc 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java @@ -349,8 +349,8 @@ private List buildSparkSubmitCommand(Map env) } if (config.containsKey("spark.ml.allowNativeBlas")) { - String preferNativeBlas = config.get("spark.ml.allowNativeBlas"); - addOptionString(cmd, "-Dspark.ml.allowNativeBlas=" + preferNativeBlas); + String allowNativeBlas = config.get("spark.ml.allowNativeBlas"); + addOptionString(cmd, "-Dspark.ml.allowNativeBlas=" + allowNativeBlas); } // SPARK-36796: Always add some JVM runtime default options to submit command diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index d0d310b9371df..a52f5ae444074 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -66,6 +66,11 @@ org.apache.spark spark-tags_${scala.binary.version} + + org.apache.spark + spark-common-utils_${scala.binary.version} + ${project.version} +