diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala
index b54e6ee5d7309..63f25949dd705 100644
--- a/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala
+++ b/common/utils/src/main/scala/org/apache/spark/util/SparkEnvUtils.scala
@@ -27,6 +27,10 @@ private[spark] trait SparkEnvUtils {
     System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null
   }
+
+  /**
+   * Whether to allow using native BLAS/LAPACK/ARPACK libraries if available.
+   */
+  val allowNativeBlas = "true".equals(System.getProperty("netlib.allowNativeBlas", "true"))
 }
 
 object SparkEnvUtils extends SparkEnvUtils
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index ddb327da5f199..702765d922214 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -432,6 +432,7 @@ class SparkContext(config: SparkConf) extends Logging {
 
     SparkContext.supplementJavaModuleOptions(_conf)
     SparkContext.supplementJavaIPv6Options(_conf)
+    SparkContext.supplementBlasOptions(_conf)
 
     _driverLogger = DriverLogger(_conf)
@@ -3408,32 +3409,35 @@ object SparkContext extends Logging {
     }
   }
 
+  private def supplementJavaOpts(
+      conf: SparkConf, key: OptionalConfigEntry[String], javaOpts: String): Unit = {
+    val v = conf.get(key) match {
+      case Some(opts) => s"$javaOpts $opts"
+      case None => javaOpts
+    }
+    conf.set(key.key, v)
+  }
+
   /**
    * SPARK-36796: This is a helper function to supplement some JVM runtime options to
    * `spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions`.
    */
   private def supplementJavaModuleOptions(conf: SparkConf): Unit = {
-    def supplement(key: OptionalConfigEntry[String]): Unit = {
-      val v = conf.get(key) match {
-        case Some(opts) => s"${JavaModuleOptions.defaultModuleOptions()} $opts"
-        case None => JavaModuleOptions.defaultModuleOptions()
-      }
-      conf.set(key.key, v)
-    }
-    supplement(DRIVER_JAVA_OPTIONS)
-    supplement(EXECUTOR_JAVA_OPTIONS)
+    val opts = JavaModuleOptions.defaultModuleOptions()
+    supplementJavaOpts(conf, DRIVER_JAVA_OPTIONS, opts)
+    supplementJavaOpts(conf, EXECUTOR_JAVA_OPTIONS, opts)
   }
 
   private def supplementJavaIPv6Options(conf: SparkConf): Unit = {
-    def supplement(key: OptionalConfigEntry[String]): Unit = {
-      val v = conf.get(key) match {
-        case Some(opts) => s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6} $opts"
-        case None => s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6}"
-      }
-      conf.set(key.key, v)
-    }
-    supplement(DRIVER_JAVA_OPTIONS)
-    supplement(EXECUTOR_JAVA_OPTIONS)
+    val opts = s"-Djava.net.preferIPv6Addresses=${Utils.preferIPv6}"
+    supplementJavaOpts(conf, DRIVER_JAVA_OPTIONS, opts)
+    supplementJavaOpts(conf, EXECUTOR_JAVA_OPTIONS, opts)
+  }
+
+  private def supplementBlasOptions(conf: SparkConf): Unit = {
+    val opts = s"-Dnetlib.allowNativeBlas=${Utils.allowNativeBlas}"
+    supplementJavaOpts(conf, DRIVER_JAVA_OPTIONS, opts)
+    supplementJavaOpts(conf, EXECUTOR_JAVA_OPTIONS, opts)
   }
 }
diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
index 2c7c2f120b935..0bda32821ff0b 100644
--- a/core/src/main/scala/org/apache/spark/internal/config/package.scala
+++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -2893,4 +2893,12 @@ package object config {
       .checkValue(v => v.forall(Set("stdout", "stderr").contains),
        "The value only can be one or more of 'stdout, stderr'.")
       .createWithDefault(Seq("stdout", "stderr"))
+
+  private[spark] val SPARK_ML_ALLOW_NATIVE_BLAS =
+    ConfigBuilder("spark.ml.allowNativeBlas")
+      .doc("Whether to allow using native BLAS/LAPACK/ARPACK implementations when native " +
+        "libraries are available. If disabled, Java implementations are always used.")
+      .version("4.1.0")
+      .booleanConf
+      .createWithDefault(true)
 }
diff --git a/docs/ml-linalg-guide.md b/docs/ml-linalg-guide.md
index 6e91d81f49760..aa1471f0df995 100644
--- a/docs/ml-linalg-guide.md
+++ b/docs/ml-linalg-guide.md
@@ -46,8 +46,7 @@ The installation should be done on all nodes of the cluster. Generic version of
 For Debian / Ubuntu:
 ```
-sudo apt-get install libopenblas-base
-sudo update-alternatives --config libblas.so.3
+sudo apt-get install libopenblas-dev
 ```
 For CentOS / RHEL:
 ```
 sudo yum install openblas
@@ -76,6 +75,8 @@ You can also point `dev.ludovic.netlib` to specific libraries names and paths. F
 
 If native libraries are not properly configured in the system, the Java implementation (javaBLAS) will be used as fallback option.
 
+You can also set the Spark configuration `spark.ml.allowNativeBlas` or the Java system property `netlib.allowNativeBlas` to `false` to disable native BLAS and always use the Java implementation.
+
 ## Spark Configuration
 
 The default behavior of multi-threading in either Intel MKL or OpenBLAS may not be optimal with Spark's execution model [^1].
diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
index 5efa3bef78bcc..785eeabba2438 100644
--- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
+++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitCommandBuilder.java
@@ -338,6 +338,11 @@ private List<String> buildSparkSubmitCommand(Map<String, String> env)
         config.get(SparkLauncher.DRIVER_EXTRA_LIBRARY_PATH));
     }
 
+    if (config.containsKey("spark.ml.allowNativeBlas")) {
+      String allowNativeBlas = config.get("spark.ml.allowNativeBlas");
+      addOptionString(cmd, "-Dnetlib.allowNativeBlas=" + allowNativeBlas);
+    }
+
     // SPARK-36796: Always add some JVM runtime default options to submit command
     addOptionString(cmd, JavaModuleOptions.defaultModuleOptions());
     addOptionString(cmd, "-Dderby.connection.requireAuthentication=false");
diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml
index d0d310b9371df..a52f5ae444074 100644
--- a/mllib-local/pom.xml
+++ b/mllib-local/pom.xml
@@ -66,6 +66,11 @@
     <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-tags_${scala.binary.version}</artifactId>
    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-common-utils_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>