From 210ecb24a0513fa0f07258d9dbcdd57af9b5447e Mon Sep 17 00:00:00 2001 From: Vu Date: Sat, 2 May 2026 13:45:36 +0700 Subject: [PATCH] Add dx_shm_size and dx_ipc_mode WDL runtime hints New opt-in runtime hints allow WDL tasks to configure Docker's shared memory size and IPC namespace mode, which are needed for workloads that require large /dev/shm allocations (e.g. distributed deep learning with collective communication libraries). Both hints are optional and validated against strict allowlists to prevent shell-meta injection into the rendered docker run command. Depends on wdlTools 0.17.18 release. --- RELEASE_NOTES.md | 14 ++ build.sbt | 2 +- .../scala/dx/core/languages/wdl/Runtime.scala | 57 ++++++++ .../core/languages/wdl/RuntimeHintsTest.scala | 124 ++++++++++++++++++ doc/ExpertOptions.md | 56 +++++++- .../dx/executor/wdl/WdlTaskExecutor.scala | 6 +- 6 files changed, 256 insertions(+), 3 deletions(-) create mode 100644 core/src/test/scala/dx/core/languages/wdl/RuntimeHintsTest.scala diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index a9e4ddd46..22735e534 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -2,6 +2,20 @@ ## Unreleased +* Added two opt-in WDL runtime hints, `dx_shm_size` and `dx_ipc_mode`, that emit + `--shm-size` and `--ipc` flags on the generated `docker run` command. Allows + multi-GPU workloads (e.g. PyTorch + NCCL) to override Docker's default 64 MB + `/dev/shm`. Values are ordinary WDL expressions evaluated per-task and can be + overridden per job invocation via the existing `overrides___` input + (`dx run ... -i overrides___='{"runtime": {"dx_shm_size": "16g"}}'`). + WDL only — CWL containers go through `cwltool` and are not affected. + See [Additional DNAnexus-specific runtime settings](doc/ExpertOptions.md#additional-dnanexus-specific-runtime-settings). 
+ (APPS-3954) + +### Dependency updates + +* wdlTools 0.17.17 → 0.17.18 (adds `shmSize`/`ipcMode` to `TaskCommandFileGenerator`) + ## 2.15.0 2025-09-29 * Added support for new region in OCI Ashburn diff --git a/build.sbt b/build.sbt index 0e3d5f843..7395beca6 100644 --- a/build.sbt +++ b/build.sbt @@ -130,7 +130,7 @@ lazy val dependencies = val dxFileAccessProtocolsVersion = "0.5.6" val dxYamlVersion = "0.1.1" val cwlScalaVersion = "0.8.5" - val wdlToolsVersion = "0.17.17" + val wdlToolsVersion = "0.17.18" val typesafeVersion = "1.4.1" val sprayVersion = "1.3.6" val scalatestVersion = "3.2.9" diff --git a/core/src/main/scala/dx/core/languages/wdl/Runtime.scala b/core/src/main/scala/dx/core/languages/wdl/Runtime.scala index d6d3f00e5..3f1b5379d 100644 --- a/core/src/main/scala/dx/core/languages/wdl/Runtime.scala +++ b/core/src/main/scala/dx/core/languages/wdl/Runtime.scala @@ -31,8 +31,45 @@ object Runtime { val GiB: Double = 1024 * 1024 * 1024 val DxHintsKey = "dnanexus" val DxInstanceTypeKey = "dx_instance_type" + val DxShmSizeKey = "dx_shm_size" + val DxIpcModeKey = "dx_ipc_mode" case object InstanceType extends DxRuntimeHint(Some(DxInstanceTypeKey), "instance_type", Vector(T_String)) + case object ShmSize + extends DxRuntimeHint(Some(DxShmSizeKey), "shm_size", Vector(T_String)) + case object IpcMode + extends DxRuntimeHint(Some(DxIpcModeKey), "ipc_mode", Vector(T_String)) + + // Positive integer with optional SI suffix (b, k, m, g — case-insensitive). + // Leading zeros and a bare "0" are rejected because Docker errors on --shm-size=0. + private val ShmSizePattern = "^[1-9][0-9]*[bkmgBKMG]?$".r + + // Docker --ipc accepted modes per https://docs.docker.com/reference/cli/docker/container/run/#ipc. 
+ // For container:<name> we use Docker's container-name grammar + // (https://docs.docker.com/reference/cli/docker/container/run/#name) — this is the security + // boundary: anything else here would let a malicious WDL author inject arbitrary docker flags + // because the value is interpolated into a shell-rendered docker run command. + private val IpcModePattern = + "^(none|private|shareable|host|container:[A-Za-z0-9][A-Za-z0-9_.-]{0,127})$".r + + def validateShmSize(value: String): Unit = { + if (!ShmSizePattern.pattern.matcher(value).matches()) { // must match the whole value + throw new EvalException( + s"invalid ${DxShmSizeKey} value '${value}': must be a positive integer optionally " + + s"followed by b/k/m/g (e.g. '8g', '1024m')" + ) + } + } + + def validateIpcMode(value: String): Unit = { + if (!IpcModePattern.pattern.matcher(value).matches()) { // must match the whole value + throw new EvalException( + s"invalid ${DxIpcModeKey} value '${value}': must be one of " + + s"'none', 'private', 'shareable', 'host', or 'container:<name>' " + + s"(e.g. 'container:my-container')" + ) + } + } } case class Runtime(wdlVersion: WdlVersion, @@ -159,4 +196,24 @@ case class Runtime(wdlVersion: WdlVersion, def returnCodes: Option[Set[Int]] = { runtimeAttrs.runtime.map(_.returnCodes).getOrElse(Some(WdlRuntime.ReturnCodesDefault)) } + + lazy val shmSize: Option[String] = { + getDxHint(Runtime.ShmSize).map { + case V_String(s) => + Runtime.validateShmSize(s) + s + case other => + throw new EvalException(s"invalid ${Runtime.DxShmSizeKey} value ${other}") + } + } + + lazy val ipcMode: Option[String] = { + getDxHint(Runtime.IpcMode).map { + case V_String(s) => + Runtime.validateIpcMode(s) + s + case other => + throw new EvalException(s"invalid ${Runtime.DxIpcModeKey} value ${other}") + } + } } diff --git a/core/src/test/scala/dx/core/languages/wdl/RuntimeHintsTest.scala b/core/src/test/scala/dx/core/languages/wdl/RuntimeHintsTest.scala new file mode 100644 --- /dev/null +++ 
b/core/src/test/scala/dx/core/languages/wdl/RuntimeHintsTest.scala @@ -0,0 +1,124 @@ +package dx.core.languages.wdl + +import org.scalatest.flatspec.AnyFlatSpec +import org.scalatest.matchers.should.Matchers +import wdlTools.eval.{DefaultEvalPaths, Eval, EvalException} +import wdlTools.eval.WdlValues._ +import wdlTools.syntax.{Quoting, SourceLocation, WdlVersion} +import wdlTools.types.{WdlTypes, TypedAbstractSyntax => TAT} +import dx.util.{FileSourceResolver, Logger} + +import scala.collection.immutable.SeqMap + +class RuntimeHintsTest extends AnyFlatSpec with Matchers { + private val evaluator: Eval = + Eval(DefaultEvalPaths.empty, + Some(WdlVersion.V1), + Vector.empty, + FileSourceResolver.get, + Logger.get) + + private def stringExpr(s: String): TAT.Expr = + TAT.ValueString(s, WdlTypes.T_String, Quoting.Double)(SourceLocation.empty) + + private def runtimeWith(entries: (String, String)*): Runtime = { + val rt = SeqMap(entries.map { case (k, v) => k -> stringExpr(v) }: _*) + Runtime( + WdlVersion.V1, + Some(TAT.RuntimeSection(rt)(SourceLocation.empty)), + None, + evaluator + ) + } + + private def hintsRuntimeV2(dxFields: (String, String)*): Runtime = { + val dnanexusInner = SeqMap(dxFields.map { + case (k, v) => k -> TAT.MetaValueString(v, Quoting.Double)(SourceLocation.empty) + }: _*) + val hints = SeqMap( + Runtime.DxHintsKey -> + TAT.MetaValueObject(dnanexusInner)(SourceLocation.empty) + ) + Runtime( + WdlVersion.V2, + None, + Some(TAT.MetaSection(hints)(SourceLocation.empty)), + evaluator + ) + } + + it should "return None when dx_shm_size is not set" in { + runtimeWith().shmSize shouldBe None + runtimeWith().ipcMode shouldBe None + } + + it should "extract dx_shm_size from runtime block (WDL 1.x)" in { + runtimeWith(Runtime.DxShmSizeKey -> "8g").shmSize shouldBe Some("8g") + } + + it should "extract dx_ipc_mode from runtime block (WDL 1.x)" in { + runtimeWith(Runtime.DxIpcModeKey -> "host").ipcMode shouldBe Some("host") + } + + it should "extract shm_size 
and ipc_mode from hints.dnanexus block (WDL 2.0)" in { + val rt = hintsRuntimeV2("shm_size" -> "4g", "ipc_mode" -> "shareable") + rt.shmSize shouldBe Some("4g") + rt.ipcMode shouldBe Some("shareable") + } + + it should "reject malformed dx_shm_size values" in { + val rt = runtimeWith(Runtime.DxShmSizeKey -> "garbage") + val ex = intercept[EvalException](rt.shmSize) + ex.getMessage should include(Runtime.DxShmSizeKey) + } + + it should "reject malformed dx_ipc_mode values" in { + val rt = runtimeWith(Runtime.DxIpcModeKey -> "weird-value") + val ex = intercept[EvalException](rt.ipcMode) + ex.getMessage should include(Runtime.DxIpcModeKey) + } + + it should "accept all valid shm_size suffixes" in { + Seq("64", "64b", "64k", "64m", "8g", "1024M", "2G").foreach { v => + noException should be thrownBy Runtime.validateShmSize(v) + } + } + + it should "reject zero and leading-zero shm_size values" in { + Seq("0", "0g", "00", "0064m").foreach { v => + an[EvalException] should be thrownBy Runtime.validateShmSize(v) + } + } + + it should "accept all valid ipc_mode values" in { + Seq("none", "private", "shareable", "host", "container:my-container").foreach { v => + noException should be thrownBy Runtime.validateIpcMode(v) + } + } + + it should "reject ipc_mode values that could enable shell injection" in { + // The container:.+ form is the security boundary: anything beyond Docker's container-name + // grammar (https://docs.docker.com/reference/cli/docker/container/run/#name) could allow + // a malicious WDL author to inject extra docker flags via the shell-rendered run command. 
+ Seq( + "container:foo --privileged", + "container:foo;rm -rf /", + "container:foo$(whoami)", + "container:foo`id`", + "container:", + "container:.bad", + "weird-value", + "host;rm -rf /" + ).foreach { v => + an[EvalException] should be thrownBy Runtime.validateIpcMode(v) + } + } + + it should "exercise the WDL 1.0 customer scenario from APPS-3954" in { + // Customer's WDL has `runtime { dx_shm_size: "8g" }` directly, no hints block. + // This trace pins down: V1 runtime block -> getDxHint(ShmSize) -> Some("8g"). + val rt = runtimeWith(Runtime.DxShmSizeKey -> "8g") + rt.shmSize shouldBe Some("8g") + rt.getDxHint(Runtime.ShmSize) shouldBe Some(V_String("8g")) + } +} diff --git a/doc/ExpertOptions.md b/doc/ExpertOptions.md index c364afb94..68d83522b 100644 --- a/doc/ExpertOptions.md +++ b/doc/ExpertOptions.md @@ -785,7 +785,61 @@ Similarly, these attributes can be specified in the WDL workflow, but their repr * `developer`: Boolean - whether the applet is a developer, i.e. can create new applets * `projectCreation`: Boolean - whether the applet can create new projects * `dx_ignore_reuse`: Boolean - whether to allow the outputs of the applet to be reused - +* `dx_shm_size`: String - sets `--shm-size=<size>` on the generated `docker run` command, allowing the task to override Docker's default 64 MB `/dev/shm`. Accepts a positive integer optionally suffixed with `b`, `k`, `m`, or `g` (e.g. `"8g"`, `"1024m"`). Useful for multi-GPU workloads using NCCL or other shared-memory IPC. NVIDIA's NCCL [troubleshooting guide](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/troubleshooting.html) lists `1g` as the practical floor; PyTorch / Hugging Face docs commonly recommend `8g` or `16g` as a generous default. The dominant consumer is usually the PyTorch DataLoader, not NCCL itself, so size to `num_workers × per-sample size` plus headroom. +* `dx_ipc_mode`: String - sets `--ipc=<mode>` on the generated `docker run` command. 
Accepts `host`, `none`, `private`, `shareable`, or `container:<name>`. Setting `"host"` lets the container share the worker's IPC namespace, which is another way to bypass the small default `/dev/shm`. NVIDIA recommends `--ipc=host` as an alternative to `--shm-size` for NCCL workloads. + +Example for a multi-GPU NCCL job: + +```wdl +runtime { + docker: "pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime" + dx_instance_type: "mem2_ssd2_gpu4_v2_x96" + dx_shm_size: "8g" +} +``` + +#### Setting values dynamically + +Both attributes are ordinary WDL `runtime` expressions, so they can reference task inputs and computed declarations — they are evaluated when the task runs, not at compile time: + +```wdl +input { + Int gpu_count = 4 + Int dataloader_workers = 8 +} +Int shm_gb = max(2, gpu_count * 2) +runtime { + dx_shm_size: "${shm_gb}g" +} +``` + +#### Per-job-invocation override + +The values set in WDL can be overridden for a single job invocation without editing the WDL source, using dxCompiler's `overrides___` synthetic input. This accepts a JSON object with a `runtime` (and/or `hints`) key: + +```bash +dx run my-applet -i overrides___='{"runtime": {"dx_shm_size": "16g", "dx_ipc_mode": "host"}}' +``` + +For WDL 2.0 hints: + +```bash +dx run my-applet -i overrides___='{"hints": {"dnanexus": {"shm_size": "16g"}}}' +``` + +#### WDL 1.x vs WDL 2.0 + +In WDL 1.x, set `dx_shm_size` and `dx_ipc_mode` directly under `runtime` (as in the example above). In WDL 2.0 (`development`), the `runtime` section no longer accepts arbitrary keys, so set them under `hints.dnanexus` as `shm_size` and `ipc_mode`: + +```wdl +hints { + dnanexus: { + shm_size: "8g" + ipc_mode: "host" + } +} +``` + ### Native DNAnexus executable You can also specify a native DNAnexus app(let) that will be called as a task at runtime by adding key `dx_app` in the `runtime` section. See [Calling existing app(let)s](#calling-existing-applets) for more details. 
diff --git a/executorWdl/src/main/scala/dx/executor/wdl/WdlTaskExecutor.scala b/executorWdl/src/main/scala/dx/executor/wdl/WdlTaskExecutor.scala index aa220e6f6..aa2ff3e18 100644 --- a/executorWdl/src/main/scala/dx/executor/wdl/WdlTaskExecutor.scala +++ b/executorWdl/src/main/scala/dx/executor/wdl/WdlTaskExecutor.scala @@ -226,7 +226,11 @@ case class WdlTaskExecutor(task: TAT.Task, dockerUtils.getImage(images.sortBy(!_.startsWith(DxPath.DxUriPrefix))) Some(resolvedImage, jobMeta.workerPaths) } - generator.apply(Some(command), jobMeta.workerPaths, container) + generator.apply(Some(command), + jobMeta.workerPaths, + container, + shmSize = runtime.shmSize, + ipcMode = runtime.ipcMode) (true, runtime.returnCodes) } }