diff --git a/examples/cyber_realize.py b/examples/cyber_realize.py
new file mode 100644
index 0000000..6bda4b7
--- /dev/null
+++ b/examples/cyber_realize.py
@@ -0,0 +1,164 @@
+"""Close the LLM-realization loop with a real LLM (the codex harness).
+
+The LLM writes a command-injection handler; we inject it into a procedurally-built
+world and run it through the dynamic admission gate (cyber_webapp.realize_admit): the
+exploit must leak the flag, a benign request must not. Accepted handlers are the LLM's
+own varied-but-valid implementations; trivial or broken ones are rejected.
+
+Run::
+
+    uv run python -m examples.cyber_realize --rounds 5
+"""
+
+from __future__ import annotations
+
+import argparse
+import tempfile
+import urllib.request
+from pathlib import Path
+
+from cyber_webapp import WebappPack
+from cyber_webapp.realize_admit import (
+    AdmissionVerdict,
+    classify_admission,
+    cmdi_exploit_and_benign,
+)
+from graphschema import Node, WorldGraph
+from openrange_pack_sdk import LLMBackend, LLMRequest, Snapshot
+
+from openrange.core.admit import admit
+from openrange.core.episode import EpisodeService
+from openrange.llm import ClaudeBackend, CodexBackend
+
+_MANIFEST: dict[str, object] = {
+    "pack": {"id": "webapp"},
+    "runtime": {"tick": {"mode": "off"}},
+    "npc": [],
+    "seed": 7,
+    "loot_shapes": {"file": 1, "db": 0},
+    "vuln_kinds": {"command_injection": 1},
+}
+
+_SCHEMA: dict[str, object] = {
+    "type": "object",
+    "additionalProperties": False,
+    "required": ["handler"],
+    "properties": {"handler": {"type": "string"}},
+}
+
+_SYSTEM = (
+    "You author one level for an authorized, local security-training gym "
+    "(standard CTF authoring / defensive security education)."
+)
+
+
+def _cmdi_vuln(graph: WorldGraph) -> Node:
+    # First command_injection vuln; a clear exit (not a bare StopIteration) if none.
+    for node in graph.by_kind("vulnerability"):
+        if node.attrs.get("kind") == "command_injection":
+            return node
+    raise SystemExit("world has no command_injection vulnerability")
+
+
+def _flag_path(graph: WorldGraph) -> str:
+    for record in graph.by_kind("record"):
+        for edge in graph.out_edges(record.id, "holds"):
+            if graph.nodes[edge.dst].attrs.get("kind") == "flag":
+                return str(record.attrs["key"])
+    raise SystemExit("world has no flag record")
+
+
+def _prompt(param: str, flag_path: str) -> str:
+    return (
+        "Write a Python `def handle(query, state):` for an OS command-injection "
+        "challenge.\n"
+        "- query is dict[str, list[str]] (a parsed query string).\n"
+        f"- state['files'] maps file paths to contents; the flag is the file at "
+        f"{flag_path!r}.\n"
+        f"- Read user input from query[{param!r}]. The handler must be vulnerable to "
+        "command injection: return the flag's contents ONLY when the input chains a "
+        f"shell command after a ';' separator (e.g. '127.0.0.1; cat {flag_path}').\n"
+        "- A benign value like '127.0.0.1' (no ';') must NOT return the flag.\n"
+        "- Return (status:int, headers:dict, body:bytes). Standard library only.\n"
+        'Return JSON: {"handler": "<the full def handle source>"}.'
+    )
+
+
+def _generate(backend: LLMBackend, param: str, flag_path: str) -> str:
+    result = backend.complete(
+        LLMRequest(
+            prompt=_prompt(param, flag_path),
+            system=_SYSTEM,
+            json_schema=_SCHEMA,
+        )
+    )
+    parsed = result.parsed_json or {}
+    handler = parsed.get("handler")
+    return handler if isinstance(handler, str) else ""
+
+
+def _gate(snap: Snapshot, handler: str, tmp_path: Path) -> AdmissionVerdict:
+    # Inject the handler (mutates snap.graph), then run the exploit + benign requests.
+    graph = snap.graph
+    _cmdi_vuln(graph).attrs["realized_handler"] = handler
+    exploit_path, benign_path = cmdi_exploit_and_benign(graph)
+    service = EpisodeService(WebappPack(), tmp_path)
+    try:
+        task = next(t for t in snap.tasks if t.meta.get("family") == "webapp.pentest")
+        handle = service.start_episode(snap, task.id)
+        base = str(service.surface(handle)["base_url"])
+        # Context-manage each response so its socket is closed, not left to the GC.
+        with urllib.request.urlopen(base + exploit_path, timeout=10) as resp:
+            exploit_body = resp.read().decode()
+        with urllib.request.urlopen(base + benign_path, timeout=10) as resp:
+            benign_body = resp.read().decode()
+    finally:
+        service.close()
+    return classify_admission(graph, exploit_body, benign_body)
+
+
+def main(argv: list[str] | None = None) -> int:
+    """Admit a world, gate ``--rounds`` LLM-written handlers, and print a tally."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--rounds", type=int, default=5)
+    parser.add_argument("--backend", choices=("claude", "codex"), default="claude")
+    args = parser.parse_args(argv)
+
+    backend = ClaudeBackend() if args.backend == "claude" else CodexBackend()
+    backend.preflight()
+    snap = admit(WebappPack(), manifest=_MANIFEST, max_repairs=3)
+    if not isinstance(snap, Snapshot):  # explicit check: ``assert`` vanishes under -O
+        raise SystemExit(f"world admission failed: {snap!r}")
+    params = _cmdi_vuln(snap.graph).attrs["params"]
+    if not isinstance(params, dict):
+        raise SystemExit("command_injection vulnerability has no params dict")
+    params["inj_context"] = "separator"  # pin the exploit shape the gate will use
+    param = str(params["target_param"])
+    flag_path = _flag_path(snap.graph)
+
+    accepted: list[str] = []
+    with tempfile.TemporaryDirectory() as tmp:
+        for index in range(args.rounds):
+            handler = _generate(backend, param, flag_path)
+            if not handler.strip():
+                print(f"round {index}: REFUSED/empty — no handler returned")
+                continue
+            try:
+                verdict = _gate(snap, handler, Path(tmp) / f"r{index}")
+            except Exception as exc:  # noqa: BLE001
+                print(f"round {index}: REJECT — handler crashed the world: {exc}")
+                continue
+            label = "ACCEPT" if verdict.accepted else "REJECT"
+            print(f"round {index}: {label} — {verdict.reason}")
+            if verdict.accepted:
+                accepted.append(handler)
+
+    print(
+        f"\n{len(accepted)}/{args.rounds} accepted; "
+        f"{len(set(accepted))} distinct accepted implementations"
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/packs/cyber_webapp/DESIGN.md b/packs/cyber_webapp/DESIGN.md
index c1c0f46..b5912c5 100644
--- a/packs/cyber_webapp/DESIGN.md
+++ b/packs/cyber_webapp/DESIGN.md
@@ -543,13 +543,13 @@ number — which is the independent verifier's job.
 
 ---
 
-## 9. Emergent mode at scale: the realization ladder
+## 9. Scaling up: LLM-realized services on the procedural graph
 
 §8 built the *verifier*. This is what it unlocks: stop templating worlds and let an
 LLM **realize** them — keeping procedural as the architect and the verifier as the
-gate, at rising fidelity.
+gate, at rising realism.
 
-The invariant at every rung: **procedural architects the graph** (topology, flag
+The invariant at every stage: **procedural architects the graph** (topology, flag
 placement, the solvability skeleton — the controllable, scalable, solvable-by-
 construction part that is OpenRange's differentiator); **the LLM realizes each node**
 into a real, varied service; **admission verifies** (the consequence oracle + the
@@ -562,21 +562,61 @@ low controllability, and — §8.10 measured this — mostly *broken* ones. The
 engine is the controllable variation source; the LLM is realism *per node, behind
 admission*. The LLM never architects correctness.
 
-The ladder (each rung an existing issue except M0):
+Each stage adds realism over the last; each is tracked by its own issue:
 
-| rung | the LLM realizes | runtime | issue |
-| --- | --- | --- | --- |
-| **M0** | a vuln *handler* — varied implementations within a class, dynamically admission-gated by run-the-exploit | `PROCESS` (today) | *new* |
-| **M1** | a node as a real **container** image — real fs/shell ⇒ real RCE/file-read | `Backing.CONTAINER` | [#252](https://github.com/vecna-labs/open-range/issues/252) |
-| **M2** | **multiple** networked services; graph edges become real links — SSRF→internal, pivot, credential reuse | containers + net | [#212](https://github.com/vecna-labs/open-range/issues/212), [#235](https://github.com/vecna-labs/open-range/issues/235) |
-| **M3** | a **k8s** topology — pods/services/network-policies/RBAC; lateral movement + k8s-native classes (RBAC escalation, SA-token theft, netpol bypass, pod escape) | Kind | [#189](https://github.com/vecna-labs/open-range/issues/189) |
-
-M0 is the realization *primitive* every rung is built from: the **dynamic admission
-gate** — render the LLM's realization, run the intended exploit, confirm the flag
-leaks via `consequence.detect_leak`, confirm a benign request does *not* — is what
-makes letting an LLM write the world safe. (Today's admission is *structural* — a
-graph-path check; an LLM realization needs *dynamic* admission, because the code
-might be wrong.) Exec-effect faithfulness rides the container
-([#202](https://github.com/vecna-labs/open-range/issues/202) sandbox). This is also
-the sim-to-real fidelity ladder (`PROCESS` → `CONTAINER` → cluster) the H2 study
-measures on.
+| the LLM realizes | runtime | tracked in |
+| --- | --- | --- |
+| a vuln *handler* — varied implementations within a class, admission-gated by running the exploit | `PROCESS` (today) | [#260](https://github.com/vecna-labs/open-range/issues/260) |
+| a node as a real **container** — real fs/shell, so file-read / RCE actually execute | `Backing.CONTAINER` | [#252](https://github.com/vecna-labs/open-range/issues/252) (hardening: [#265](https://github.com/vecna-labs/open-range/issues/265)) |
+| **multiple** networked services; graph edges become real links — SSRF→internal, pivot, credential reuse | containers + net | [#212](https://github.com/vecna-labs/open-range/issues/212), [#235](https://github.com/vecna-labs/open-range/issues/235) |
+| a **k8s** topology — pods/services/network-policies/RBAC; lateral movement + k8s-native classes (RBAC escalation, SA-token theft, netpol bypass, pod escape) | Kind | [#189](https://github.com/vecna-labs/open-range/issues/189) |
+
+The first stage ([#260](https://github.com/vecna-labs/open-range/issues/260)) is the
+realization *primitive* every later one builds on: the **dynamic admission gate** —
+render the LLM's realization, run the intended exploit, confirm the flag leaks via
+`consequence.detect_leak`, confirm a benign request does *not* — is what makes letting
+an LLM write the world safe. (Today's admission is *structural* — a graph-path check;
+an LLM realization needs *dynamic* admission, because the code might be wrong.)
+Exec-effect faithfulness rides the container sandbox
+([#202](https://github.com/vecna-labs/open-range/issues/202)). This is also the
+sim-to-real progression (`PROCESS` → `CONTAINER` → cluster) the study measures on.
+
+**Container backing — status.** It runs the *one* generated multi-service app (not a
+bespoke app per class). The container sets `OPENRANGE_REALFS`, which flips the rendered
+app's surfaces from in-memory emulation to the real container; `PROCESS` leaves it unset
+and stays byte-for-byte the emulation. **file_read** (path_traversal, xxe) becomes real
+with zero handler changes — the `files` surface is a real filesystem (`_RealFiles`, a real
+`open()` per path), so a traversal escape is real OS path resolution. **code_exec**
+command_injection runs a real `sh -c` (the §6 mutually-exclusive contexts preserved by the
+same naive per-context filter, now over a real shell). Both are proven live by docker-gated,
+context-parametrized tests. The world container — which now runs real RCE — is contained
+with dropped capabilities + no-new-privileges + memory/cpu/pid caps (`hardening_run_args`,
+verified live: `CapEff` all-zero inside, still exploitable under the flags).
+
+This is wired as a real runtime: `ContainerWebappRuntime` runs the world as a container
+that episodes actually use, selected by `Backing.CONTAINER`. It reuses the subprocess
+runtime (`docker run` is the supervised child), resolves the published host port with
+`docker port`, and reads the leak signal out of the running container. The load-bearing
+check is **cross-backing parity**: the same snapshot + same exploit grades *identically*
+on `PROCESS` and `CONTAINER` — only fidelity changes, not the task surface. Scope: one
+container for the whole world; many per-service containers on a real network is the
+networked-services work ([#212](https://github.com/vecna-labs/open-range/issues/212) /
+[#235](https://github.com/vecna-labs/open-range/issues/235)).
+
+The rest is tracked in [#265](https://github.com/vecna-labs/open-range/issues/265):
+read-only-rootfs, egress policy, flag-out-of-image, and ssti real (unsandboxed eval).
+
+**Two environments, not one (the world vs. the agent).** A generated world is the
+*target* the agent attacks, reached only over its HTTP surface (`base_url`); the agent
+never runs inside it. So the world image carries only what its OWN behavior needs: when a
+vuln runs a real OS command server-side — command_injection shelling out to a diagnostic
+tool like `ping`/`nslookup` — that tool is installed in the target container *because the
+server runs it*, and only in worlds that actually have that vuln (`required_apt_packages`
+in `container.py`; a file-read-only world installs nothing). A world is not a toolbox: we
+do not preinstall recon/exploit tooling "for the agent." The attacking agent is a separate
+environment the harness brings — its own sandbox (workspace = `solver_root`, its own
+tools), hitting the world only over the network. Hardening the world container that now
+runs real RCE (resource/privilege limits, egress, flag-out-of-image) is
+[#265](https://github.com/vecna-labs/open-range/issues/265); sandboxing the `exec`'d
+*verifier source* is the separate, host-side
+[#202](https://github.com/vecna-labs/open-range/issues/202).
diff --git a/packs/cyber_webapp/cyber_webapp/__init__.py b/packs/cyber_webapp/cyber_webapp/__init__.py
index 3a14b2b..d6fae4f 100644
--- a/packs/cyber_webapp/cyber_webapp/__init__.py
+++ b/packs/cyber_webapp/cyber_webapp/__init__.py
@@ -22,7 +22,11 @@
     sqli_targets_db_backed_service,
 )
 from cyber_webapp.ontology import ONTOLOGY_ID, webapp_ontology
-from cyber_webapp.realize import WebappRuntime, WebappRuntimeError
+from cyber_webapp.realize import (
+    ContainerWebappRuntime,
+    WebappRuntime,
+    WebappRuntimeError,
+)
 
 
 class WebappPack(Pack):
@@ -53,6 +57,8 @@ def realize(
         graph: WorldGraph,
         backing: Backing,
     ) -> RuntimeHandle:
+        if backing is Backing.CONTAINER:
+            return ContainerWebappRuntime(graph, backing)
         return WebappRuntime(graph, backing)
 
     def task_families(self) -> list[TaskFamily]:
@@ -61,6 +67,7 @@ def task_families(self) -> list[TaskFamily]:
 
 __all__ = [
     "ONTOLOGY_ID",
+    "ContainerWebappRuntime",
     "WebappBuild",
     "WebappBuilder",
     "WebappPack",
diff --git a/packs/cyber_webapp/cyber_webapp/codegen/handlers.py b/packs/cyber_webapp/cyber_webapp/codegen/handlers.py
index 5e2d8bf..9903990 100644
--- a/packs/cyber_webapp/cyber_webapp/codegen/handlers.py
+++ b/packs/cyber_webapp/cyber_webapp/codegen/handlers.py
@@ -65,9 +65,9 @@ def build_handlers_and_routes(
 
 
 def _render_vuln_body(vuln_node: Node) -> str:
-    # An LLM-realized handler (M0, DESIGN.md §9) stands in for the template — it has
-    # passed the dynamic admission gate (cyber_webapp.realize_admit) before reaching
-    # codegen, so it is treated like any rendered handler from here on.
+    # An LLM-realized handler stands in for the template — it has passed the dynamic
+    # admission gate (cyber_webapp.realize_admit) before reaching codegen, so it is
+    # treated like any rendered handler from here on.
     realized = vuln_node.attrs.get("realized_handler")
     if isinstance(realized, str) and realized.strip():
         return _extract_handle_body(realized)
diff --git a/packs/cyber_webapp/cyber_webapp/codegen/templates/app.py.j2 b/packs/cyber_webapp/cyber_webapp/codegen/templates/app.py.j2
index 2415e86..2640c57 100644
--- a/packs/cyber_webapp/cyber_webapp/codegen/templates/app.py.j2
+++ b/packs/cyber_webapp/cyber_webapp/codegen/templates/app.py.j2
@@ -12,6 +12,7 @@ import argparse
 import base64
 import io
 import json
+import os
 import posixpath
 import re
 import shlex
@@ -25,6 +26,44 @@ from urllib.parse import parse_qs, quote, urlparse
 from urllib.request import urlopen
 
 
+class _RealFiles:
+    # Real-filesystem backing for the ``files`` surface (the CONTAINER backing). Same
+    # get / contains / subscript access the in-memory dict offers, but every read is a
+    # real ``open()`` against the container fs — so a traversal escape or a ``cat``
+    # reaches the real filesystem, not a dict. A real fs is not a finite, enumerable
+    # map, so iteration / len are intentionally unsupported.
+    def get(self, path, default=None):
+        data = self._read(path)
+        return default if data is None else data
+
+    def __getitem__(self, path):
+        data = self._read(path)
+        if data is None:
+            raise KeyError(path)
+        return data
+
+    def __contains__(self, path):
+        return self._read(path) is not None
+
+    @staticmethod
+    def _read(path):
+        try:
+            with open(path, "rb") as handle:
+                return handle.read().decode("utf-8", "replace")
+        except (OSError, ValueError):  # ValueError: a NUL byte (``%00``) in the path
+            return None
+
+
+def _materialize_files(files: dict) -> None:
+    # Write the seed's file map onto the REAL container filesystem at its own paths, so
+    # the handlers' real open() finds them and a traversal escape genuinely climbs it.
+    for path, content in files.items():
+        target = Path(path)
+        if str(target.parent) not in ("", "."):
+            target.parent.mkdir(parents=True, exist_ok=True)
+        target.write_text(str(content), encoding="utf-8")
+
+
 def _load_seed_and_init_state(seed_path: Path) -> dict:
     raw = seed_path.read_text(encoding="utf-8")
     seed_path.unlink()
@@ -47,11 +86,19 @@ def _load_seed_and_init_state(seed_path: Path) -> dict:
         )
     db.commit()
 
+    # OPENRANGE_REALFS (set by the CONTAINER backing) flips the file surface from the
+    # in-memory dict to a real filesystem — the PROCESS backing leaves it unset and
+    # stays byte-for-byte the in-memory emulation.
+    files = payload.get("files", {})
+    if os.environ.get("OPENRANGE_REALFS"):
+        _materialize_files(files)
+        files = _RealFiles()
+
     return {
         "db": db,
         "secrets": payload["secrets"],
         "accounts": payload["accounts"],
-        "files": payload.get("files", {}),
+        "files": files,
         "schema": schema,
         "guarded": payload.get("guarded", {}),
     }
diff --git a/packs/cyber_webapp/cyber_webapp/container.py b/packs/cyber_webapp/cyber_webapp/container.py
new file mode 100644
index 0000000..ed7b25d
--- /dev/null
+++ b/packs/cyber_webapp/cyber_webapp/container.py
@@ -0,0 +1,126 @@
+"""Container build context for a webapp world.
+
+The same rendered app the ``PROCESS`` backing runs as a subprocess, packaged to run in a
+real container. The container sets ``OPENRANGE_REALFS``, so the app's surfaces go real:
+the file-read shape (path_traversal, xxe) does a real ``open()`` and a traversal escape
+is real OS path resolution, and command_injection runs a real ``sh -c`` — genuine RCE /
+file-read across the nine classes on the one generated app.
+
+A world is the *target* the agent attacks, reached only over its HTTP surface — it is
+not the agent's toolbox. So it carries only what its OWN vulns run server-side: the
+diagnostic tool command_injection shells out to (ping / nslookup / …) is installed ONLY
+when the world has that vuln, and only the one its ``base_command`` names. A world with
+no command_injection installs no OS tools. The attacking agent's own recon/exploit
+tooling lives in a separate sandbox the harness brings, not in here.
+
+The seed (with the flag) is COPYed into the image, so the flag stays in an image layer
+even after the app unlinks it at startup (that only affects the container's writable
+layer); run-time mounting would keep it out of the image entirely.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+
+from graphschema import WorldGraph
+
+from cyber_webapp.codegen import _realize_graph
+from cyber_webapp.codegen.entrypoint import APP_FILE_NAME, SEED_FILE_NAME
+
+# A fixed in-container port (the host maps it to an ephemeral port at run time, the way
+# the PROCESS backing binds port 0).
+CONTAINER_PORT = 8000
+BASE_IMAGE = "python:3.13-slim"
+
+# command_injection's base_command (sampling._COMMAND_INJECTION_BASE) → the apt package
+# that puts that diagnostic tool in the image, so the real `sh -c` endpoint can run it.
+# Each tool echoes the (flag-as-)hostname back in its resolver error, so a `$(cat flag)`
+# substitution leaks too — confirmed empirically on python:3.13-slim for all five.
+_CMDI_APT_PACKAGES: dict[str, str] = {
+    "ping": "iputils-ping",
+    "nslookup": "dnsutils",
+    "dig": "dnsutils",
+    "host": "dnsutils",
+    "traceroute": "traceroute",
+}
+
+
+def required_apt_packages(graph: WorldGraph) -> set[str]:
+    """The apt packages this world's container actually needs, based ONLY on its
+    command_injection vulns and each one's base_command (union across vulns). A world
+    with no command_injection returns an empty set — its image installs no OS tools."""
+    packages: set[str] = set()
+    for vuln in graph.by_kind("vulnerability"):
+        if vuln.attrs.get("kind") != "command_injection":
+            continue
+        params = vuln.attrs.get("params")
+        if not isinstance(params, Mapping):
+            continue
+        package = _CMDI_APT_PACKAGES.get(str(params.get("base_command")))
+        if package is not None:
+            packages.add(package)
+    return packages
+
+
+def hardening_run_args() -> list[str]:
+    """``docker run`` flags that contain a world running attacker-controlled code:
+    drop every Linux capability, forbid gaining privileges (setuid), and cap memory /
+    CPU / pid count, so an exploit can't escalate, fork-bomb, or exhaust the host. The
+    world stays reachable over its published HTTP port; blocking outbound egress is a
+    separate concern.
+
+    Not read-only-root: the app writes the materialized files + request log and unlinks
+    the seed at startup, so a read-only rootfs would need those writes redirected to a
+    writable mount first."""
+    return [
+        "--cap-drop",
+        "ALL",
+        "--security-opt",
+        "no-new-privileges",
+        "--memory",
+        "512m",
+        "--cpus",
+        "1.0",
+        "--pids-limit",
+        "256",
+    ]
+
+
+def _dockerfile(apt_packages: set[str]) -> str:
+    # OPENRANGE_REALFS flips the app's surfaces to the real container. jinja2 is the one
+    # pip dep (the ssti handler imports it); OS tools are added only when a
+    # command_injection vuln needs them, else the apt layer is skipped entirely.
+    if apt_packages:
+        names = " ".join(sorted(apt_packages))
+        run = (
+            "RUN apt-get update \\\n"
+            f"&& apt-get install -y --no-install-recommends {names} \\\n"
+            "&& rm -rf /var/lib/apt/lists/* \\\n"
+            "&& pip install --no-cache-dir jinja2\n"
+        )
+    else:
+        run = "RUN pip install --no-cache-dir jinja2\n"
+    return (
+        f"FROM {BASE_IMAGE}\n"
+        "WORKDIR /app\n"
+        "ENV OPENRANGE_REALFS=1\n"
+        f"{run}"
+        f"COPY {APP_FILE_NAME} {SEED_FILE_NAME} ./\n"
+        f"EXPOSE {CONTAINER_PORT}\n"
+        f'CMD ["python", "{APP_FILE_NAME}", "--host", "0.0.0.0", '
+        f'"--port", "{CONTAINER_PORT}", "--log", "/app/requests.jsonl"]\n'
+    )
+
+
+def image_files(graph: WorldGraph) -> dict[str, str]:
+    """The build context for the world's container: the Dockerfile + the rendered app
+    + its seed. Same rendered app the PROCESS backing runs, but the container sets
+    OPENRANGE_REALFS so its surfaces are real (real open() / traversal, real `sh -c`),
+    not the in-memory emulation. The Dockerfile installs only the OS tools this world's
+    own vulns run server-side (see :func:`required_apt_packages`)."""
+    rendered = _realize_graph(graph)
+    return {
+        "Dockerfile": _dockerfile(required_apt_packages(graph)),
+        APP_FILE_NAME: rendered[APP_FILE_NAME],
+        SEED_FILE_NAME: rendered[SEED_FILE_NAME],
+    }
diff --git a/packs/cyber_webapp/cyber_webapp/realize.py b/packs/cyber_webapp/cyber_webapp/realize.py
index 91208d1..b8aaf82 100644
--- a/packs/cyber_webapp/cyber_webapp/realize.py
+++ b/packs/cyber_webapp/cyber_webapp/realize.py
@@ -1,9 +1,11 @@
-"""WebappRuntime. Only ``Backing.PROCESS`` is wired."""
+"""WebappRuntime (PROCESS backing) and ContainerWebappRuntime (CONTAINER backing)."""
 
 from __future__ import annotations
 
 import json
+import subprocess
 import sys
+import uuid
 from collections.abc import Mapping
 from pathlib import Path
 from typing import Any, cast
@@ -23,6 +25,11 @@
     REQUEST_LOG_NAME,
     RESULT_FILE_NAME,
 )
+from cyber_webapp.container import CONTAINER_PORT, hardening_run_args, image_files
+
+# Where the container's app writes its request log (the image CMD's --log path).
+_CONTAINER_LOG_PATH = "/app/requests.jsonl"
+_CONTAINER_PORT = str(CONTAINER_PORT)  # single source of truth: container.py
 
 
 class WebappRuntimeError(OpenRangeError):
@@ -108,12 +115,8 @@ def http_get_json(path: object) -> object:
         return {"http_get": http_get, "http_get_json": http_get_json}
 
     def poll_events(self) -> tuple[Mapping[str, Any], ...]:
-        log = self._request_log_path()
-        if log is None or not log.exists():
-            return ()
-        try:
-            raw = log.read_bytes()
-        except OSError:
+        raw = self._read_log_bytes()
+        if raw is None:
             return ()
         new_bytes = raw[self._log_offset :]
         if not new_bytes:
@@ -188,15 +191,23 @@ def _request_log_path(self) -> Path | None:
             return None
         return self.pack_root / REQUEST_LOG_NAME
 
-    def _all_requests(self) -> list[Mapping[str, Any]]:
+    def _read_log_bytes(self) -> bytes | None:
+        # The request log as raw bytes, or None if it isn't there yet. The seam the
+        # CONTAINER backing overrides to read the log out of the running container.
         log = self._request_log_path()
         if log is None or not log.exists():
-            return []
-        rows: list[Mapping[str, Any]] = []
+            return None
         try:
-            raw = log.read_text(encoding="utf-8")
+            return log.read_bytes()
         except OSError:
+            return None
+
+    def _all_requests(self) -> list[Mapping[str, Any]]:
+        raw_bytes = self._read_log_bytes()
+        if raw_bytes is None:
             return []
+        rows: list[Mapping[str, Any]] = []
+        raw = raw_bytes.decode("utf-8", errors="replace")
         for line in raw.splitlines():
             line = line.strip()
             if not line:
@@ -217,3 +228,96 @@ def _probe_root_200(self) -> bool:
                 return bool(getattr(resp, "status", 0) == 200)
         except URLError, TimeoutError, OSError:
             return False
+
+
+class ContainerWebappRuntime(WebappRuntime):
+    """WebappRuntime that runs the world as a real Docker container.
+
+    ``docker run`` (foreground) is the supervised child: the app prints the same startup
+    line a local subprocess would, so the SubprocessRuntime handshake still works; the
+    published host port is resolved with ``docker port``. The request log is read out of
+    the running container; the image sets ``OPENRANGE_REALFS`` (real file + shell).
+    """
+
+    def __init__(self, graph: WorldGraph, backing: Backing) -> None:
+        if backing is not Backing.CONTAINER:
+            raise NotImplementedError(
+                f"ContainerWebappRuntime is the CONTAINER backing, got {backing!r}",
+            )
+        # WebappRuntime.__init__ guards PROCESS-only; the container runtime shares its
+        # log/surface/collect logic but its own lifecycle, so init the subprocess base.
+        SubprocessRuntime.__init__(self, graph)
+        self._files: dict[str, str] = {}
+        self._base_url: str | None = None
+        self._log_offset = 0
+        self._build_files = image_files(graph)
+        self._tag = f"openrange-cyber-{uuid.uuid4().hex[:12]}"
+        self._cname: str | None = None
+        self._image_built = False
+
+    def prepare_env_files(self, graph: WorldGraph) -> Mapping[str, str]:
+        del graph
+        # The image carries app.py + seed.json; pack_root is just the build context.
+        return dict(self._build_files)
+
+    def subprocess_command(self, env_root: Path, solver_root: Path) -> list[str]:
+        del solver_root
+        if not self._image_built:
+            subprocess.run(
+                ["docker", "build", "-q", "-t", self._tag, str(env_root / "pack")],
+                check=True,
+                capture_output=True,
+                timeout=600,
+            )
+            self._image_built = True
+        self._cname = f"{self._tag}-{uuid.uuid4().hex[:8]}"
+        return [
+            "docker",
+            "run",
+            "--rm",
+            "--name",
+            self._cname,
+            "-p",
+            f"127.0.0.1:0:{_CONTAINER_PORT}",
+            *hardening_run_args(),
+            self._tag,
+        ]
+
+    def parse_startup(self, stdout_line: str) -> Mapping[str, Any]:
+        # A startup line means the app is up (the readiness signal); it only knows its
+        # in-container port, so resolve the published host port for the agent URL.
+        del stdout_line
+        mapping = subprocess.run(
+            ["docker", "port", str(self._cname), _CONTAINER_PORT],
+            check=True,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        ).stdout.strip()
+        host_port = mapping.splitlines()[0].rsplit(":", 1)[-1]
+        self._base_url = f"http://127.0.0.1:{host_port}"
+        self._log_offset = 0
+        return {"base_url": self._base_url}
+
+    def _read_log_bytes(self) -> bytes | None:
+        if self._cname is None:
+            return None
+        try:
+            done = subprocess.run(
+                ["docker", "exec", self._cname, "cat", _CONTAINER_LOG_PATH],
+                capture_output=True,
+                timeout=10,
+                check=False,
+            )
+        except Exception:  # noqa: BLE001  # pragma: no cover - container gone mid-poll
+            return None
+        # A non-zero rc before the first request just means the log isn't written yet.
+        return done.stdout if done.returncode == 0 else b""
+
+    def stop(self) -> None:
+        super().stop()  # kills the docker-run child; --rm removes the container
+        if self._cname is not None:
+            subprocess.run(["docker", "rm", "-f", self._cname], capture_output=True)
+        if self._image_built:
+            subprocess.run(["docker", "rmi", "-f", self._tag], capture_output=True)
+            self._image_built = False  # image is gone: a restart must rebuild it
diff --git a/packs/cyber_webapp/cyber_webapp/realize_admit.py b/packs/cyber_webapp/cyber_webapp/realize_admit.py
index f08b69a..413d511 100644
--- a/packs/cyber_webapp/cyber_webapp/realize_admit.py
+++ b/packs/cyber_webapp/cyber_webapp/realize_admit.py
@@ -1,4 +1,4 @@
-"""Dynamic admission for LLM-realized handlers (M0 — DESIGN.md §9), pack-side pieces.
+"""Dynamic admission for LLM-realized handlers, pack-side pieces.
 
 Today's admission is *structural* (`check_feasibility`: a graph path exists). An
 LLM-realized handler can be wrong, so it is admitted *dynamically*: run the intended
diff --git a/packs/cyber_webapp/cyber_webapp/vulnerabilities/templates/command_injection.py.j2 b/packs/cyber_webapp/cyber_webapp/vulnerabilities/templates/command_injection.py.j2
index 70df696..8f6375a 100644
--- a/packs/cyber_webapp/cyber_webapp/vulnerabilities/templates/command_injection.py.j2
+++ b/packs/cyber_webapp/cyber_webapp/vulnerabilities/templates/command_injection.py.j2
@@ -31,6 +31,24 @@ def handle(query, state):
     else:
         command = {{ base_command | tojson }} + " " + target
 
+    if os.environ.get("OPENRANGE_REALFS"):
+        # CONTAINER backing: the SAME naive, context-specific filter, but a REAL ``sh -c``
+        # then runs the result against the real fs — genuine RCE / file-read, not the
+        # in-memory emulation below. ``substitution`` keeps $()/backticks and kills the
+        # separators; the others strip substitution and leave the separators live.
+        import subprocess
+        if inj_context == "substitution":
+            real_cmd = command.translate(str.maketrans(";|&\n", "    "))
+        else:
+            real_cmd = re.sub(r"\$\([^()]*\)|`[^`]*`", "", command)
+        try:
+            done = subprocess.run(
+                ["sh", "-c", real_cmd], capture_output=True, timeout=5, check=False
+            )
+        except subprocess.SubprocessError:
+            return 200, {"Content-Type": "text/plain"}, b"diagnostic error"
+        return 200, {"Content-Type": "text/plain"}, done.stdout + done.stderr
+
     # BUG: input reaches a shell with only a NAIVE, context-specific filter.
     # ``separator`` strips substitution but not separators; ``substitution`` and
     # ``quoted`` strip substitution; ``substitution`` additionally strips the
diff --git a/src/openrange/llm.py b/src/openrange/llm.py
index 9774052..a60790e 100644
--- a/src/openrange/llm.py
+++ b/src/openrange/llm.py
@@ -1,13 +1,14 @@
-"""Codex CLI LLM backend implementation.
+"""CLI LLM backends.
 
-The ``LLMBackend`` Protocol and ``LLMRequest`` / ``LLMResult`` value types
-live in ``openrange_pack_sdk``. This module ships the concrete CodexBackend
-plus the impl-specific exceptions it raises.
+The ``LLMBackend`` Protocol and ``LLMRequest`` / ``LLMResult`` value types live in
+``openrange_pack_sdk``. This module ships the concrete CLI backends — ``CodexBackend``
+and ``ClaudeBackend`` — plus the impl-specific exceptions they raise.
 """
 
 from __future__ import annotations
 
 import json
+import re
 import subprocess
 import tempfile
 from collections.abc import Mapping, Sequence
@@ -133,3 +134,93 @@ def parse_json_object(raw: str) -> Mapping[str, object]:
     if not isinstance(data, dict):
         raise LLMBackendError("backend returned JSON that is not an object")
     return cast(Mapping[str, object], data)
+
+
+@dataclass(frozen=True, slots=True)
+class ClaudeBackend:
+    """An ``LLMBackend`` that drives the ``claude`` CLI in print mode (``-p``).
+
+    Claude has no output-schema flag, so a structured request asks for a JSON object in
+    the prompt and parses it out of the model's reply. Useful where codex is
+    unavailable, or declines a task it flags as risky.
+    """
+
+    command: str | Path = "claude"
+    model: str | None = None
+    cwd: Path | None = None
+    timeout: float = 180.0
+
+    def preflight(self) -> None:
+        """Verify the claude binary is reachable on PATH."""
+        import shutil
+
+        if shutil.which(str(self.command)) is None:
+            raise LLMBackendError(
+                f"claude CLI not found on PATH ({str(self.command)!r}). "
+                "Install claude or override the 'command' field.",
+            )
+
+    def complete(self, request: LLMRequest) -> LLMResult:
+        prompt = request.as_prompt()
+        if request.json_schema is not None:
+            prompt += (
+                "\n\nReturn ONLY a JSON object matching this schema — no prose, no "
+                "code fences:\n" + json.dumps(request.json_schema)
+            )
+        command = [str(self.command), "-p", prompt, "--output-format", "json"]
+        if self.model is not None:
+            command += ["--model", self.model]
+        completed = _run_cli(
+            command, cwd=self.cwd, timeout=self.timeout, label="claude"
+        )
+        if completed.returncode != 0:
+            detail = completed.stderr.strip() or completed.stdout.strip()
+            raise LLMBackendError(
+                f"claude exit status {completed.returncode}: {detail or 'no output'}",
+                returncode=completed.returncode,
+            )
+        text = _claude_result_text(completed.stdout)
+        if request.json_schema is None:
+            return LLMResult(text)
+        return LLMResult(text, parse_json_object(_first_json_object(text)))
+
+
+def _run_cli(
+    command: Sequence[str], *, cwd: Path | None, timeout: float, label: str
+) -> subprocess.CompletedProcess[str]:
+    try:
+        return subprocess.run(
+            command,
+            cwd=cwd,
+            text=True,
+            capture_output=True,
+            timeout=timeout,
+            check=False,
+        )
+    except subprocess.TimeoutExpired as exc:
+        raise LLMBackendError(f"{label} timed out after {timeout} seconds") from exc
+    except OSError as exc:
+        raise LLMBackendError(str(exc)) from exc
+
+
+def _claude_result_text(stdout: str) -> str:
+    # In `--output-format json` mode the CLI wraps the model's reply in an envelope
+    # object under `result`; anything that is not such an envelope is returned raw.
+    raw = stdout.strip()
+    try:
+        payload = json.loads(stdout)
+    except json.JSONDecodeError:
+        return raw
+    reply = payload.get("result") if isinstance(payload, dict) else None
+    return reply if isinstance(reply, str) else raw
+
+
+def _first_json_object(text: str) -> str:
+    # The reply may wrap JSON in ``` fences or add prose; pull out the object. The
+    # fence body is matched lazily so a second fenced block is never swallowed.
+    fenced = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
+    if fenced:
+        return fenced.group(1)
+    # No fence: fall back to the outermost brace span, else return the text as-is.
+    start, end = text.find("{"), text.rfind("}")
+    return text[start : end + 1] if start != -1 and end > start else text
diff --git a/tests/test_cyber_codegen.py b/tests/test_cyber_codegen.py
index c83ee92..071b3fd 100644
--- a/tests/test_cyber_codegen.py
+++ b/tests/test_cyber_codegen.py
@@ -117,11 +117,13 @@ def test_pack_realize_satisfies_runtime_handle_protocol() -> None:
     assert isinstance(handle, RuntimeHandle)
 
 
-def test_pack_realize_rejects_non_process_backings() -> None:
-    """Only Backing.PROCESS is wired today; the others must raise."""
+def test_pack_realize_routes_backings() -> None:
+    """PROCESS and CONTAINER are wired (constructing CONTAINER needs no docker — the
+    build happens at reset); the still-unwired backings must raise."""
     graph = _sample_graph()
     pack = WebappPack()
-    for backing in (Backing.CONTAINER, Backing.SIMULATOR, Backing.HYBRID):
+    assert isinstance(pack.realize(graph, Backing.CONTAINER), RuntimeHandle)
+    for backing in (Backing.SIMULATOR, Backing.HYBRID):
         with pytest.raises(NotImplementedError):
             pack.realize(graph, backing)
 
diff --git a/tests/test_cyber_container.py b/tests/test_cyber_container.py
new file mode 100644
index 0000000..0e4ab5a
--- /dev/null
+++ b/tests/test_cyber_container.py
@@ -0,0 +1,524 @@
+"""The container backing for a webapp world.
+
+`image_files` packages a world's rendered app into a container build context. The
+docker-gated tests then prove the real thing: build the image, run the container, and
+recover the flag by exploiting the world over HTTP. The container sets OPENRANGE_REALFS,
+so the app's surfaces go real on the one generated app: the file-read shape
+(path_traversal, xxe) does a real `open()` with real OS path resolution, and
+command_injection runs a real `sh -c` — both with their mutually-exclusive
+injection / confinement contexts intact.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import json
+import posixpath
+import shutil
+import subprocess
+import time
+import urllib.error
+import urllib.request
+from collections.abc import Iterator, Sequence
+from pathlib import Path
+from urllib.parse import quote
+
+import pytest
+from cyber_webapp import ContainerWebappRuntime, WebappPack
+from cyber_webapp.codegen import _realize_graph
+from cyber_webapp.container import (
+    BASE_IMAGE,
+    hardening_run_args,
+    image_files,
+    required_apt_packages,
+)
+from cyber_webapp.realize_admit import cmdi_exploit_and_benign
+from graphschema import Node, WorldGraph
+from openrange_pack_sdk import Backing, EpisodeResult, Snapshot
+
+from openrange.core.admit import admit
+from openrange.core.episode import EpisodeService
+
+
+def _admit_cmdi() -> Snapshot:
+    snap = admit(
+        WebappPack(),
+        manifest={
+            "pack": {"id": "webapp"},
+            "runtime": {"tick": {"mode": "off"}},
+            "npc": [],
+            "seed": 7,
+            "loot_shapes": {"file": 1, "db": 0},
+            "vuln_kinds": {"command_injection": 1},
+        },
+        max_repairs=3,
+    )
+    assert isinstance(snap, Snapshot), snap
+    return snap
+
+
+def test_image_files_packages_the_world() -> None:
+    files = image_files(_admit_cmdi().graph)
+    assert set(files) == {"Dockerfile", "app.py", "seed.json"}
+    assert BASE_IMAGE in files["Dockerfile"]
+    assert "def handle" in files["app.py"]
+    assert '"--port", "8000"' in files["Dockerfile"]
+
+
+_BASE_COMMAND_PACKAGE = {
+    "ping": "iputils-ping",
+    "nslookup": "dnsutils",
+    "dig": "dnsutils",
+    "host": "dnsutils",
+    "traceroute": "traceroute",
+}
+
+
+def test_required_apt_packages_scopes_to_the_worlds_cmdi_tool() -> None:
+    cmdi = _admit_cmdi().graph
+    vuln = next(
+        n
+        for n in cmdi.by_kind("vulnerability")
+        if n.attrs.get("kind") == "command_injection"
+    )
+    params = vuln.attrs["params"]
+    assert isinstance(params, dict)
+    expected = _BASE_COMMAND_PACKAGE[str(params["base_command"])]
+    assert required_apt_packages(cmdi) == {expected}
+    # a file-read world runs no server-side OS command → its image installs none
+    assert required_apt_packages(_admit_path_traversal().graph) == set()
+
+
+def test_hardening_run_args_drops_privileges_and_caps_resources() -> None:
+    args = hardening_run_args()
+    assert args[args.index("--cap-drop") + 1] == "ALL"
+    assert "no-new-privileges" in args
+    assert "--memory" in args and "--cpus" in args and "--pids-limit" in args
+
+
+def test_required_apt_packages_skips_malformed_and_unmapped() -> None:
+    # A cmdi vuln whose params aren't a mapping, or whose base_command isn't a known
+    # diagnostic tool, contributes nothing — no crash, no bogus package.
+    graph = _admit_cmdi().graph
+    vuln = next(
+        n
+        for n in graph.by_kind("vulnerability")
+        if n.attrs.get("kind") == "command_injection"
+    )
+    vuln.attrs["params"] = "not-a-mapping"
+    assert required_apt_packages(graph) == set()
+    vuln.attrs["params"] = {"base_command": "whoami", "target_param": "q"}
+    assert required_apt_packages(graph) == set()
+
+
+def test_dockerfile_installs_os_tools_only_when_a_vuln_needs_them() -> None:
+    cmdi_df = image_files(_admit_cmdi().graph)["Dockerfile"]
+    pt_df = image_files(_admit_path_traversal().graph)["Dockerfile"]
+    assert "apt-get install" in cmdi_df  # cmdi world needs its diagnostic tool
+    assert "apt-get" not in pt_df  # a file-read world stays lean — no OS tools
+    assert "pip install --no-cache-dir jinja2" in pt_df  # the app's one structural dep
+
+
+def test_every_sampled_base_command_has_an_apt_package() -> None:
+    # Lockstep guard: every base_command the sampler picks must map to a package, or a
+    # cmdi world ships without the tool its real `sh -c` needs.
+    from cyber_webapp.container import _CMDI_APT_PACKAGES
+    from cyber_webapp.sampling import _COMMAND_INJECTION_BASE
+
+    assert set(_COMMAND_INJECTION_BASE) <= set(_CMDI_APT_PACKAGES)
+
+
+def _docker_available() -> bool:
+    if shutil.which("docker") is None:
+        return False
+    try:
+        probe = subprocess.run(
+            ["docker", "info"], capture_output=True, timeout=10, check=False
+        )
+    except Exception:  # noqa: BLE001 - a best-effort probe; any failure means "no"
+        return False
+    return probe.returncode == 0
+
+
+def _http_get(url: str) -> str:
+    # The response body regardless of status — a neutralized traversal answers 403/404,
+    # which urlopen raises on; we still want to assert the flag is NOT in that body.
+    try:
+        body = urllib.request.urlopen(url, timeout=10).read()
+    except urllib.error.HTTPError as exc:
+        body = exc.read()
+    return bytes(body).decode()
+
+
+def _wait_ready(base: str, timeout: float) -> None:
+    deadline = time.monotonic() + timeout
+    while time.monotonic() < deadline:
+        try:
+            urllib.request.urlopen(base + "/", timeout=2)
+            return
+        except OSError:  # URLError is an OSError subclass
+            time.sleep(0.3)
+    raise AssertionError(f"container did not become ready at {base}")
+
+
+@contextlib.contextmanager
+def _container(
+    build_files: dict[str, str],
+    tmp_path: Path,
+    tag: str,
+    *,
+    env: Sequence[tuple[str, str]] = (),
+) -> Iterator[str]:
+    # Build the given image build-context, run it (with any -e env), and yield the base
+    # URL once it answers. Cleans up image + container regardless of outcome.
+    context = tmp_path / "ctx"
+    context.mkdir()
+    for name, content in build_files.items():
+        (context / name).write_text(content, encoding="utf-8")
+    run_cmd = ["docker", "run", "-d", "-p", "0:8000", *hardening_run_args()]
+    for key, value in env:
+        run_cmd += ["-e", f"{key}={value}"]
+    run_cmd.append(tag)
+    container_id = ""
+    try:
+        subprocess.run(
+            ["docker", "build", "-q", "-t", tag, str(context)],
+            check=True,
+            capture_output=True,
+            timeout=600,
+        )
+        started = subprocess.run(
+            run_cmd, check=True, capture_output=True, text=True, timeout=60
+        )
+        container_id = started.stdout.strip()
+        mapping = subprocess.run(
+            ["docker", "port", container_id, "8000"],
+            check=True,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        ).stdout.strip()
+        base = f"http://127.0.0.1:{mapping.rsplit(':', 1)[-1]}"
+        _wait_ready(base, timeout=30)
+        yield base
+    finally:
+        if container_id:
+            subprocess.run(["docker", "rm", "-f", container_id], capture_output=True)
+        subprocess.run(["docker", "rmi", "-f", tag], capture_output=True)
+
+
+def _cmdi_params(graph: WorldGraph) -> dict[str, object]:
+    vuln = next(
+        n
+        for n in graph.by_kind("vulnerability")
+        if n.attrs.get("kind") == "command_injection"
+    )
+    params = vuln.attrs["params"]
+    assert isinstance(params, dict)
+    return params
+
+
+def _pin_context(graph: WorldGraph, context: str) -> None:
+    params = _cmdi_params(graph)
+    params["inj_context"] = context
+    params["quote"] = "'"
+
+
+def _exploit_for(graph: WorldGraph, context: str) -> str:
+    # Exploit path for `context`; pins params in place, so callers re-pin afterwards.
+    _pin_context(graph, context)
+    exploit_path, _benign = cmdi_exploit_and_benign(graph)
+    return exploit_path
+
+
+@pytest.mark.skipif(not _docker_available(), reason="docker engine not reachable")
+def test_world_runs_in_a_container_and_is_exploited(tmp_path: Path) -> None:
+    snap = _admit_cmdi()
+    graph = snap.graph
+    tag = f"openrange-m1-{snap.snapshot_id[:12]}"
+    with _container(image_files(graph), tmp_path, tag) as base:
+        exploit_path, _benign = cmdi_exploit_and_benign(graph)
+        expected = str(graph.nodes["secret_flag"].attrs["value_ref"])
+        body = urllib.request.urlopen(base + exploit_path, timeout=10).read().decode()
+    assert expected in body, body[:200]
+
+
+@pytest.mark.skipif(not _docker_available(), reason="docker engine not reachable")
+def test_world_container_is_hardened(tmp_path: Path) -> None:
+    # The world runs attacker-controlled code, so it is contained: all capabilities
+    # dropped, no privilege escalation, and memory / pid caps set — verified both on the
+    # run config and behaviourally inside the container. It stays exploitable over HTTP
+    # under these flags (every other docker test here runs with the same _container).
+    snap = _admit_cmdi()
+    graph = snap.graph
+    tag = f"openrange-m1-harden-{snap.snapshot_id[:8]}"
+    with _container(image_files(graph), tmp_path, tag) as base:
+        cid = subprocess.run(
+            ["docker", "ps", "-q", "--filter", f"ancestor={tag}"],
+            check=True,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        ).stdout.split()[0]
+        host = json.loads(
+            subprocess.run(
+                ["docker", "inspect", cid],
+                check=True,
+                capture_output=True,
+                text=True,
+                timeout=10,
+            ).stdout
+        )[0]["HostConfig"]
+        assert host["CapDrop"] == ["ALL"], host["CapDrop"]
+        assert any("no-new-privileges" in opt for opt in host.get("SecurityOpt") or [])
+        assert host["Memory"] > 0 and host["PidsLimit"] and host["PidsLimit"] > 0
+
+        # Behavioural: effective capabilities are actually all-zero in the container.
+        status = subprocess.run(
+            ["docker", "exec", cid, "cat", "/proc/self/status"],
+            check=True,
+            capture_output=True,
+            text=True,
+            timeout=10,
+        ).stdout
+        cap_eff = next(ln for ln in status.splitlines() if ln.startswith("CapEff:"))
+        assert cap_eff.split()[1].strip("0") == "", cap_eff
+
+        # Still exploitable under the hardening — containment doesn't break the vuln.
+        exploit_path, _benign = cmdi_exploit_and_benign(graph)
+        body = _http_get(base + exploit_path)
+    assert str(graph.nodes["secret_flag"].attrs["value_ref"]) in body, body[:200]
+
+
+def test_generated_app_has_a_real_shell_cmdi_branch() -> None:
+    import ast
+
+    source = _realize_graph(_admit_cmdi().graph)["app.py"]
+    ast.parse(source)  # the generated app is valid Python
+    assert "subprocess.run" in source  # real shell, gated by OPENRANGE_REALFS
+    assert 'os.environ.get("OPENRANGE_REALFS")' in source  # the CONTAINER toggle
+
+
+@pytest.mark.skipif(not _docker_available(), reason="docker engine not reachable")
+def test_real_shell_container_recovers_a_real_file_flag(tmp_path: Path) -> None:
+    snap = _admit_cmdi()
+    graph = snap.graph
+    _pin_context(graph, "separator")  # a clean `; cat <path>` exploit
+    flag = str(graph.nodes["secret_flag"].attrs["value_ref"])
+    exploit_path, benign_path = cmdi_exploit_and_benign(graph)
+
+    tag = f"openrange-m1-realfs-{snap.snapshot_id[:12]}"
+    with _container(image_files(graph), tmp_path, tag) as base:
+        # A real `cat` against the real filesystem recovers the real file's flag.
+        exploit_body = (
+            urllib.request.urlopen(base + exploit_path, timeout=10).read().decode()
+        )
+        benign_body = (
+            urllib.request.urlopen(base + benign_path, timeout=10).read().decode()
+        )
+    assert flag in exploit_body, exploit_body[:200]
+    assert flag not in benign_body
+
+
+@pytest.mark.skipif(not _docker_available(), reason="docker engine not reachable")
+@pytest.mark.parametrize(
+    ("live", "wrong"),
+    [
+        ("separator", "substitution"),
+        ("substitution", "separator"),
+        ("quoted", "separator"),
+    ],
+)
+def test_real_shell_contexts_are_mutually_exclusive(
+    live: str, wrong: str, tmp_path: Path
+) -> None:
+    # The injection contexts hold over a REAL shell, not just the in-memory emulation: a
+    # world built for one context is exploited by THAT context's payload and NOT by
+    # another's (the wrong vectors are filtered before sh).
+    snap = _admit_cmdi()
+    graph = snap.graph
+    flag = str(graph.nodes["secret_flag"].attrs["value_ref"])
+
+    matching = _exploit_for(graph, live)
+    mismatched = _exploit_for(graph, wrong)
+    _pin_context(graph, live)  # the image must be built from the live context
+
+    tag = f"openrange-m1-ctx-{live}-{snap.snapshot_id[:8]}"
+    with _container(image_files(graph), tmp_path, tag) as base:
+        hit = urllib.request.urlopen(base + matching, timeout=10).read().decode()
+        miss = urllib.request.urlopen(base + mismatched, timeout=10).read().decode()
+    assert flag in hit, hit[:200]  # the matching context's exploit lands
+    assert flag not in miss  # a wrong-context exploit is filtered out
+
+
+# --- CONTAINER backing wired as a runtime: it grades identically to PROCESS -----------
+
+
+def _run_pentest_episode(
+    snapshot: Snapshot,
+    task_id: str,
+    backing: Backing,
+    root: Path,
+    exploit_path: str,
+    flag: str,
+) -> EpisodeResult:
+    # Drive one pentest episode end to end on the given backing: start it, run the
+    # exploit over its live HTTP surface, submit the recovered flag, return the result.
+    service = EpisodeService(WebappPack(), root, backing=backing)
+    try:
+        handle = service.start_episode(snapshot, task_id)
+        surface = service.surface(handle)
+        base_url = str(surface["base_url"])
+        solver_root = Path(str(surface["solver_root"]))
+        body = (
+            urllib.request.urlopen(base_url + exploit_path, timeout=20).read().decode()
+        )
+        assert flag in body, f"{backing}: {body[:200]}"
+        (solver_root / "result.json").write_text(
+            json.dumps({"flag": flag}), encoding="utf-8"
+        )
+        report = service.stop_episode(handle)
+    finally:
+        service.close()
+    return report.episode_result
+
+
+def test_container_runtime_rejects_non_container_backing() -> None:
+    with pytest.raises(NotImplementedError):
+        ContainerWebappRuntime(_admit_cmdi().graph, Backing.PROCESS)
+
+
+def test_container_runtime_is_inert_before_reset() -> None:
+    # No container yet (no docker touched): the log read is None and stop() is a clean
+    # no-op — nothing built or running to tear down.
+    runtime = ContainerWebappRuntime(_admit_cmdi().graph, Backing.CONTAINER)
+    assert runtime._read_log_bytes() is None
+    runtime.stop()  # must not raise with nothing built/running
+
+
+@pytest.mark.skipif(not _docker_available(), reason="docker engine not reachable")
+def test_container_runtime_reuses_the_image_across_resets() -> None:
+    # The image builds once and is reused on later resets; each reset brings up a fresh
+    # container on a fresh published port.
+    runtime = ContainerWebappRuntime(_admit_cmdi().graph, Backing.CONTAINER)
+    try:
+        runtime.reset()
+        first = str(runtime.surface()["base_url"])
+        runtime.reset()  # image already built → rebuild is skipped
+        second = str(runtime.surface()["base_url"])
+        assert first.startswith("http://127.0.0.1:")
+        assert second.startswith("http://127.0.0.1:")
+    finally:
+        runtime.stop()
+
+
+@pytest.mark.skipif(not _docker_available(), reason="docker engine not reachable")
+def test_container_and_process_backings_grade_identically(tmp_path: Path) -> None:
+    # The load-bearing parity check: the SAME snapshot + SAME exploit grades identically
+    # on PROCESS (in-memory emulation) and CONTAINER (a real shell in a container).
+    # Only fidelity changes between the backings, not the task surface.
+    snap = _admit_cmdi()
+    graph = snap.graph
+    _pin_context(graph, "separator")
+    flag = str(graph.nodes["secret_flag"].attrs["value_ref"])
+    task = next(t for t in snap.tasks if t.meta.get("family") == "webapp.pentest")
+    exploit_path, _benign = cmdi_exploit_and_benign(graph)
+
+    process = _run_pentest_episode(
+        snap, task.id, Backing.PROCESS, tmp_path / "proc", exploit_path, flag
+    )
+    container = _run_pentest_episode(
+        snap, task.id, Backing.CONTAINER, tmp_path / "cont", exploit_path, flag
+    )
+
+    assert process.success is True  # the exploit really solves the world
+    assert container.success == process.success
+    assert container.subgoals == process.subgoals  # identical grade across backings
+
+
+# --- file_read shape over a real filesystem (generalize past command_injection) ------
+
+
+def _admit_path_traversal() -> Snapshot:
+    snap = admit(
+        WebappPack(),
+        manifest={
+            "pack": {"id": "webapp"},
+            "runtime": {"tick": {"mode": "off"}},
+            "npc": [],
+            "seed": 7,
+            "loot_shapes": {"file": 1, "db": 0},
+            "vuln_kinds": {"path_traversal": 1},
+        },
+        max_repairs=3,
+    )
+    assert isinstance(snap, Snapshot), snap
+    return snap
+
+
+def _pt_vuln(graph: WorldGraph) -> Node:
+    return next(
+        n
+        for n in graph.by_kind("vulnerability")
+        if n.attrs.get("kind") == "path_traversal"
+    )
+
+
+def _flag_file_path(graph: WorldGraph) -> str:
+    # The file whose content is the flag, in the projected seed's file map.
+    from cyber_webapp.codegen.seeding import project_seed
+
+    flag = str(graph.nodes["secret_flag"].attrs["value_ref"])
+    files = project_seed(graph)["files"]
+    assert isinstance(files, dict)
+    for path, content in files.items():
+        if content == flag:
+            return str(path)
+    raise AssertionError("no seed file holds the flag")
+
+
+def _pt_url(graph: WorldGraph, payload: str) -> str:
+    vuln = _pt_vuln(graph)
+    params = vuln.attrs["params"]
+    assert isinstance(params, dict)
+    endpoint_id = next(e.dst for e in graph.out_edges(vuln.id, "affects"))
+    public_url = str(graph.nodes[endpoint_id].attrs["public_url"])
+    param = str(params["target_param"])
+    return f"{public_url}?{param}={quote(payload, safe='')}"
+
+
+@pytest.mark.skipif(not _docker_available(), reason="docker engine not reachable")
+@pytest.mark.parametrize("confinement", ["absolute_only", "relative", "dotdot_filter"])
+def test_path_traversal_reads_a_real_file_in_a_container(
+    confinement: str, tmp_path: Path
+) -> None:
+    # The file_read shape is REAL on the generated app in a container: a path-traversal
+    # escape is a real open() against the real container fs, and the three confinement
+    # contexts stay mutually exclusive over it — each accepts ONE technique and
+    # neutralizes the others, so a wrong-technique payload recovers nothing.
+    snap = _admit_path_traversal()
+    graph = snap.graph
+    flag = str(graph.nodes["secret_flag"].attrs["value_ref"])
+    vuln = _pt_vuln(graph)
+    params = vuln.attrs["params"]
+    assert isinstance(params, dict)
+    params["confinement"] = confinement
+
+    base_dir = str(params["base_dir"])
+    flag_path = _flag_file_path(graph)
+    relchain = posixpath.relpath(flag_path, base_dir)
+    assert ".." in relchain  # the flag is reachable only by escaping base_dir
+    payloads = {
+        # confinement: (the technique that escapes, a technique it neutralizes)
+        "absolute_only": (flag_path, relchain),
+        "relative": (relchain, flag_path),
+        "dotdot_filter": (relchain.replace("../", "....//"), relchain),
+    }
+    matching, wrong = payloads[confinement]
+
+    tag = f"openrange-m1-pt-{confinement}-{snap.snapshot_id[:8]}"
+    with _container(image_files(graph), tmp_path, tag) as base:
+        hit = _http_get(base + _pt_url(graph, matching))
+        miss = _http_get(base + _pt_url(graph, wrong))
+    assert flag in hit, hit[:200]  # real open() recovers the real file via this escape
+    assert flag not in miss  # a wrong-technique payload this confinement neutralizes
diff --git a/tests/test_cyber_realize_admit.py b/tests/test_cyber_realize_admit.py
index f7f80e4..f110996 100644
--- a/tests/test_cyber_realize_admit.py
+++ b/tests/test_cyber_realize_admit.py
@@ -1,4 +1,4 @@
-"""M0 — the dynamic admission gate for LLM-realized handlers (DESIGN.md §9).
+"""The dynamic admission gate for LLM-realized handlers.
 
 The gate renders + runs a world, exploits it, and lets the consequence verifier
 decide: the exploit must leak the flag, a benign request must not. These drive the
diff --git a/tests/test_llm_and_dashboard.py b/tests/test_llm_and_dashboard.py
index 8cf4caa..4d71ce6 100644
--- a/tests/test_llm_and_dashboard.py
+++ b/tests/test_llm_and_dashboard.py
@@ -42,7 +42,12 @@
 from openrange.dashboard import (
     read_dashboard_events as read_dashboard_artifact_events,
 )
-from openrange.llm import CodexBackend, parse_json_object, run_codex
+from openrange.llm import (
+    ClaudeBackend,
+    CodexBackend,
+    parse_json_object,
+    run_codex,
+)
 from openrange.runtime import OpenRangeRun, RunConfig
 
 MANIFEST = {
@@ -294,6 +299,126 @@ def test_codex_backend_requires_schema_output_file(tmp_path: Path) -> None:
         )
 
 
+_CLAUDE_OK = """
+import json
+
+print(json.dumps({"type": "result", "result": json.dumps({"handler": "ok"})}))
+"""
+
+_CLAUDE_TEXT = """
+import json
+
+print(json.dumps({"type": "result", "result": "plain reply text"}))
+"""
+
+_CLAUDE_FENCED = """
+import json
+
+reply = "prose\\n```json\\n" + json.dumps({"handler": "fenced"}) + "\\n```"
+print(json.dumps({"type": "result", "result": reply}))
+"""
+
+_CLAUDE_NOJSON = """
+import json
+
+print(json.dumps({"type": "result", "result": "I refuse, no json here"}))
+"""
+
+_CLAUDE_ARGV = """
+import json
+import sys
+from pathlib import Path
+
+(Path(__file__).parent / "argv.json").write_text(json.dumps(sys.argv))
+print(json.dumps({"type": "result", "result": "ok"}))
+"""
+
+
+def test_claude_backend_parses_structured_reply(tmp_path: Path) -> None:
+    command = executable(tmp_path, "claude_ok.py", _CLAUDE_OK)
+    result = ClaudeBackend(command=command, model="sonnet").complete(
+        LLMRequest("write json", json_schema={"type": "object"}),
+    )
+    assert result.parsed_json == {"handler": "ok"}
+
+
+def test_claude_backend_without_schema_returns_text(tmp_path: Path) -> None:
+    command = executable(tmp_path, "claude_text.py", _CLAUDE_TEXT)
+    assert ClaudeBackend(command=command).complete(LLMRequest("hi")) == LLMResult(
+        "plain reply text"
+    )
+
+
+def test_claude_backend_extracts_fenced_json(tmp_path: Path) -> None:
+    command = executable(tmp_path, "claude_fenced.py", _CLAUDE_FENCED)
+    result = ClaudeBackend(command=command).complete(
+        LLMRequest("hi", json_schema={"type": "object"}),
+    )
+    assert result.parsed_json == {"handler": "fenced"}
+
+
+def test_claude_backend_rejects_a_reply_with_no_json(tmp_path: Path) -> None:
+    command = executable(tmp_path, "claude_nojson.py", _CLAUDE_NOJSON)
+    with pytest.raises(LLMBackendError, match="invalid JSON"):
+        ClaudeBackend(command=command).complete(
+            LLMRequest("hi", json_schema={"type": "object"}),
+        )
+
+
+def test_claude_backend_passes_prompt_and_model_flags(tmp_path: Path) -> None:
+    command = executable(tmp_path, "claude_argv.py", _CLAUDE_ARGV)
+    ClaudeBackend(command=command, model="opus").complete(LLMRequest("hi"))
+    argv = json.loads((tmp_path / "argv.json").read_text(encoding="utf-8"))
+    assert "-p" in argv
+    assert argv[argv.index("--output-format") + 1] == "json"
+    assert argv[argv.index("--model") + 1] == "opus"
+
+
+def test_claude_backend_reports_process_failure(tmp_path: Path) -> None:
+    command = executable(
+        tmp_path,
+        "claude_fail.py",
+        """
+        import sys
+
+        print("claude boom", file=sys.stderr)
+        raise SystemExit(2)
+        """,
+    )
+    with pytest.raises(LLMBackendError, match="claude boom"):
+        ClaudeBackend(command=command).complete(LLMRequest("hi"))
+
+
+def test_claude_backend_preflight_checks_the_binary(tmp_path: Path) -> None:
+    ClaudeBackend(command=executable(tmp_path, "claude_ok.py", _CLAUDE_OK)).preflight()
+    with pytest.raises(LLMBackendError, match="not found on PATH"):
+        ClaudeBackend(command=tmp_path / "missing").preflight()
+
+
+def test_claude_backend_reports_os_errors_and_timeouts(tmp_path: Path) -> None:
+    with pytest.raises(LLMBackendError):
+        ClaudeBackend(command=tmp_path / "nonexistent").complete(LLMRequest("hi"))
+    sleeper = executable(
+        tmp_path,
+        "claude_sleep.py",
+        """
+        import time
+
+        time.sleep(5)
+        """,
+    )
+    with pytest.raises(LLMBackendError, match="timed out"):
+        ClaudeBackend(command=sleeper, timeout=0.3).complete(LLMRequest("hi"))
+
+
+def test_claude_result_helpers_handle_non_envelope_replies() -> None:
+    from openrange.llm import _claude_result_text, _first_json_object
+
+    assert _claude_result_text("not json at all") == "not json at all"
+    assert _claude_result_text('{"type": "x"}') == '{"type": "x"}'
+    assert _first_json_object("no braces here") == "no braces here"
+
+
 def test_run_codex_reports_os_errors_and_timeouts(tmp_path: Path) -> None:
     sleeper = executable(
         tmp_path,
diff --git a/tests/test_runtime.py b/tests/test_runtime.py
index 3360733..a7a8261 100644
--- a/tests/test_runtime.py
+++ b/tests/test_runtime.py
@@ -233,8 +233,8 @@ def _backing_manifest(backing: str | None) -> dict[str, object]:
 
 class TestBackingSelection:
     """`RunConfig.backing` and `manifest.runtime.backing` reach
-    `pack.realize`. Only `PROCESS` is wired today, so selecting any other
-    backing is expected to surface the realizer's `NotImplementedError` —
+    `pack.realize`. `PROCESS` and `CONTAINER` are wired; selecting a
+    still-unwired backing surfaces the realizer's `NotImplementedError`,
     which is exactly what proves the selector is connected end to end."""
 
     def test_runconfig_backing_process_runs(
@@ -251,17 +251,19 @@ def solve(ctx: EpisodeContext) -> AgentTurn:
         ep = run.run_episode(snapshot, solve, task_id=_build_task_id(snapshot))
         assert ep.success is True, ep.report.episode_result.reason
 
-    def test_runconfig_backing_container_reaches_realizer(
+    def test_runconfig_unwired_backing_reaches_realizer(
         self, snapshot: Snapshot, tmp_path: Path
     ) -> None:
+        # An unwired backing (SIMULATOR) surfaces the realizer's NotImplementedError,
+        # proving the selector reaches pack.realize. (PROCESS and CONTAINER are wired.)
         run = OpenRangeRun(
-            RunConfig(tmp_path, dashboard=False, backing=Backing.CONTAINER)
+            RunConfig(tmp_path, dashboard=False, backing=Backing.SIMULATOR)
         )
 
         def solve(ctx: EpisodeContext) -> None:
             return None  # never runs: realize() raises first
 
-        with pytest.raises(NotImplementedError, match="Backing.CONTAINER"):
+        with pytest.raises(NotImplementedError, match="SIMULATOR"):
             run.run_episode(snapshot, solve, task_id=_build_task_id(snapshot))
 
     def test_manifest_backing_selects_process(self, tmp_path: Path) -> None: