From 2c7a8d3af2537fc88ade23a57037dd1c96bc5bd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Tue, 16 Jun 2026 16:11:50 +0100 Subject: [PATCH 1/4] fix(process): kill tmux pane process groups, not just the pane shell MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `tmux kill-session` only delivers SIGHUP to each pane's foreground process. Wrapper chains like `sh → pnpm → node → vite` reparent and survive the signal — they end up adopted by launchd/init while still holding their ports. A few `ecluse up`/`down` cycles accumulate enough orphans that the next `ecluse up` lands a service on a port already held by a previous session's zombie, silently serving the wrong worktree's content to the user. The nohup path was fixed in PR #18 with TERM→KILL grace on the whole process group. This commit applies the same pattern to tmux: - `kill_tmux` now enumerates every pane PID across all windows of the session (`tmux list-panes -s -t -F '#{pane_pid}'`) and signals each as a process group via the existing `kill_process_group` helper. The session is then `tmux kill-session`'d to remove the now empty windows. - New `tmux_session_pane_pids` helper, private to the module. - New `kill_tmux_kills_whole_process_group` test mirrors the nohup regression test from PR #18 — a service that launches a backgrounded `sleep` child via `sleep 300 & echo $! > child.pid; wait`, after `kill_services` the child PID must be dead. Refs #30 --- src/process.rs | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/src/process.rs b/src/process.rs index b48d291..0f6a9eb 100644 --- a/src/process.rs +++ b/src/process.rs @@ -574,6 +574,13 @@ fn tmux_pane_tail(session: &str, window: &str, n: usize) -> String { fn kill_tmux(result: &SpawnResult) { if let Some(session) = &result.tmux_session { + // Bare `tmux kill-session` only delivers SIGHUP to each pane's foreground + // process. 
Wrapper chains like sh → pnpm → node → vite reparent and survive, + // holding ports across `ecluse down`. Group-kill every pane PID first (same + // TERM→KILL grace as the nohup path), then kill the now-empty session. + for pid in tmux_session_pane_pids(session) { + kill_process_group(pid); + } Command::new("tmux") .args(["kill-session", "-t", session]) .output() @@ -584,6 +591,23 @@ fn kill_tmux(result: &SpawnResult) { } } +/// All pane PIDs across all windows of `session`. Empty on any tmux failure. +fn tmux_session_pane_pids(session: &str) -> Vec<u32> { + let Ok(out) = Command::new("tmux") + .args(["list-panes", "-s", "-t", session, "-F", "#{pane_pid}"]) + .output() + else { + return vec![]; + }; + if !out.status.success() { + return vec![]; + } + String::from_utf8_lossy(&out.stdout) + .lines() + .filter_map(|l| l.trim().parse::<u32>().ok()) + .collect() +} + fn spawn_nohup( slug: &str, services: &[&&ServiceConfig], @@ -1234,4 +1258,59 @@ mod tmux_tests { } assert!(!pid_alive(pid), "pane process must die with the session"); } + + fn wait_until(timeout: std::time::Duration, mut cond: impl FnMut() -> bool) -> bool { + let deadline = std::time::Instant::now() + timeout; + while std::time::Instant::now() < deadline { + if cond() { + return true; + } + std::thread::sleep(std::time::Duration::from_millis(50)); + } + cond() + } + + // The pane command spawns a background child (the common pnpm → node → vite + // case); kill_services must take the whole process group, not just the pane. + #[test] + fn kill_tmux_kills_whole_process_group() { + if !tmux_available() { + return; + } + let dir = TempDir::new().unwrap(); + std::fs::create_dir_all(dir.path().join(".ecluse")).unwrap(); + let child_pid_file = dir.path().join("child.pid"); + let svc = native_svc( + "bg", + &format!("sleep 300 & echo $! 
> {}; wait", child_pid_file.display()), + ); + let result = spawn_services( + &ProcessManager::Tmux, + "tmux-pg-test", + &[&svc], + dir.path(), + &std::collections::HashMap::new(), + ) + .unwrap(); + + assert!( + wait_until(std::time::Duration::from_secs(5), || child_pid_file + .exists()), + "background child pid file never appeared" + ); + let child_pid: u32 = std::fs::read_to_string(&child_pid_file) + .unwrap() + .trim() + .parse() + .unwrap(); + assert!(pid_alive(child_pid), "background child should be running"); + + kill_services(&ProcessManager::Tmux, &result); + + assert!( + wait_until(std::time::Duration::from_secs(5), || !pid_alive(child_pid)), + "background child must die with the tmux pane's process group" + ); + assert!(!tmux_session_exists("ecluse-tmux-pg-test")); + } } From 4f9c947cecd1400a1cef7a5b5a7f64ae12881395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Tue, 16 Jun 2026 16:14:15 +0100 Subject: [PATCH 2/4] fix(flush): sweep worktree cwd and listener ports for stray processes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `ecluse flush` previously inherited the same kill-too-narrow defect as `ecluse down`: its tmux step only ran `tmux kill-session`, so multi-level descendants (pnpm → node → vite → workerd) survived as orphans. With the tmux fix from the previous commit, `down` cleans up correctly, but flush still needs to handle the case where state.json has lost track of sessions whose orphans never made it into a pid file in the first place. Two new sweeps run between step 3 (docker compose down) and step 4 (worktree removal): 3a. cwd sweep: for each subdirectory under `worktree_dir`, list every process whose cwd is inside it (via `sync::pids_in_directory`, which wraps `lsof +d`) and TERM→KILL its process group. Runs before worktree removal so `git worktree remove` doesn't race a live process holding file handles. Skips flush's own PID so the command doesn't suicide. 3b. 
listener sweep: enumerate every `base_port + slot*slot_stride` and `extra_ports[].base_port + slot*slot_stride` across every configured service and every slot 1..=max_slots. For each port, `validate::port_listener` returns the listener PID (if any); TERM→KILL its process group. Catches detached daemons that no longer have an open file inside the worktree (e.g. workerd's proxy worker) but still hold a configured port. Deduplicates across the (service × slot × port) cross-product so a single multi-port process is hit once. The flush confirmation prompt is updated to warn that editors and shells with files open in the worktree will be killed. CI workflows passing `--yes` are unaffected. Visibility changes: - `sync::pids_in_directory`: private → `pub(crate)` (called from main). - `process::kill_process_group_with_grace`: new `pub` wrapper around the module-private `kill_process_group`, so main can drive group-kills without reaching into private machinery. The new sweeps don't need dedicated tests — `pids_in_directory`, `port_listener`, and `kill_process_group` each have existing unit coverage; the flush command composes them. A full integration test would require provisioning a git repo with a worktree plus a controllable subject process, which is out of proportion for the correctness-by-composition gain. Refs #30 --- src/main.rs | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/process.rs | 9 +++++++ src/sync.rs | 2 +- 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 80fb8df..29277d0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2001,6 +2001,9 @@ fn cmd_flush(args: cli::FlushArgs) -> Result<()> { if !args.yes { print!( "This will destroy all ecluse sessions, worktrees, and running services.\n\ + It will also KILL every process with a file open inside the worktrees \ + (including editors, shells, and `tail -f` against worktree logs) and \ + every process listening on a configured port.\n\ There is no undo. 
Continue? [y/N] " ); std::io::stdout().flush()?; @@ -2071,6 +2074,67 @@ fn cmd_flush(args: cli::FlushArgs) -> Result<()> { } } + // Step 3a: sweep every process whose cwd is inside a worktree. Step 1 + // killed services tracked in state.json; this catches detached descendants + // (workerd, vite plugins that setsid()) and processes that crashed out of + // a recorded session. Runs BEFORE worktree removal so git worktree remove + // doesn't race a live process holding file handles. + let worktree_dir_path = root.join(&config.worktree_dir); + if worktree_dir_path.exists() { + log.step("Sweeping stray processes with cwd in worktrees..."); + if let Ok(entries) = std::fs::read_dir(&worktree_dir_path) { + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_dir() { + continue; + } + for pid in sync::pids_in_directory(&path) { + // Skip our own pid — flush runs from inside the repo and + // would otherwise commit suicide on the first sweep. + if pid == std::process::id() { + continue; + } + log.detail(&format!(" kill -TERM -- -{} (cwd {})", pid, path.display())); + process::kill_process_group_with_grace(pid); + } + } + } + } + + // Step 3b: sweep every listener on any port the config can allocate. This + // catches orphans that no longer have an open file inside the worktree + // (e.g. a daemonized process that chdir'd to /) but are still holding a + // port from the configured range. + log.step("Sweeping listeners on configured ports..."); + let mut swept_listener_pids: std::collections::HashSet<u32> = Default::default(); + for svc in &config.services { + for slot in 1..=config.max_slots { + // Primary port (covers host_port override). + let primary = svc.port(slot, config.slot_stride); + // Extra ports (debugger sockets, secondary listeners). 
+ let extras: Vec<u16> = svc + .extra_ports + .iter() + .map(|ep| { + ep.base_port + .saturating_add((slot as u16).saturating_mul(config.slot_stride.max(1) as u16)) + }) + .collect(); + for port in std::iter::once(primary).chain(extras) { + if let Some(pid) = validate::port_listener(port) { + if pid == 0 || pid == std::process::id() { + continue; + } + if !swept_listener_pids.insert(pid) { + continue; + } + log.detail(&format!(" kill -TERM -- -{} (port {})", pid, port)); + process::kill_process_group_with_grace(pid); + } + } + } + } + // Step 4: remove all worktrees under worktree_dir. let worktree_dir = root.join(&config.worktree_dir); if worktree_dir.exists() { diff --git a/src/process.rs b/src/process.rs index 0f6a9eb..659f7f4 100644 --- a/src/process.rs +++ b/src/process.rs @@ -717,6 +717,15 @@ fn kill_process_group(pgid: u32) { signal_with_grace(&format!("-{}", pgid)); } +/// Public wrapper around the module-private `kill_process_group` for callers +/// that need to TERM→KILL a process group (e.g. `ecluse flush` sweeping +/// orphaned descendants of long-dead sessions). For an ecluse-spawned service +/// the pgid equals the leader PID stored in the pid file, so callers can just +/// pass the recorded PID. +pub fn kill_process_group_with_grace(pgid: u32) { + kill_process_group(pgid); +} + /// TERM a single process, escalating to KILL after the grace period. pub fn kill_pid_with_grace(pid: u32) { signal_with_grace(&pid.to_string()); diff --git a/src/sync.rs b/src/sync.rs index ab8fe33..bf1ca10 100644 --- a/src/sync.rs +++ b/src/sync.rs @@ -204,7 +204,7 @@ pub(crate) fn tmux_window_exists(session: &str, window: &str) -> bool { // ── internal helpers ────────────────────────────────────────────────────────── /// Return unique PIDs of processes with an open file descriptor inside `dir`. 
-fn pids_in_directory(dir: &Path) -> Vec<u32> { +pub(crate) fn pids_in_directory(dir: &Path) -> Vec<u32> { let output = match Command::new("lsof") .arg("+d") .arg(dir) From 1bb0a4dfb7255b5382f715603a93061f959d8e50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Tue, 16 Jun 2026 16:18:09 +0100 Subject: [PATCH 3/4] feat(status): flag wrong-owner listeners on managed service ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a previous session's orphan grabs the port that a new session's service was configured for, `ecluse status` previously reported the new service as healthy: the stored PID was alive (in a tmux pane) AND something was responding on the configured port. The fact that the "something" was a completely different process — serving the wrong worktree's content — was invisible. The user only noticed when a stale build appeared in their browser. Status now performs a listener-identity check for every managed native service: `validate::port_listener(port)` returns the actual listener PID, and if it's neither the stored PID nor a descendant (via `whose_pid::is_descendant`), the service is flagged `wrong_owner` and rendered as `✗ wrong owner (PID N)`. Exit code is unchanged: a wrong-owner row simply trips the existing `healthy=false → exit 1` path. `ServiceStatus` gains two fields: - `listener_pid: Option<u32>` — whoever is actually bound to the port, for diagnosis. Always populated when a port is given AND a listener was found. - `wrong_owner: bool` — true iff the listener is not the stored PID or one of its descendants. JSON output gains both fields verbatim. Human-table output renders `wrong_owner` via the new `status_str` helper, extracted from the inline closure in `cmd_status` so the four-way state machine (managed vs. unmanaged × healthy/down/wrong-owner) is unit-testable. Six new tests cover every branch including the precedence rule (wrong_owner wins over healthy=true). 
Visibility change: - `whose_pid::is_descendant`: private → `pub(crate)` for use in status. Docker services aren't checked — their host port is owned by dockerd or its rootless proxy, not by any process inside the container, so the listener-PID heuristic doesn't apply. Fixes #30 --- src/main.rs | 142 ++++++++++++++++++++++++++++++++++++++++++----- src/whose_pid.rs | 2 +- 2 files changed, 130 insertions(+), 14 deletions(-) diff --git a/src/main.rs b/src/main.rs index 29277d0..9ab1467 100644 --- a/src/main.rs +++ b/src/main.rs @@ -437,6 +437,65 @@ mod tests { // Single char after sanitization → invalid slug assert!(sanitize_to_slug("a").is_err()); } + + // ── status_str ──────────────────────────────────────────────────────────── + + fn svc_status( + managed: bool, + healthy: bool, + wrong_owner: bool, + listener_pid: Option<u32>, + ) -> ServiceStatus { + ServiceStatus { + name: "api".into(), + kind: "native", + port: Some(3001), + healthy, + managed, + pid: Some(42), + tmux_window: None, + listener_pid, + wrong_owner, + } + } + + #[test] + fn status_str_unmanaged_shows_dash() { + let s = svc_status(false, false, false, None); + assert_eq!(status_str(&s), "\u{2014}"); + } + + #[test] + fn status_str_healthy_managed_shows_up() { + let s = svc_status(true, true, false, None); + assert_eq!(status_str(&s), "\u{2713} up"); + } + + #[test] + fn status_str_unhealthy_managed_shows_down() { + let s = svc_status(true, false, false, None); + assert_eq!(status_str(&s), "\u{2717} down"); + } + + #[test] + fn status_str_wrong_owner_with_listener_pid_shows_pid() { + let s = svc_status(true, false, true, Some(99999)); + assert_eq!(status_str(&s), "\u{2717} wrong owner (PID 99999)"); + } + + #[test] + fn status_str_wrong_owner_without_listener_pid() { + let s = svc_status(true, false, true, None); + assert_eq!(status_str(&s), "\u{2717} wrong owner"); + } + + #[test] + fn status_str_wrong_owner_takes_precedence_over_healthy() { + // A service can simultaneously have its stored PID alive AND 
a + // different process bound to its port. `wrong_owner` wins. + let s = svc_status(true, true, true, Some(99999)); + assert_eq!(status_str(&s), "\u{2717} wrong owner (PID 99999)"); + } } /// Sanitize a branch name or slug into a valid ecluse slug + original branch pair. @@ -2094,7 +2153,11 @@ fn cmd_flush(args: cli::FlushArgs) -> Result<()> { if pid == std::process::id() { continue; } - log.detail(&format!(" kill -TERM -- -{} (cwd {})", pid, path.display())); + log.detail(&format!( + " kill -TERM -- -{} (cwd {})", + pid, + path.display() + )); process::kill_process_group_with_grace(pid); } } @@ -2116,8 +2179,9 @@ fn cmd_flush(args: cli::FlushArgs) -> Result<()> { .extra_ports .iter() .map(|ep| { - ep.base_port - .saturating_add((slot as u16).saturating_mul(config.slot_stride.max(1) as u16)) + ep.base_port.saturating_add( + (slot as u16).saturating_mul(config.slot_stride.max(1) as u16), + ) }) .collect(); for port in std::iter::once(primary).chain(extras) { @@ -2194,6 +2258,35 @@ struct ServiceStatus { managed: bool, pid: Option<u32>, tmux_window: Option<String>, + /// PID of whatever process is actually listening on `port`, if any. + /// Only populated for native services; docker port mappings are owned by + /// the daemon, not the container process, so the check doesn't apply. + listener_pid: Option<u32>, + /// True iff a listener is bound to `port` AND that listener is neither + /// `pid` nor a descendant of it. A stale orphan from a previous session + /// hijacking the port — `ecluse status` reports the service as down + /// even though something IS responding to requests. + wrong_owner: bool, +} + +/// Human-readable status string for a service row. Extracted from cmd_status +/// so the wrong-owner branch can be unit-tested. +fn status_str(s: &ServiceStatus) -> String { + if !s.managed { + "\u{2014}".into() // — port-only, not ecluse-managed + } else if s.wrong_owner { + // A different process owns the configured port — likely an orphan from + // a previous session. 
The service is "down" from ecluse's perspective + // even if something IS responding. + match s.listener_pid { + Some(pid) => format!("\u{2717} wrong owner (PID {})", pid), + None => "\u{2717} wrong owner".into(), + } + } else if s.healthy { + "\u{2713} up".into() + } else { + "\u{2717} down".into() + } } #[derive(Tabled)] @@ -2310,14 +2403,42 @@ fn cmd_status(args: cli::StatusArgs) -> Result<()> { // Port-allocation-only services (no command) are never spawned by // ecluse — don't report them as down. let managed = svc.command.is_some(); + + // Listener identity check: if SOME process is bound to the expected + // port and it's neither this service's recorded PID nor a descendant + // of it, the port is being served by an orphan from a previous + // session (or unrelated software). Surface this rather than silently + // reporting healthy=true — the service is technically alive but the + // user is hitting the wrong process. + let (listener_pid, wrong_owner) = if managed { + match (port, pid) { + (Some(p), Some(stored)) => match validate::port_listener(p) { + Some(actual) + if actual != stored + && actual != 0 + && !whose_pid::is_descendant(stored, actual) => + { + (Some(actual), true) + } + other => (other, false), + }, + _ => (None, false), + } + } else { + (None, false) + }; + let healthy_with_owner_check = healthy && !wrong_owner; + statuses.push(ServiceStatus { name: svc.name.clone(), kind: "native", port, - healthy: healthy || !managed, + healthy: healthy_with_owner_check || !managed, managed, pid, tmux_window, + listener_pid, + wrong_owner, }); } @@ -2336,6 +2457,8 @@ fn cmd_status(args: cli::StatusArgs) -> Result<()> { managed: true, pid: None, tmux_window: None, + listener_pid: None, + wrong_owner: false, }); } @@ -2353,6 +2476,8 @@ fn cmd_status(args: cli::StatusArgs) -> Result<()> { "managed": s.managed, "pid": s.pid, "tmux_window": s.tmux_window, + "listener_pid": s.listener_pid, + "wrong_owner": s.wrong_owner, }) }) .collect(); @@ -2382,15 +2507,6 @@ 
fn cmd_status(args: cli::StatusArgs) -> Result<()> { if statuses.is_empty() { println!("No services defined in .ecluse.toml."); } else { - let status_str = |s: &ServiceStatus| -> String { - if !s.managed { - "\u{2014}".into() // — port-only, not ecluse-managed - } else if s.healthy { - "\u{2713} up".into() - } else { - "\u{2717} down".into() - } - }; let port_str = |s: &ServiceStatus| -> String { s.port.map(|p| p.to_string()).unwrap_or_else(|| "-".into()) }; diff --git a/src/whose_pid.rs b/src/whose_pid.rs index 85af7f2..5314acf 100644 --- a/src/whose_pid.rs +++ b/src/whose_pid.rs @@ -106,7 +106,7 @@ fn match_tmux_session(session: &Session, pid: u32) -> Option { } /// True iff `descendant` is a transitive child of `ancestor` (up to 5 levels deep). -fn is_descendant(ancestor: u32, descendant: u32) -> bool { +pub(crate) fn is_descendant(ancestor: u32, descendant: u32) -> bool { if ancestor == descendant { return false; } From 0c7fdf895e1c9a0dc934bf33d52b11a6dc2c058d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois?= Date: Tue, 16 Jun 2026 16:20:29 +0100 Subject: [PATCH 4/4] docs: changelog + troubleshooting + limits for #30 - CHANGELOG: three Unreleased entries for the down/flush/status fixes. - SKILL.md: new troubleshooting subsection 'Wrong content served on the configured URL after multiple up/down cycles' covering the symptom, the root cause, the 0.3.2+ status row format, and recovery on any version. - docs/src/limits.md: update the 'Process management is spawn-and-kill only' section to mention process-group kill, the setsid escape hatch, and the new status wrong-owner check. --- CHANGELOG.md | 9 +++++++++ docs/src/limits.md | 4 ++++ skills/ecluse/SKILL.md | 27 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64801f9..b47c135 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
--- +## [Unreleased] + +### Fixed +- `ecluse down` in tmux mode now kills the entire pane process group, not just the pane's foreground shell. Previously, multi-level child chains (`sh → pnpm → node → vite`, plus anything that calls `setsid()` like Cloudflare workerd) survived as orphans adopted by `launchd`/`init`, holding their ports indefinitely. Each orphan held 4-8 ports; after a few `up`/`down` cycles the next `ecluse up` would silently land on a port already held by a zombie, serving a different worktree's content. The same TERM→KILL grace pattern that was applied to the nohup path in PR #18 now applies to tmux. (#30) +- `ecluse flush` now sweeps every process whose cwd is inside a worktree (`lsof +d <dir>`) AND every listener on a configured port (`base_port + slot*slot_stride` and `extra_ports[].base_port + slot*slot_stride` across all `max_slots`), killing each with TERM→KILL grace. The flush confirmation prompt warns that editors/shells with files open in worktrees will be killed; `--yes` bypass for CI is unchanged. (#30) +- `ecluse status` detects when the configured port is being served by a different process than the recorded PID (or any of its descendants). The service is flagged `✗ wrong owner (PID N)` instead of being silently reported as healthy. `--json` output gains `listener_pid` and `wrong_owner` fields. Exit code semantics unchanged: a wrong-owner row trips the existing `exit 1` path. (#30) + +--- + ## [0.3.1] — 2026-06-15 ### Fixed diff --git a/docs/src/limits.md b/docs/src/limits.md index 3a9c0a6..e906271 100644 --- a/docs/src/limits.md +++ b/docs/src/limits.md @@ -24,6 +24,10 @@ ecluse down feat-foo --keep-worktree ecluse up feat-foo --reuse-worktree ``` +Teardown kills the whole process group of each spawned service, with a TERM→KILL grace (2s) — wrapper chains like `sh → pnpm → node → vite` are killed in their entirety, not just the outermost wrapper. This applies to both `tmux` and `nohup` process managers as of 0.3.2. 
Services that explicitly `setsid()` themselves out of the process group (rare) escape this and require `ecluse flush`, which additionally sweeps every process whose cwd is inside a worktree and every listener on a configured port. + +`ecluse status` flags ports where a different process is bound than the one ecluse recorded — `✗ wrong owner (PID N)` instead of `✓ up`. This catches stale orphans hijacking a session's port even though the session's own recorded PID is still alive. + ## `command` requires the app to read its port from the environment ecluse injects the full `.env.ecluse` contents — `PORT`, `ECLUSE_SLOT`, `ECLUSE_SLUG`, `ECLUSE_MODE`, all `ECLUSE__PORT` vars, and any `port_env` aliases — directly into the environment of the spawned process. There is no separate sourcing step; the same map written to `.env.ecluse` is passed to the child process before exec. This only fails if the app ignores the environment entirely: diff --git a/skills/ecluse/SKILL.md b/skills/ecluse/SKILL.md index 910b7ac..4a3c246 100644 --- a/skills/ecluse/SKILL.md +++ b/skills/ecluse/SKILL.md @@ -586,6 +586,33 @@ ecluse status 3. **Never run `lsof -ti TCP: | xargs kill` blind** — see "Killing services safely". Use `ecluse whose-pid` to verify ownership before any manual kill. 4. **Consider `slot_stride = 10` in `.ecluse.toml`** for visually distinct adjacent-slot ports (3010, 3020, 3030 instead of 3001, 3002, 3003). Doesn't prevent the root cause but makes mistakes harder. +### Wrong content served on the configured URL after multiple up/down cycles + +**Symptom:** the user navigates to `http://localhost:7301` expecting slot 1, but sees slot 4's branch instead. `ecluse status` reports the slot 1 service as healthy. Restarting only the affected session doesn't fix it — the wrong content keeps appearing on the configured port. + +**Root cause (fixed in 0.3.2+):** an orphan from a previous session is holding the port. 
Common cause: pnpm/npm wrapper chains where the actual server is a grandchild (`sh → pnpm → node → vite`) — under 0.3.1 and earlier, `ecluse down` killed only the outer wrapper and the actual server reparented to `launchd`/`init`, surviving indefinitely and holding 4-8 ports each. After several `up`/`down` cycles these orphans accumulated and silently collided with new sessions. + +**Detection:** `ecluse status` in 0.3.2+ flags this directly: + +``` +SERVICE TYPE PORT STATUS WINDOW +backoffice native 7301 ✗ wrong owner (PID 81906) backoffice +``` + +The `wrong owner` row means: the stored PID (or its descendants) is NOT what's currently listening on 7301 — something else is. JSON output gains `listener_pid` and `wrong_owner` fields. Exit code is 1 (same as `✗ down`). + +**Recovery on any version:** + +```bash +ecluse whose-pid <pid> # confirm it's an orphan, not another session +# If unowned by any ecluse session: +kill -- -<pgid> # kill the whole process group (the `-` prefix) +# OR, the recovery hammer (kills everything in worktrees + every configured port): +ecluse flush --yes +``` + +**Prevention:** upgrade to 0.3.2+. The tmux teardown path now kills the whole process group (TERM→KILL grace), matching what the nohup path already did. `ecluse flush` also sweeps both the worktree cwd and every configured port to clean up orphans that escaped a previous version's teardown. + ### Docker not running ```bash