From 0fc430346931cdd0deb7c1cf62b1d6044b3dd05b Mon Sep 17 00:00:00 2001
From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com>
Date: Fri, 19 Jun 2026 02:23:32 +0800
Subject: [PATCH 01/10] feat: data directory separation and frontend
 virtualization

---
 desktop/build/windows/installer/project.nsi   |   6 +-
 desktop/frontend/src/App.tsx                  |  32 ++-
 .../frontend/src/components/ProjectTree.tsx   |  26 +-
 .../src/components/WorkspacePanel.tsx         | 256 ++++++++++++------
 .../src/components/editors/HljsDiff.tsx       |  83 ++++--
 internal/boot/boot.go                         |   6 +
 internal/bot/gateway.go                       |  37 ++-
 internal/bot/weixin/weixin_login.go           |   4 +-
 internal/config/config.go                     |   8 +-
 internal/config/edit.go                       |  20 +-
 internal/config/migrate.go                    |  92 ++++++-
 internal/config/migrate_test.go               |  61 +++++
 internal/control/approval_e2e_test.go         |  67 +++++
 internal/control/controller.go                |  63 ++++-
 internal/fileutil/atomicwrite.go              |  38 +++
 internal/fileutil/atomicwrite_test.go         |  43 +++
 internal/plugin/codegraph_limit.go            |  35 +++
 internal/plugin/codegraph_limit_test.go       |  46 ++++
 internal/plugin/known_overrides.go            |   5 +
 internal/plugin/lazy.go                       |   8 +-
 internal/plugin/plugin.go                     |  24 +-
 internal/plugin/transport_stdio.go            |  39 ++-
 22 files changed, 822 insertions(+), 177 deletions(-)
 create mode 100644 internal/plugin/codegraph_limit.go
 create mode 100644 internal/plugin/codegraph_limit_test.go

diff --git a/desktop/build/windows/installer/project.nsi b/desktop/build/windows/installer/project.nsi
index 26f7e5d0f..63a08ec40 100644
--- a/desktop/build/windows/installer/project.nsi
+++ b/desktop/build/windows/installer/project.nsi
@@ -153,7 +153,8 @@ Section "uninstall"
 
     RMDir /r "$AppData\${PRODUCT_EXECUTABLE}" # Remove the WebView2 DataPath
 
-    RMDir /r $INSTDIR
+    ; Precision uninstall: delete main application files
+    Delete "$INSTDIR\${PRODUCT_EXECUTABLE}"
 
     Delete "$SMPROGRAMS\${INFO_PRODUCTNAME}.lnk"
     Delete "$DESKTOP\${INFO_PRODUCTNAME}.lnk"
@@ -162,4 +163,7 @@ Section "uninstall"
     !insertmacro wails.unassociateCustomProtocols
 
     !insertmacro reasonix.deleteUninstaller
+
+    ; Only remove the installation directory if it is empty to prevent data loss
+    RMDir $INSTDIR
 SectionEnd
diff --git a/desktop/frontend/src/App.tsx b/desktop/frontend/src/App.tsx
index 33cc8764d..c8f88a5f7 100644
--- a/desktop/frontend/src/App.tsx
+++ b/desktop/frontend/src/App.tsx
@@ -1546,16 +1546,24 @@ export default function App() {
         if (isThemeMode(arg)) {
           const next = arg;
           const style = getThemeStyle(next);
-          await app.SetDesktopAppearance(next, style);
-          applyTheme(next, style);
-          notice(t("settings.themeChanged", { theme: next, style }));
+          try {
+            await app.SetDesktopAppearance(next, style);
+            applyTheme(next, style);
+            notice(t("settings.themeChanged", { theme: next, style }));
+          } catch (err) {
+            showToast(err instanceof Error ? err.message : String(err), "error");
+          }
           return;
         }
         if (isThemeStyle(arg)) {
           const cur = getTheme();
-          await app.SetDesktopAppearance(cur, arg);
-          applyTheme(cur, arg);
-          notice(t("settings.themeChanged", { theme: cur, style: arg }));
+          try {
+            await app.SetDesktopAppearance(cur, arg);
+            applyTheme(cur, arg);
+            notice(t("settings.themeChanged", { theme: cur, style: arg }));
+          } catch (err) {
+            showToast(err instanceof Error ? err.message : String(err), "error");
+          }
           return;
         }
         notice(t("settings.themeUnknown", { name: arg }), "warn");
@@ -1567,7 +1575,7 @@ export default function App() {
       if (goal.trim()) await setControllerGoal(goal);
       commitThenSend(trimmed, submitText.trim());
     },
-    [applyGoal, closeTransientOverlays, collaborationMode, composerProfile, goal, send, runShell, notice, setControllerCollaborationMode, setControllerGoal, setControllerToolApprovalMode, steer, switchModel, t, toolApprovalMode],
+    [applyGoal, closeTransientOverlays, collaborationMode, composerProfile, goal, send, runShell, notice, setControllerCollaborationMode, setControllerGoal, setControllerToolApprovalMode, steer, switchModel, t, toolApprovalMode, showToast],
   );
 
   const refreshTabMetas = useCallback(async (): Promise<TabMeta[]> => {
@@ -2318,9 +2326,13 @@ export default function App() {
   const renameTopic = useCallback(async (topicId: string, title: string) => {
     const nextTitle = title.trim();
     if (!topicId || !nextTitle) return;
-    await app.RenameTopic(topicId, nextTitle);
-    await refreshProjectsAndTabs();
-  }, [refreshProjectsAndTabs]);
+    try {
+      await app.RenameTopic(topicId, nextTitle);
+      await refreshProjectsAndTabs();
+    } catch (err) {
+      showToast(err instanceof Error ? err.message : String(err), "error");
+    }
+  }, [refreshProjectsAndTabs, showToast]);
 
   const startActiveTopicRename = useCallback(() => {
     if (!activeTab?.topicId) return;
diff --git a/desktop/frontend/src/components/ProjectTree.tsx b/desktop/frontend/src/components/ProjectTree.tsx
index 7f1fe9306..20d9b85e8 100644
--- a/desktop/frontend/src/components/ProjectTree.tsx
+++ b/desktop/frontend/src/components/ProjectTree.tsx
@@ -6,6 +6,7 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react";
 import type { CSSProperties, DragEvent as ReactDragEvent, KeyboardEvent as ReactKeyboardEvent, MouseEvent as ReactMouseEvent } from "react";
 import { Archive, ArrowDown, Pencil, Plus, Folder, FolderPlus, Search, BriefcaseBusiness, Copy, FolderOpen, XCircle, History, Check, ListCollapse, ListRestart, MessageSquare, Clock, Pin, MoreHorizontal, Minimize2, Maximize2 } from "lucide-react";
 import { asArray } from "../lib/array";
+import { useToast } from "../lib/toast";
 import { app } from "../lib/bridge";
 import type { ProjectNode, ProjectTopicStatus } from "../lib/types";
 import { topicActivityTime } from "../lib/session";
@@ -447,6 +448,7 @@ export function ProjectTree({
   searchFocusSignal = 0,
 }: ProjectTreeProps) {
   const t = useT();
+  const { showToast } = useToast();
   const compactTopics = variant === "workbench";
   const creationTopics = variant === "creation";
   const [tree, setTree] = useState<ProjectNode[]>([]);
@@ -751,8 +753,8 @@ export function ProjectTree({
       else await app.RenameTopic(topicId, title);
       await refresh();
       if (!onRenameTopic) await onTopicsChanged?.();
-    } catch {
-      /* ignore */
+    } catch (err) {
+      showToast(err instanceof Error ? err.message : String(err), "error");
     }
   };
 
@@ -763,8 +765,8 @@ export function ProjectTree({
     try {
       await app.RenameProject(root, title);
       await refresh();
-    } catch {
-      /* ignore */
+    } catch (err) {
+      showToast(err instanceof Error ? err.message : String(err), "error");
     }
   };
 
@@ -776,8 +778,8 @@ export function ProjectTree({
       setConfirmAction(null);
       await refresh();
       await onTopicsChanged?.();
-    } catch {
-      /* ignore */
+    } catch (err) {
+      showToast(err instanceof Error ? err.message : String(err), "error");
     }
   };
 
@@ -788,8 +790,8 @@ export function ProjectTree({
       setMenuPoint(null);
       await refresh();
       await onTopicsChanged?.();
-    } catch {
-      /* ignore */
+    } catch (err) {
+      showToast(err instanceof Error ? err.message : String(err), "error");
     }
   };
 
@@ -801,8 +803,8 @@ export function ProjectTree({
       setMenuPoint(null);
       await refresh();
       await onTopicsChanged?.();
-    } catch {
-      /* ignore */
+    } catch (err) {
+      showToast(err instanceof Error ? err.message : String(err), "error");
     }
   };
 
@@ -823,8 +825,8 @@ export function ProjectTree({
       setMenuPoint(null);
       setConfirmRemoveProject(null);
       await refresh();
-    } catch {
-      /* ignore */
+    } catch (err) {
+      showToast(err instanceof Error ? err.message : String(err), "error");
     }
   };
 
diff --git a/desktop/frontend/src/components/WorkspacePanel.tsx b/desktop/frontend/src/components/WorkspacePanel.tsx
index 5443f38bc..8c2833665 100644
--- a/desktop/frontend/src/components/WorkspacePanel.tsx
+++ b/desktop/frontend/src/components/WorkspacePanel.tsx
@@ -1,4 +1,5 @@
 import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import { useVirtualizer } from "@tanstack/react-virtual";
 import type {
   CSSProperties,
   DragEvent as ReactDragEvent,
@@ -180,6 +181,15 @@ function formatCommitDate(dateStr: string): string {
   const minutes = String(d.getMinutes()).padStart(2, "0");
   return `${day} ${month} ${year} ${hours}:${minutes}`;
 }
+interface TreeRow {
+  key: string;
+  path: string;
+  depth: number;
+  entry: DirEntry;
+  active: boolean;
+  isOpen?: boolean;
+  isSearch?: boolean;
+}
 
 export function WorkspacePanel({
   open,
@@ -730,6 +740,48 @@ export function WorkspacePanel({
       .sort((a, b) => a.path.localeCompare(b.path));
   }, [entriesByDir, filter, scopedFilePaths]);
 
+  const treeRows = useMemo<TreeRow[]>(() => {
+    if (flattened) {
+      return flattened.map(({ path, entry }) => ({
+        key: path,
+        path,
+        depth: 0,
+        entry,
+        active: selectedPath === path,
+        isSearch: true,
+      }));
+    }
+    const acc: TreeRow[] = [];
+    const build = (dir: string, depth: number) => {
+      const entries = entriesByDir[dir] ?? [];
+      for (const entry of entries) {
+        const path = entryPath(dir, entry);
+        const isOpen = openDirs.has(path);
+        const active = selectedPath === path;
+        acc.push({
+          key: path,
+          path,
+          depth,
+          entry,
+          active,
+          isOpen,
+        });
+        if (entry.isDir && isOpen) {
+          build(path, depth + 1);
+        }
+      }
+    };
+    build("", 0);
+    return acc;
+  }, [flattened, entriesByDir, openDirs, selectedPath]);
+
+  const virtualizer = useVirtualizer({
+    count: treeRows.length,
+    getScrollElement: () => treeRef.current,
+    estimateSize: () => 24,
+    overscan: 10,
+  });
+
   const searchPlaceholder = t(scopedFilePaths ? "workspace.filterReferencedFiles" : changedMode ? "workspace.filterChanges" : "workspace.filter");
 
   const effectiveTreeWidth = useMemo(() => clampWorkspaceTreeWidth(treeWidth, panelWidth), [panelWidth, treeWidth]);
@@ -747,13 +799,11 @@ export function WorkspacePanel({
 
   useEffect(() => {
     if (!selectedPath || !actualTreeVisible) return;
-    const frame = window.requestAnimationFrame(() => {
-      const row = Array.from(treeRef.current?.querySelectorAll<HTMLElement>("[data-workspace-path]") ?? [])
-        .find((element) => element.dataset.workspacePath === selectedPath);
-      row?.scrollIntoView({ block: "nearest", inline: "nearest" });
-    });
-    return () => window.cancelAnimationFrame(frame);
-  }, [actualTreeVisible, entriesByDir, filter, selectedPath]);
+    const selectedIndex = treeRows.findIndex((row) => row.path === selectedPath);
+    if (selectedIndex !== -1) {
+      virtualizer.scrollToIndex(selectedIndex, { align: "auto" });
+    }
+  }, [selectedPath, actualTreeVisible, treeRows, virtualizer]);
 
   const panelStyle = useMemo(
     () =>
@@ -920,53 +970,85 @@ export function WorkspacePanel({
     void app.RevealWorkspacePath(treeMenu.path).catch(() => {});
   };
 
-  const renderRows = (dir: string, depth: number): ReactElement[] => {
-    const entries = entriesByDir[dir] ?? [];
-    return entries.flatMap((entry) => {
-      const path = entryPath(dir, entry);
-      const isOpen = openDirs.has(path);
-      const active = selectedPath === path;
-      const row = (
-        <button
-          key={path}
-          className={`workspace-tree__row${active ? " workspace-tree__row--active" : ""}`}
-          data-workspace-path={path}
-          draggable
-          onDragStart={(event) => startTreeDrag(event, path, entry.isDir)}
-          onClick={() => {
-            if (entry.isDir) {
-              toggleDir(path);
+  const renderNormalRow = (row: TreeRow) => {
+    const { path, depth, entry, isOpen, active } = row;
+    return (
+      <button
+        key={path}
+        className={`workspace-tree__row${active ? " workspace-tree__row--active" : ""}`}
+        data-workspace-path={path}
+        draggable
+        onDragStart={(event) => startTreeDrag(event, path, entry.isDir)}
+        onClick={() => {
+          if (entry.isDir) {
+            toggleDir(path);
+          } else {
+            if (selectedPath === path) {
+              setSelectedPath(null);
             } else {
-              if (selectedPath === path) {
-                setSelectedPath(null);
-              } else {
-                selectFile(path);
-              }
+              selectFile(path);
             }
-          }}
-          onContextMenu={(event) => openTreeMenu(event, path, entry.isDir)}
-          style={{ paddingLeft: 8 + depth * 14 }}
-        >
-          {entry.isDir ? (
-            isOpen ? (
-              <ChevronDown size={13} className="workspace-tree__chev" />
-            ) : (
-              <ChevronRight size={13} className="workspace-tree__chev" />
-            )
-          ) : (
-            <span className="workspace-tree__chev" />
-          )}
-          {entry.isDir ? (
-            <Folder size={14} className="workspace-tree__icon workspace-tree__icon--dir" />
-          ) : (
-            <FileText size={14} className="workspace-tree__icon" />
-          )}
-          <span className="workspace-tree__name">{entry.name}</span>
-        </button>
-      );
-      if (!entry.isDir || !isOpen) return [row];
-      return [row, ...renderRows(path, depth + 1)];
-    });
+          }
+        }}
+        onContextMenu={(event) => openTreeMenu(event, path, entry.isDir)}
+        style={{ paddingLeft: 8 + depth * 14 }}
+      >
+        {entry.isDir ? (
+          <ChevronRight
+            size={13}
+            className={`workspace-tree__chev ${isOpen ? "workspace-tree__chev--open" : ""}`}
+            style={{
+              transition: "transform 0.15s ease",
+              transform: isOpen ? "rotate(90deg)" : "rotate(0deg)",
+            }}
+          />
+        ) : (
+          <span className="workspace-tree__chev" />
+        )}
+        {entry.isDir ? (
+          <Folder size={14} className="workspace-tree__icon workspace-tree__icon--dir" />
+        ) : (
+          <FileText size={14} className="workspace-tree__icon" />
+        )}
+        <span className="workspace-tree__name">{entry.name}</span>
+      </button>
+    );
+  };
+
+  const renderSearchRow = (row: TreeRow) => {
+    const { path, entry, active } = row;
+    const dir = parentPath(path);
+    return (
+      <button
+        key={path}
+        className={`workspace-tree__row workspace-tree__row--search${active ? " workspace-tree__row--active" : ""}`}
+        data-workspace-path={path}
+        draggable
+        onDragStart={(event) => startTreeDrag(event, path, entry.isDir)}
+        onClick={() => {
+          if (entry.isDir) {
+            toggleDir(path);
+          } else {
+            if (selectedPath === path) {
+              setSelectedPath(null);
+            } else {
+              selectFile(path);
+            }
+          }
+        }}
+        onContextMenu={(event) => openTreeMenu(event, path, entry.isDir)}
+      >
+        {entry.isDir ? (
+          <Folder size={14} className="workspace-tree__icon workspace-tree__icon--dir" />
+        ) : (
+          <FileText size={14} className="workspace-tree__icon" />
+        )}
+        <span className="workspace-tree__result">
+          <span className="workspace-tree__result-name">{basename(path)}</span>
+          {dir && <span className="workspace-tree__result-dir">{dir}</span>}
+        </span>
+      </button>
+    );
   };
 
   const isMarkdown = selectedPath?.toLowerCase().endsWith(".md") ?? false;
@@ -1414,43 +1496,47 @@ export function WorkspacePanel({
             </Tooltip>
           </div>
         )}
-        <div className="workspace-tree" ref={treeRef} onContextMenu={openTreeBlankMenu}>
-          {flattened
-            ? flattened.map(({ path, entry }) => {
-                const dir = parentPath(path);
+        <div
+          className="workspace-tree"
+          ref={treeRef}
+          onContextMenu={openTreeBlankMenu}
+          style={{
+            height: "100%",
+            overflow: "auto",
+            position: "relative",
+          }}
+        >
+          {treeRows.length > 0 ? (
+            <div
+              className="workspace-tree__sizer"
+              style={{
+                height: virtualizer.getTotalSize(),
+                width: "100%",
+                position: "relative",
+              }}
+            >
+              {virtualizer.getVirtualItems().map((row) => {
+                const item = treeRows[row.index];
+                if (!item) return null;
                 return (
-                  <button
-                    key={path}
-                    className={`workspace-tree__row workspace-tree__row--search${selectedPath === path ? " workspace-tree__row--active" : ""}`}
-                    data-workspace-path={path}
-                    draggable
-                    onDragStart={(event) => startTreeDrag(event, path, entry.isDir)}
-                    onClick={() => {
-                      if (entry.isDir) {
-                        toggleDir(path);
-                      } else {
-                        if (selectedPath === path) {
-                          setSelectedPath(null);
-                        } else {
-                          selectFile(path);
-                        }
-                      }
+                  <div
+                    key={item.key}
+                    data-index={row.index}
+                    ref={virtualizer.measureElement}
+                    style={{
+                      position: "absolute",
+                      top: 0,
+                      left: 0,
+                      width: "100%",
+                      transform: `translateY(${row.start}px)`,
                     }}
-                    onContextMenu={(event) => openTreeMenu(event, path, entry.isDir)}
                   >
-                    {entry.isDir ? (
-                      <Folder size={14} className="workspace-tree__icon workspace-tree__icon--dir" />
-                    ) : (
-                      <FileText size={14} className="workspace-tree__icon" />
-                    )}
-                    <span className="workspace-tree__result">
-                      <span className="workspace-tree__result-name">{basename(path)}</span>
-                      {dir && <span className="workspace-tree__result-dir">{dir}</span>}
-                    </span>
-                  </button>
+                    {item.isSearch ? renderSearchRow(item) : renderNormalRow(item)}
+                  </div>
                 );
-              })
-            : renderRows("", 0)}
+              })}
+            </div>
+          ) : null}
         </div>
       </section>
       {treeMenu && (
diff --git a/desktop/frontend/src/components/editors/HljsDiff.tsx b/desktop/frontend/src/components/editors/HljsDiff.tsx
index 93e8124aa..a4969b6d3 100644
--- a/desktop/frontend/src/components/editors/HljsDiff.tsx
+++ b/desktop/frontend/src/components/editors/HljsDiff.tsx
@@ -1,3 +1,5 @@
+import { useRef } from "react";
+import { useVirtualizer } from "@tanstack/react-virtual";
 import type { DiffProps } from "../DiffView";
 import { diffLines, diffRowsFromUnifiedDiff } from "../../lib/diff";
 import { highlightToHtml } from "../../lib/highlight";
@@ -14,23 +16,72 @@ function lineNo(n?: number): string {
 
 export default function HljsDiff({ original = "", modified = "", diff = "", language, maxHeight }: DiffProps) {
   const rows = diff ? diffRowsFromUnifiedDiff(diff) : diffLines(original, modified);
+  const scrollRef = useRef<HTMLDivElement>(null);
+
+  const isVirtual = rows.length > 200;
+
+  const virtualizer = useVirtualizer({
+    count: isVirtual ? rows.length : 0,
+    getScrollElement: () => scrollRef.current,
+    estimateSize: () => 24,
+    overscan: 10,
+  });
+
+  const renderRow = (r: typeof rows[0], idx: number) => (
+    <div key={idx} className={`diff__row diff__row--${r.type}`}>
+      <span className="diff__gutter">
+        <span className="diff__line diff__line--old">{lineNo(r.oldLine)}</span>
+        <span className="diff__line diff__line--new">{lineNo(r.newLine)}</span>
+        <span className="diff__sign">{SIGN[r.type]}</span>
+      </span>
+      <code
+        className="diff__text"
+        dangerouslySetInnerHTML={{ __html: highlightToHtml(r.text, language) }}
+      />
+    </div>
+  );
+
   return (
-    <div className="diff hljs" style={maxHeight ? { maxHeight } : undefined}>
-      <div className="diff__table">
-        {rows.map((r, idx) => (
-          <div key={idx} className={`diff__row diff__row--${r.type}`}>
-            <span className="diff__gutter">
-              <span className="diff__line diff__line--old">{lineNo(r.oldLine)}</span>
-              <span className="diff__line diff__line--new">{lineNo(r.newLine)}</span>
-              <span className="diff__sign">{SIGN[r.type]}</span>
-            </span>
-            <code
-              className="diff__text"
-              dangerouslySetInnerHTML={{ __html: highlightToHtml(r.text, language) }}
-            />
-          </div>
-        ))}
-      </div>
+    <div
+      ref={scrollRef}
+      className="diff hljs"
+      style={{
+        maxHeight: maxHeight || undefined,
+        overflow: maxHeight ? "auto" : undefined,
+        position: maxHeight ? "relative" : undefined,
+      }}
+    >
+      {isVirtual ? (
+        <div
+          className="diff__table"
+          style={{
+            height: virtualizer.getTotalSize(),
+            width: "100%",
+            position: "relative",
+          }}
+        >
+          {virtualizer.getVirtualItems().map((row) => (
+            <div
+              key={row.key}
+              data-index={row.index}
+              ref={virtualizer.measureElement}
+              style={{
+                position: "absolute",
+                top: 0,
+                left: 0,
+                width: "100%",
+                transform: `translateY(${row.start}px)`,
+              }}
+            >
+              {renderRow(rows[row.index], row.index)}
+            </div>
+          ))}
+        </div>
+      ) : (
+        <div className="diff__table">
+          {rows.map((r, idx) => renderRow(r, idx))}
+        </div>
+      )}
     </div>
   );
 }
diff --git a/internal/boot/boot.go b/internal/boot/boot.go
index aa40f0c1c..42bc44332 100644
--- a/internal/boot/boot.go
+++ b/internal/boot/boot.go
@@ -110,6 +110,11 @@ type Options struct {
 	// artifacts left by a previous process. Nil uses the core physical-delete
 	// reconciler; frontends with different deletion semantics can override it.
 	CleanupPendingReconciler func(sessionDir string) error
+	// ApprovalTimeout bounds how long a tool-approval or ask prompt blocks for a
+	// user decision. Zero (default) waits forever — correct for an interactive
+	// terminal. Headless/bot frontends pass a positive value so an unanswered
+	// prompt can't wedge the session indefinitely (#4626, #4402).
+	ApprovalTimeout time.Duration
 }
 
 // Build loads config, resolves the model(s), and returns a Controller wrapping a
@@ -930,6 +935,7 @@ func Build(ctx context.Context, opts Options) (*control.Controller, error) {
 		DisableColdResumePrune: !cfg.ColdResumePruneEnabled(),
 		Shell:                  shell,
 		PlanModeAllowedTools:   cfg.Agent.PlanModeAllowedTools,
+		ApprovalTimeout:        opts.ApprovalTimeout,
 		OnRemember: func(rule string) control.RememberResult {
 			return rememberPermissionRule(root, rule)
 		},
diff --git a/internal/bot/gateway.go b/internal/bot/gateway.go
index d20054aca..5870a7518 100644
--- a/internal/bot/gateway.go
+++ b/internal/bot/gateway.go
@@ -22,6 +22,11 @@ type GatewayConfig struct {
 	Model              string
 	ToolApprovalMode   string
 	MaxSteps           int
+	// ApprovalTimeout bounds how long a tool-approval/ask prompt blocks a bot
+	// session waiting for a remote user's reply. Zero falls back to
+	// defaultBotApprovalTimeout so an abandoned prompt can't wedge the bot forever
+	// (#4626, #4402). A negative value disables the timeout (wait indefinitely).
+	ApprovalTimeout    time.Duration
 	WorkspaceRoot      string
 	Channels           map[Platform]ChannelConfig
 	ConnectionChannels map[string]ChannelConfig
@@ -845,12 +850,13 @@ func (gw *BotGateway) getOrCreateSession(ctx context.Context, key string, msg In
 	model, workspaceRoot, toolApprovalMode := gw.sessionOptionsForMessage(msg)
 	gw.logger.Info("bot session creating", "platform", msg.Platform, "chat_type", msg.ChatType, "chat", hashID(msg.ChatID), "session", key[:8], "model", model, "workspace_set", strings.TrimSpace(workspaceRoot) != "", "tool_approval_mode", normalizeBotToolApprovalMode(toolApprovalMode))
 	ctrl, err := boot.Build(ctx, boot.Options{
-		Model:         model,
-		MaxSteps:      gw.cfg.MaxSteps,
-		RequireKey:    true,
-		Sink:          sessionSink,
-		WorkspaceRoot: workspaceRoot,
-		SessionDir:    botSessionDir(workspaceRoot),
+		Model:           model,
+		MaxSteps:        gw.cfg.MaxSteps,
+		RequireKey:      true,
+		Sink:            sessionSink,
+		WorkspaceRoot:   workspaceRoot,
+		SessionDir:      botSessionDir(workspaceRoot),
+		ApprovalTimeout: gw.approvalTimeout(),
 	})
 	if err != nil {
 		gw.logger.Error("build controller failed", "err", err)
@@ -882,6 +888,25 @@ func ensureControllerSessionPath(ctrl *control.Controller) {
 	ctrl.SetSessionPath(agent.NewSessionPath(ctrl.SessionDir(), ctrl.Label()))
 }
 
+// defaultBotApprovalTimeout caps how long a bot session waits for a remote
+// user's approval/ask reply before treating it as denied, so an abandoned
+// prompt (or a dropped IM event) can't leave the session wedged forever
+// (#4626, #4402). 30 minutes is generous for a human reply yet bounded.
+const defaultBotApprovalTimeout = 30 * time.Minute
+
+// approvalTimeout resolves the configured bot approval wait: zero uses the
+// bounded default; a negative value opts out (wait indefinitely).
+func (gw *BotGateway) approvalTimeout() time.Duration {
+	switch {
+	case gw.cfg.ApprovalTimeout < 0:
+		return 0
+	case gw.cfg.ApprovalTimeout == 0:
+		return defaultBotApprovalTimeout
+	default:
+		return gw.cfg.ApprovalTimeout
+	}
+}
+
 func botSessionDir(workspaceRoot string) string {
 	if strings.TrimSpace(workspaceRoot) == "" {
 		return config.SessionDir()
diff --git a/internal/bot/weixin/weixin_login.go b/internal/bot/weixin/weixin_login.go
index f97e4c939..19e9db293 100644
--- a/internal/bot/weixin/weixin_login.go
+++ b/internal/bot/weixin/weixin_login.go
@@ -11,6 +11,7 @@ import (
 	"time"
 
 	"reasonix/internal/config"
+	"reasonix/internal/fileutil"
 )
 
 type savedAccount struct {
@@ -110,7 +111,8 @@ func saveAccount(accountID string, account savedAccount) error {
 	if err != nil {
 		return err
 	}
-	return os.WriteFile(path, data, 0o600)
+	// Atomic write: a truncated credentials file silently breaks login.
+	return fileutil.AtomicWriteFile(path, data, 0o600)
 }
 
 func Login(ctx context.Context, out io.Writer, timeout time.Duration) (*LoginResult, error) {
diff --git a/internal/config/config.go b/internal/config/config.go
index ced3d8da1..c002551a6 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -17,6 +17,7 @@ import (
 
 	"github.com/BurntSushi/toml"
 
+	"reasonix/internal/fileutil"
 	"reasonix/internal/netclient"
 	"reasonix/internal/provider"
 )
@@ -2780,9 +2781,12 @@ func SourcePathForRoot(root string) string {
 	return ""
 }
 
-// WriteFile writes the configuration to path as annotated TOML.
+// WriteFile writes the configuration to path as annotated TOML. The write is
+// atomic + fsynced so an interrupted write or power loss can never truncate the
+// main config into an unparseable state that leaves the app with no usable
+// models (#4615, #4708).
 func (c *Config) WriteFile(path string) error {
-	return os.WriteFile(path, []byte(RenderTOMLForScope(c, renderScopeForPath(path))), 0o644)
+	return fileutil.AtomicWriteFile(path, []byte(RenderTOMLForScope(c, renderScopeForPath(path))), 0o644)
 }
 
 // Provider returns the named provider entry.
diff --git a/internal/config/edit.go b/internal/config/edit.go
index e10de0de9..7a097f6c8 100644
--- a/internal/config/edit.go
+++ b/internal/config/edit.go
@@ -828,25 +828,7 @@ func writeConfigFile(path, body string) error {
 	if strings.TrimSpace(path) == "" {
 		return fmt.Errorf("save: empty config path")
 	}
-	dir := filepath.Dir(path)
-	if err := os.MkdirAll(dir, 0o755); err != nil {
-		return fmt.Errorf("save: create dir: %w", err)
-	}
-	tmp, err := os.CreateTemp(dir, ".reasonix.*.toml.tmp")
-	if err != nil {
-		return fmt.Errorf("save: create temp: %w", err)
-	}
-	tmpPath := tmp.Name()
-	if _, err := tmp.WriteString(body); err != nil {
-		tmp.Close()
-		os.Remove(tmpPath)
-		return fmt.Errorf("save: write: %w", err)
-	}
-	if err := tmp.Close(); err != nil {
-		os.Remove(tmpPath)
-		return fmt.Errorf("save: close temp: %w", err)
-	}
-	return fileutil.ReplaceFile(tmpPath, path)
+	return fileutil.AtomicWriteFile(path, []byte(body), 0o644)
 }
 
 func renderScopeForPath(path string) RenderScope {
diff --git a/internal/config/migrate.go b/internal/config/migrate.go
index 7bf9665a4..67b9df4e1 100644
--- a/internal/config/migrate.go
+++ b/internal/config/migrate.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"encoding/json"
 	"fmt"
+	"io"
 	"os"
 	"path/filepath"
 	"sort"
@@ -423,7 +424,15 @@ func migrateLegacyTOMLIfNeeded(dest, home string) (*MigrationResult, error) {
 		if err := cfg.WriteFile(dest); err != nil {
 			return nil, fmt.Errorf("write %s: %w", dest, err)
 		}
-		return &MigrationResult{From: src, To: dest, Plugins: len(cfg.Plugins)}, nil
+		res := &MigrationResult{From: src, To: dest, Plugins: len(cfg.Plugins)}
+		legacyDir := filepath.Dir(src)
+		newDir := filepath.Dir(dest)
+		if !samePath(legacyDir, newDir) {
+			if warnings := migrateSupportData(legacyDir, newDir); len(warnings) > 0 {
+				res.Warnings = append(res.Warnings, warnings...)
+			}
+		}
+		return res, nil
 	}
 	return nil, nil
 }
@@ -571,3 +580,84 @@ func writeCredentialsEnv(home string, lines []string) error {
 	}
 	return nil
 }
+
+func migrateSupportData(legacyDir, newDir string) []string {
+	var warnings []string
+	items := []string{"sessions", "projects", "skills", "archive", "hooks.json"}
+	for _, item := range items {
+		src := filepath.Join(legacyDir, item)
+		fi, err := os.Stat(src)
+		if err != nil {
+			if os.IsNotExist(err) {
+				continue
+			}
+			warnings = append(warnings, fmt.Sprintf("failed to read legacy item %s: %v", item, err))
+			continue
+		}
+		dst := filepath.Join(newDir, item)
+		if fi.IsDir() {
+			if err := copyDir(src, dst); err != nil {
+				warnings = append(warnings, fmt.Sprintf("failed to migrate directory %s: %v", item, err))
+			} else {
+				warnings = append(warnings, fmt.Sprintf("successfully migrated directory %s", item))
+			}
+		} else {
+			if err := copyFile(src, dst); err != nil {
+				warnings = append(warnings, fmt.Sprintf("failed to migrate file %s: %v", item, err))
+			} else {
+				warnings = append(warnings, fmt.Sprintf("successfully migrated file %s", item))
+			}
+		}
+	}
+	return warnings
+}
+
+func copyFile(src, dst string) error {
+	in, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer in.Close()
+
+	if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
+		return err
+	}
+
+	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
+	if err != nil {
+		return err
+	}
+	defer out.Close()
+
+	if _, err = io.Copy(out, in); err != nil {
+		return err
+	}
+	return out.Sync()
+}
+
+func copyDir(src, dst string) error {
+	entries, err := os.ReadDir(src)
+	if err != nil {
+		return err
+	}
+
+	if err := os.MkdirAll(dst, 0o755); err != nil {
+		return err
+	}
+
+	for _, entry := range entries {
+		srcPath := filepath.Join(src, entry.Name())
+		dstPath := filepath.Join(dst, entry.Name())
+
+		if entry.IsDir() {
+			if err := copyDir(srcPath, dstPath); err != nil {
+				return err
+			}
+		} else {
+			if err := copyFile(srcPath, dstPath); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
diff --git a/internal/config/migrate_test.go b/internal/config/migrate_test.go
index 50b73377d..cb54a3850 100644
--- a/internal/config/migrate_test.go
+++ b/internal/config/migrate_test.go
@@ -753,3 +753,64 @@ func TestMigrateCustomBaseURLWarns(t *testing.T) {
 		}
 	}
 }
+
+func TestMigrateSupportData(t *testing.T) {
+	if runtime.GOOS == "windows" {
+		t.Skip("skipping since legacyOSSupportDir equals current reasonixHomeDir on Windows")
+	}
+	home := t.TempDir()
+	t.Setenv("HOME", home)
+	t.Setenv("REASONIX_CREDENTIALS_STORE", "file")
+	t.Setenv("USERPROFILE", home)
+	t.Setenv("AppData", filepath.Join(home, "AppData"))
+
+	legacyConf := legacyUserConfigPath()
+	if legacyConf == "" {
+		t.Skip("skipping because legacy config path is empty")
+	}
+	legacyDir := filepath.Dir(legacyConf)
+
+	// Write data to the legacy support directory
+	filesToWrite := map[string]string{
+		"config.toml":                  "language = \"zh\"",
+		"hooks.json":                   `{"hook":"test"}`,
+		"sessions/s1.json":             `{"id":"s1"}`,
+		"projects/p1/sessions/s2.json": `{"id":"s2"}`,
+		"skills/custom.md":             `custom skill`,
+		"archive/a1.json":              `{"compacted": true}`,
+	}
+	for rel, content := range filesToWrite {
+		path := filepath.Join(legacyDir, rel)
+		if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	res, err := MigrateLegacyIfNeeded()
+	if err != nil {
+		t.Fatalf("MigrateLegacyIfNeeded failed: %v", err)
+	}
+	if res == nil {
+		t.Fatal("expected migration result, got nil")
+	}
+
+	newDir := filepath.Dir(userConfigPath())
+	for rel, expectedContent := range filesToWrite {
+		if rel == "config.toml" {
+			continue
+		}
+		newPath := filepath.Join(newDir, rel)
+		data, err := os.ReadFile(newPath)
+		if err != nil {
+			t.Errorf("expected file %s to be migrated, but got error: %v", rel, err)
+			continue
+		}
+		if string(data) != expectedContent {
+			t.Errorf("file %s content mismatch: got %q, want %q", rel, string(data), expectedContent)
+		}
+	}
+}
+
diff --git a/internal/control/approval_e2e_test.go b/internal/control/approval_e2e_test.go
index d38ac709f..0382d2c27 100644
--- a/internal/control/approval_e2e_test.go
+++ b/internal/control/approval_e2e_test.go
@@ -5,6 +5,7 @@ import (
 	"encoding/json"
 	"sync"
 	"testing"
+	"time"
 
 	"reasonix/internal/agent"
 	"reasonix/internal/event"
@@ -90,3 +91,69 @@ func TestApprovalToolWideEndToEnd(t *testing.T) {
 		t.Errorf("executed writes = %v, want both a.txt and b.txt", writer.paths)
 	}
 }
+
+// TestApprovalTimeoutDeniesWhenUnanswered verifies a positive ApprovalTimeout
+// turns an unanswered prompt into a denial (error) instead of blocking forever
+// (#4626, #4402). Ask shares the same wait context as tool-approval prompts.
+func TestApprovalTimeoutDeniesWhenUnanswered(t *testing.T) {
+	c := New(Options{
+		Policy:         permission.New("ask", nil, nil, nil),
+		Sink:           event.Discard,
+		ApprovalTimeout: 40 * time.Millisecond,
+	})
+	c.EnableInteractiveApproval()
+
+	start := time.Now()
+	_, err := c.Ask(context.Background(), []event.AskQuestion{{ID: "q1", Prompt: "pick one"}})
+	elapsed := time.Since(start)
+
+	if err == nil {
+		t.Fatal("Ask should error when the approval timeout elapses unanswered")
+	}
+	// Must return near the timeout, not hang. Allow generous slack for CI scheduling.
+	if elapsed > 2*time.Second {
+		t.Fatalf("Ask blocked for %v; timeout should have fired near 40ms", elapsed)
+	}
+}
+
+// TestApprovalTimeoutZeroWaitsIndefinitely confirms the default (zero) keeps the
+// interactive behavior: an unanswered Ask blocks rather than timing out, so a
+// human at a terminal is never cut off.
+func TestApprovalTimeoutZeroWaitsIndefinitely(t *testing.T) {
+	c := New(Options{
+		Policy: permission.New("ask", nil, nil, nil),
+		Sink:   event.Discard,
+		// ApprovalTimeout intentionally zero (default).
+	})
+	c.EnableInteractiveApproval()
+
+	done := make(chan error, 1)
+	go func() {
+		_, err := c.Ask(context.Background(), []event.AskQuestion{{ID: "q1", Prompt: "pick one"}})
+		done <- err
+	}()
+
+	select {
+	case <-done:
+		t.Fatal("Ask with zero timeout must block until answered, not return on its own")
+	case <-time.After(120 * time.Millisecond):
+		// Good: still blocked, as expected for interactive use.
+	}
+
+	// Clean up so the goroutine doesn't linger: answer the prompt.
+	c.mu.Lock()
+	var ids []string
+	for id := range c.asks {
+		ids = append(ids, id)
+	}
+	c.mu.Unlock()
+
+	for _, id := range ids {
+		c.AnswerQuestion(id, []event.AskAnswer{{QuestionID: "q1", Selected: []string{"x"}}})
+	}
+	select {
+	case <-done:
+	case <-time.After(2 * time.Second):
+		t.Fatal("Ask did not unblock after answering")
+	}
+}
diff --git a/internal/control/controller.go b/internal/control/controller.go
index 47337fad7..8a5000654 100644
--- a/internal/control/controller.go
+++ b/internal/control/controller.go
@@ -54,6 +54,12 @@ import (
 // while one is already active in the same Controller.
 var ErrTurnRunning = errors.New("turn already running")
 
+// errNoSessionPath is returned by snapshot when a session has content to persist
+// but no resolved session path — a misconfiguration (e.g. an unresolvable data
+// dir in a bot deployment) that previously dropped conversations silently
+// (#4414). Callers log it and continue; it must never be swallowed quietly.
+var errNoSessionPath = errors.New("session has content but no session path; conversation cannot be persisted")
+
 // Controller drives one chat session. Construct with New; drive with the command
 // methods; observe through the Sink passed in Options.
 type Controller struct {
@@ -128,6 +134,13 @@ type Controller struct {
 	// the blocking wait, so it must never be taken by the Approve/Answer paths.
 	promptMu sync.Mutex
 
+	// approvalTimeout bounds how long requestApproval/AnswerQuestion block waiting
+	// for a user decision. Zero (the default) means wait indefinitely, which is
+	// correct for an interactive terminal where the user is present. Bot/headless
+	// frontends set it so an unanswered approval can't wedge the session forever
+	// when the user has walked away (#4626, #4402).
+	approvalTimeout time.Duration
+
 	// mu guards the run state and approval bookkeeping; every critical section
 	// under it is short and non-blocking.
 	mu               sync.Mutex
@@ -321,6 +334,11 @@ type Options struct {
 	// PlanModeAllowedTools names tools exempt from the plan-mode read-only gate.
 	// Passed through to the executor agent so user-configured exceptions work.
 	PlanModeAllowedTools []string
+	// ApprovalTimeout bounds how long a tool-approval or ask prompt blocks waiting
+	// for a user decision. Zero (default) waits forever — right for an interactive
+	// terminal. Bot/headless frontends set a positive value so an unanswered
+	// prompt can't wedge the session indefinitely (#4626, #4402).
+	ApprovalTimeout time.Duration
 }
 
 // New builds a Controller. A nil Sink is replaced with event.Discard.
@@ -370,6 +388,7 @@ func New(opts Options) *Controller {
 		pluginCtx:              pluginCtx,
 		cpRoot:                 opts.WorkspaceRoot,
 		toolApprovalMode:       ToolApprovalAsk,
+		approvalTimeout:        opts.ApprovalTimeout,
 		approvals:              map[string]pendingApproval{},
 		asks:                   map[string]pendingAsk{},
 		granted:                map[string]bool{},
@@ -1718,14 +1737,17 @@ func (c *Controller) Ask(ctx context.Context, questions []event.AskQuestion) ([]
 
 	c.sink.Emit(event.Event{Kind: event.AskRequest, Ask: event.Ask{ID: id, Questions: questions}})
 
+	waitCtx, cancelWait := c.approvalWaitContext(ctx)
+	defer cancelWait()
+
 	select {
 	case ans := <-reply:
 		return ans, nil
-	case <-ctx.Done():
+	case <-waitCtx.Done():
 		c.mu.Lock()
 		delete(c.asks, id)
 		c.mu.Unlock()
-		return nil, ctx.Err()
+		return nil, waitCtx.Err()
 	}
 }
 
@@ -2460,9 +2482,10 @@ func (c *Controller) maybeColdResumePrune(path string) {
 }
 
 // Snapshot writes the executor's conversation to the active session file. No-op
-// when persistence is unavailable or the session has never been used (no user
-// interaction). Called after every turn so a crash loses at most one in-flight
-// prompt.
+// when the executor is absent or the session has never been used (no user
+// interaction). Returns errNoSessionPath when there IS content but no resolved
+// path, so a misconfigured deployment surfaces instead of dropping data.
+// Called after every turn so a crash loses at most one in-flight prompt.
 func (c *Controller) Snapshot() error {
 	return c.snapshot(false)
 }
@@ -2505,13 +2528,23 @@ func (c *Controller) snapshot(markActivity bool) error {
 	path := c.sessionPath
 	modelRef := c.modelRef
 	c.mu.Unlock()
-	if c.executor == nil || path == "" {
+	if c.executor == nil {
 		return nil
 	}
 	s := c.executor.Session()
 	if !s.HasContent() {
+		// Nothing to persist yet (e.g. a fresh session with only a system
+		// prompt) — staying quiet here is correct, not a data-loss path.
 		return nil
 	}
+	if path == "" {
+		// There IS content but nowhere to write it: this silently dropped whole
+		// bot conversations (#4414). Surface it loudly instead of returning nil
+		// so the missing session path can be diagnosed and fixed at the source.
+		slog.Warn("controller: session has content but no session path; conversation will not be persisted",
+			"label", c.Label(), "session_dir", c.SessionDir())
+		return errNoSessionPath
+	}
 	if !markActivity {
 		if _, err := agent.EnsureBranchMeta(path); err != nil {
 			return err
@@ -3730,6 +3763,17 @@ func parseRewind(args string, cps []checkpoint.Meta) (int, RewindScope, error) {
 	return turn, scope, nil
 }
 
+// approvalWaitContext returns the context the approval/ask wait blocks on. When
+// approvalTimeout is zero it just forwards ctx (interactive: wait forever). When
+// positive it layers a timeout so a headless/bot session can't hang on a prompt
+// nobody will answer (#4626, #4402); the caller treats expiry as a denial.
+func (c *Controller) approvalWaitContext(ctx context.Context) (context.Context, context.CancelFunc) {
+	if c.approvalTimeout <= 0 {
+		return ctx, func() {}
+	}
+	return context.WithTimeout(ctx, c.approvalTimeout)
+}
+
 // requestApproval emits an ApprovalRequest and blocks until Approve(ID, …)
 // answers or ctx is cancelled. A prior session grant for the same approval scope
 // short-circuits. promptMu serialises outstanding prompts.
@@ -3768,6 +3812,9 @@ func (c *Controller) requestApproval(ctx context.Context, tool, subject string,
 	// external channel (desktop notice, phone) while the run blocks on the reply.
 	go c.hooks.Notification(ctx, approvalNotificationText(tool, subject))
 
+	waitCtx, cancelWait := c.approvalWaitContext(ctx)
+	defer cancelWait()
+
 	select {
 	case r := <-reply:
 		// Plan approvals are one-shot — never persist a session grant for them, or
@@ -3782,11 +3829,11 @@ func (c *Controller) requestApproval(ctx context.Context, tool, subject string,
 			c.emitRememberResult(c.onRemember(permission.RememberRuleForScope(tool, subject)))
 		}
 		return r.allow, false, nil
-	case <-ctx.Done():
+	case <-waitCtx.Done():
 		c.mu.Lock()
 		delete(c.approvals, id)
 		c.mu.Unlock()
-		return false, false, ctx.Err()
+		return false, false, waitCtx.Err()
 	}
 }
 
diff --git a/internal/fileutil/atomicwrite.go b/internal/fileutil/atomicwrite.go
index 0a6a02eed..ed50a1815 100644
--- a/internal/fileutil/atomicwrite.go
+++ b/internal/fileutil/atomicwrite.go
@@ -1,7 +1,9 @@
 package fileutil
 
 import (
+	"fmt"
 	"os"
+	"path/filepath"
 	"time"
 )
 
@@ -10,6 +12,42 @@ var (
 	replaceRetryBase  = 20 * time.Millisecond
 )
 
+// AtomicWriteFile writes data to path crash-safely: it writes to a sibling tmp
+// file, fsyncs it so the bytes reach disk (guarding against power loss, not just
+// process crash — see #4615), then atomically renames it onto path via
+// ReplaceFile. A crash or power cut at any point leaves either the old file or
+// the complete new file, never a truncated one. perm applies to the final file.
+func AtomicWriteFile(path string, data []byte, perm os.FileMode) error {
+	dir := filepath.Dir(path)
+	if err := os.MkdirAll(dir, 0o755); err != nil {
+		return fmt.Errorf("create dir for %s: %w", path, err)
+	}
+	tmp, err := os.CreateTemp(dir, ".atomic-*.tmp")
+	if err != nil {
+		return fmt.Errorf("create tmp for %s: %w", path, err)
+	}
+	tmpPath := tmp.Name()
+	if _, err := tmp.Write(data); err != nil {
+		tmp.Close()
+		os.Remove(tmpPath)
+		return fmt.Errorf("write tmp for %s: %w", path, err)
+	}
+	if err := tmp.Sync(); err != nil {
+		tmp.Close()
+		os.Remove(tmpPath)
+		return fmt.Errorf("fsync tmp for %s: %w", path, err)
+	}
+	if err := tmp.Close(); err != nil {
+		os.Remove(tmpPath)
+		return fmt.Errorf("close tmp for %s: %w", path, err)
+	}
+	if err := os.Chmod(tmpPath, perm); err != nil {
+		os.Remove(tmpPath)
+		return fmt.Errorf("chmod tmp for %s: %w", path, err)
+	}
+	return ReplaceFile(tmpPath, path)
+}
+
 // ReplaceFile renames tmp onto dest, falling back to a copy when the rename
 // fails — Windows encryption-software filter drivers report a cross-device link
 // (EXDEV) for a same-dir rename. A second Windows failure mode is a transient
diff --git a/internal/fileutil/atomicwrite_test.go b/internal/fileutil/atomicwrite_test.go
index 3d2cb64dc..c35b13a8d 100644
--- a/internal/fileutil/atomicwrite_test.go
+++ b/internal/fileutil/atomicwrite_test.go
@@ -87,3 +87,46 @@ func TestCopyOntoOverwritesAndPreservesMode(t *testing.T) {
 		t.Logf("dest mode = %o (want 0600 on Unix)", info.Mode().Perm())
 	}
 }
+
+func TestAtomicWriteFileReplacesExisting(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "config.toml")
+	if err := os.WriteFile(path, []byte("old"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	if err := AtomicWriteFile(path, []byte("new-content"), 0o600); err != nil {
+		t.Fatalf("AtomicWriteFile: %v", err)
+	}
+	got, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if string(got) != "new-content" {
+		t.Fatalf("content = %q, want %q", got, "new-content")
+	}
+	info, err := os.Stat(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if perm := info.Mode().Perm(); perm != 0o600 {
+		t.Fatalf("perm = %o, want 600", perm)
+	}
+	// No leftover tmp files in the directory.
+	entries, _ := os.ReadDir(dir)
+	for _, e := range entries {
+		if e.Name() != "config.toml" {
+			t.Fatalf("unexpected leftover file: %s", e.Name())
+		}
+	}
+}
+
+func TestAtomicWriteFileCreatesParentDir(t *testing.T) {
+	dir := t.TempDir()
+	path := filepath.Join(dir, "nested", "deep", "creds")
+	if err := AtomicWriteFile(path, []byte("x"), 0o600); err != nil {
+		t.Fatalf("AtomicWriteFile into missing dir: %v", err)
+	}
+	if _, err := os.Stat(path); err != nil {
+		t.Fatalf("file not created: %v", err)
+	}
+}
diff --git a/internal/plugin/codegraph_limit.go b/internal/plugin/codegraph_limit.go
new file mode 100644
index 000000000..15b33931f
--- /dev/null
+++ b/internal/plugin/codegraph_limit.go
@@ -0,0 +1,35 @@
+package plugin
+
+import (
+	"fmt"
+	"sync/atomic"
+)
+
+// maxCodeGraphInstances caps how many CodeGraph indexer subprocesses may run
+// concurrently across the whole process. Each tab/session boots its own plugin
+// Host (desktop/tabs.go, boot.go), so opening many tabs on large projects used
+// to spawn one full-tree indexer per tab — exhausting file descriptors and
+// freezing the machine (#4361, #2992, #3797). A hard cap bounds the blast
+// radius: once reached, additional CodeGraph instances refuse to start and the
+// agent degrades to grep/glob, which is the documented manual workaround
+// (`[codegraph] enabled = false`) applied automatically instead of a crash.
+const maxCodeGraphInstances = 4
+
+var liveCodeGraphInstances atomic.Int32
+
+// acquireCodeGraphSlot reserves one of the bounded CodeGraph instance slots.
+// It returns a release func (idempotent) and an error when the cap is reached.
+// The cap only governs CodeGraph; every other stdio plugin is unaffected.
+func acquireCodeGraphSlot() (func(), error) {
+	n := liveCodeGraphInstances.Add(1)
+	if n > maxCodeGraphInstances {
+		liveCodeGraphInstances.Add(-1)
+		return nil, fmt.Errorf("codegraph: %d instances already running (cap %d); not starting another to avoid file-descriptor exhaustion — close some tabs/sessions or set codegraph off for this one", n-1, maxCodeGraphInstances)
+	}
+	var released atomic.Bool
+	return func() {
+		if released.CompareAndSwap(false, true) {
+			liveCodeGraphInstances.Add(-1)
+		}
+	}, nil
+}
diff --git a/internal/plugin/codegraph_limit_test.go b/internal/plugin/codegraph_limit_test.go
new file mode 100644
index 000000000..ad6fdbb32
--- /dev/null
+++ b/internal/plugin/codegraph_limit_test.go
@@ -0,0 +1,46 @@
+package plugin
+
+import "testing"
+
+func TestAcquireCodeGraphSlotCapsInstances(t *testing.T) {
+	// Drain any residual count from parallel tests by asserting a clean start.
+	if got := liveCodeGraphInstances.Load(); got != 0 {
+		t.Fatalf("expected 0 live instances at start, got %d", got)
+	}
+
+	releases := make([]func(), 0, maxCodeGraphInstances)
+	for i := 0; i < maxCodeGraphInstances; i++ {
+		release, err := acquireCodeGraphSlot()
+		if err != nil {
+			t.Fatalf("acquire %d within cap should succeed: %v", i, err)
+		}
+		releases = append(releases, release)
+	}
+
+	// One past the cap must be refused, and the counter must not leak upward.
+	if _, err := acquireCodeGraphSlot(); err == nil {
+		t.Fatal("acquiring past the cap should fail")
+	}
+	if got := liveCodeGraphInstances.Load(); got != int32(maxCodeGraphInstances) {
+		t.Fatalf("count must stay at cap after a refused acquire, got %d", got)
+	}
+
+	// Releasing frees a slot, and release is idempotent.
+	releases[0]()
+	releases[0]()
+	if got := liveCodeGraphInstances.Load(); got != int32(maxCodeGraphInstances-1) {
+		t.Fatalf("double-release must free exactly one slot, got %d", got)
+	}
+	release, err := acquireCodeGraphSlot()
+	if err != nil {
+		t.Fatalf("a freed slot should be reusable: %v", err)
+	}
+	releases[0] = release
+
+	for _, r := range releases {
+		r()
+	}
+	if got := liveCodeGraphInstances.Load(); got != 0 {
+		t.Fatalf("all slots must be freed at end, got %d", got)
+	}
+}
diff --git a/internal/plugin/known_overrides.go b/internal/plugin/known_overrides.go
index f931b681b..832f4b5c6 100644
--- a/internal/plugin/known_overrides.go
+++ b/internal/plugin/known_overrides.go
@@ -21,6 +21,11 @@ func ApplyKnownOverrides(s Spec, workspaceRoot string) Spec {
 			}
 			s.Env = mergeDefaultEnv(s.Env, codeGraphDaemonIdleTimeoutEnv, codeGraphDaemonIdleTimeoutDefaultMS)
 		}
+		// CodeGraph does full-tree indexing + file-watching; run it below normal
+		// scheduling priority so a background indexer can never starve the user's
+		// machine (#3797, #2992). The proc-level mechanism already exists but was
+		// never wired to the spec, so it stayed disabled.
+		s.LowPriority = true
 	}
 	return s
 }
diff --git a/internal/plugin/lazy.go b/internal/plugin/lazy.go
index ff6c6a678..69a3be004 100644
--- a/internal/plugin/lazy.go
+++ b/internal/plugin/lazy.go
@@ -222,10 +222,14 @@ func (lt *lazyTool) Execute(ctx context.Context, args json.RawMessage) (string,
 			return "", fmt.Errorf("MCP server %q is initializing on first use — call again on the next turn for its real tools", sp.spec.Name)
 		}
 		// Cache-hit: run the handshake synchronously so this one Execute can
-		// forward through.
+		// forward through. Bound it with a start timeout so a wedged or
+		// unreachable MCP server can't hang the whole turn indefinitely
+		// (#4806) — on timeout we fail this attempt and a later turn can retry.
 		sp.state = spawnInFlight
 		sp.mu.Unlock()
-		real, err := sp.host.Add(sp.ctx, sp.spec)
+		spawnCtx, cancel := context.WithTimeout(sp.ctx, defaultStartTimeout)
+		real, err := sp.host.AddWithLifecycle(sp.ctx, spawnCtx, sp.spec)
+		cancel()
 		sp.mu.Lock()
 		defer sp.mu.Unlock()
 		if err != nil {
diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go
index d37f27895..71a029b16 100644
--- a/internal/plugin/plugin.go
+++ b/internal/plugin/plugin.go
@@ -751,7 +751,21 @@ func (h *Host) Add(ctx context.Context, s Spec) ([]tool.Tool, error) {
 	return tools, err
 }
 
+// AddWithLifecycle connects one server live, allowing caller to specify separate
+// contexts for the subprocess lifecycle (lifeCtx, session-scoped) and the startup
+// handshake/list calls (callCtx, turn-scoped/timeout-bound).
+func (h *Host) AddWithLifecycle(lifeCtx, callCtx context.Context, s Spec) ([]tool.Tool, error) {
+	if h.has(s.Name) {
+		return nil, serverAlreadyConnectedError(s.Name)
+	}
+	return h.addConnectedWithLifecycle(lifeCtx, callCtx, s)
+}
+
 func (h *Host) addConnected(ctx context.Context, s Spec) ([]tool.Tool, error) {
+	return h.addConnectedWithLifecycle(ctx, ctx, s)
+}
+
+func (h *Host) addConnectedWithLifecycle(lifeCtx, callCtx context.Context, s Spec) ([]tool.Tool, error) {
 	h.mu.RLock()
 	if h.closed {
 		h.mu.RUnlock()
@@ -759,11 +773,11 @@ func (h *Host) addConnected(ctx context.Context, s Spec) ([]tool.Tool, error) {
 	}
 	h.mu.RUnlock()
 
-	c, err := start(ctx, ctx, s)
+	c, err := start(lifeCtx, callCtx, s)
 	if err != nil {
 		return nil, err
 	}
-	ts, err := c.listTools(ctx)
+	ts, err := c.listTools(callCtx)
 	if err != nil {
 		c.close()
 		return nil, fmt.Errorf("list tools: %w", err)
@@ -783,15 +797,15 @@ func (h *Host) addConnected(ctx context.Context, s Spec) ([]tool.Tool, error) {
 	h.clients = append(h.clients, c)
 	h.clearFailure(s.Name)
 	h.mu.Unlock()
-	// Prompts and resources stream in on the long ctx the caller passed (Host.Add
+	// Prompts and resources stream in on the long lifeCtx the caller passed (Host.Add
 	// uses the session-scoped PluginCtx, not a per-turn ctx), so the slow list
 	// calls cannot starve a /mcp add of its return value. nil sink keeps hot-add
 	// quiet — the chat UI re-queries Host.Prompts()/Resources() on demand.
 	if c.hasPrompts {
-		go h.fetchPrompts(ctx, c, nil)
+		go h.fetchPrompts(lifeCtx, c, nil)
 	}
 	if c.hasResources {
-		go h.fetchResources(ctx, c, nil)
+		go h.fetchResources(lifeCtx, c, nil)
 	}
 	return ts, nil
 }
diff --git a/internal/plugin/transport_stdio.go b/internal/plugin/transport_stdio.go
index 18104c511..51e31833f 100644
--- a/internal/plugin/transport_stdio.go
+++ b/internal/plugin/transport_stdio.go
@@ -50,13 +50,29 @@ type stdioTransport struct {
 	pending map[int]chan rpcResponse
 	readErr error // set once the reader goroutine exits; further calls fail fast
 
-	waitOnce sync.Once
+	waitOnce    sync.Once
+	releaseSlot func() // returns a bounded instance slot (e.g. CodeGraph) on close; nil when unbounded
 }
 
 func newStdioTransport(ctx context.Context, s Spec) (*stdioTransport, error) {
 	if strings.TrimSpace(s.Command) == "" {
 		return nil, fmt.Errorf("stdio plugin %q: command is required", s.Name)
 	}
+	var releaseSlot func()
+	if isCodeGraphSpecName(s.Name) {
+		release, err := acquireCodeGraphSlot()
+		if err != nil {
+			return nil, err
+		}
+		releaseSlot = release
+	}
+	defer func() {
+		// Release the reserved slot if construction fails before the transport
+		// takes ownership of it (set to nil on the success path below).
+		if releaseSlot != nil {
+			releaseSlot()
+		}
+	}()
 	env := mergeEnv(os.Environ(), s.Env)
 	exe, env, err := resolveStdioExecutable(ctx, s, env)
 	if err != nil {
@@ -93,14 +109,16 @@ func newStdioTransport(ctx context.Context, s Spec) (*stdioTransport, error) {
 		proc.LowPriorityStarted(cmd)
 	}
 	t := &stdioTransport{
-		name:    s.Name,
-		cmd:     cmd,
-		job:     job,
-		stdin:   stdin,
-		stdout:  bufio.NewReader(stdout),
-		stderr:  stderr,
-		pending: map[int]chan rpcResponse{},
-	}
+		name:        s.Name,
+		cmd:         cmd,
+		job:         job,
+		stdin:       stdin,
+		stdout:      bufio.NewReader(stdout),
+		stderr:      stderr,
+		pending:     map[int]chan rpcResponse{},
+		releaseSlot: releaseSlot,
+	}
+	releaseSlot = nil // ownership transferred to t; close() releases it
 	go t.readLoop()
 	return t, nil
 }
@@ -551,6 +569,9 @@ func (t *stdioTransport) wait() {
 // blocking forever) and reaps it under a budget so one wedged server can never
 // stall a boot or a turn teardown.
 func (t *stdioTransport) close() {
+	if t.releaseSlot != nil {
+		t.releaseSlot() // idempotent; frees the bounded CodeGraph instance slot
+	}
 	if t.stdin != nil {
 		_ = t.stdin.Close()
 	}

From 4bf191962b78a0c6ec328b4fe48fd764b2832534 Mon Sep 17 00:00:00 2001
From: Teng Yanxi <151488904+Xio-Shark@users.noreply.github.com>
Date: Fri, 19 Jun 2026 02:44:59 +0800
Subject: [PATCH 02/10] fix: resolve temp file leaks, HljsDiff virtualization
 bugs, and tighten credential dir permissions

---
 .../src/components/editors/HljsDiff.tsx       | 40 ++++++++++---------
 internal/agent/branch.go                      |  6 ++-
 internal/agent/save.go                        |  6 ++-
 internal/config/credentials.go                |  2 +-
 internal/config/mcpjson.go                    |  6 ++-
 internal/fileutil/atomicwrite.go              | 12 +++++-
 6 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/desktop/frontend/src/components/editors/HljsDiff.tsx b/desktop/frontend/src/components/editors/HljsDiff.tsx
index a4969b6d3..ffb1628d0 100644
--- a/desktop/frontend/src/components/editors/HljsDiff.tsx
+++ b/desktop/frontend/src/components/editors/HljsDiff.tsx
@@ -47,8 +47,8 @@ export default function HljsDiff({ original = "", modified = "", diff = "", lang
       className="diff hljs"
       style={{
         maxHeight: maxHeight || undefined,
-        overflow: maxHeight ? "auto" : undefined,
-        position: maxHeight ? "relative" : undefined,
+        overflow: (maxHeight || isVirtual) ? "auto" : undefined,
+        position: (maxHeight || isVirtual) ? "relative" : undefined,
       }}
     >
       {isVirtual ? (
@@ -60,22 +60,26 @@ export default function HljsDiff({ original = "", modified = "", diff = "", lang
             position: "relative",
           }}
         >
-          {virtualizer.getVirtualItems().map((row) => (
-            <div
-              key={row.key}
-              data-index={row.index}
-              ref={virtualizer.measureElement}
-              style={{
-                position: "absolute",
-                top: 0,
-                left: 0,
-                width: "100%",
-                transform: `translateY(${row.start}px)`,
-              }}
-            >
-              {renderRow(rows[row.index], row.index)}
-            </div>
-          ))}
+          {virtualizer.getVirtualItems().map((row) => {
+            const item = rows[row.index];
+            if (!item) return null;
+            return (
+              <div
+                key={row.key}
+                data-index={row.index}
+                ref={virtualizer.measureElement}
+                style={{
+                  position: "absolute",
+                  top: 0,
+                  left: 0,
+                  width: "100%",
+                  transform: `translateY(${row.start}px)`,
+                }}
+              >
+                {renderRow(item, row.index)}
+              </div>
+            );
+          })}
         </div>
       ) : (
         <div className="diff__table">
diff --git a/internal/agent/branch.go b/internal/agent/branch.go
index 356670057..c8252782d 100644
--- a/internal/agent/branch.go
+++ b/internal/agent/branch.go
@@ -134,7 +134,11 @@ func saveBranchMeta(sessionPath string, m BranchMeta, touchUpdated bool) error {
 		os.Remove(tmpPath)
 		return err
 	}
-	return fileutil.ReplaceFile(tmpPath, metaPath)
+	if err := fileutil.ReplaceFile(tmpPath, metaPath); err != nil {
+		os.Remove(tmpPath)
+		return err
+	}
+	return nil
 }
 
 func EnsureBranchMeta(sessionPath string) (BranchMeta, error) {
diff --git a/internal/agent/save.go b/internal/agent/save.go
index a7d1a8176..319705e04 100644
--- a/internal/agent/save.go
+++ b/internal/agent/save.go
@@ -48,7 +48,11 @@ func (s *Session) Save(path string) error {
 		os.Remove(tmpPath)
 		return err
 	}
-	return fileutil.ReplaceFile(tmpPath, path)
+	if err := fileutil.ReplaceFile(tmpPath, path); err != nil {
+		os.Remove(tmpPath)
+		return err
+	}
+	return nil
 }
 
 // LoadSession reads a JSONL file written by Save into a fresh Session value.
diff --git a/internal/config/credentials.go b/internal/config/credentials.go
index 0f27b5155..16aada767 100644
--- a/internal/config/credentials.go
+++ b/internal/config/credentials.go
@@ -644,7 +644,7 @@ func writeCredentialFileLines(path string, lines []string) error {
 	}
 	dir := filepath.Dir(path)
 	if dir != "" && dir != "." {
-		if err := os.MkdirAll(dir, 0o755); err != nil {
+		if err := os.MkdirAll(dir, 0o700); err != nil {
 			return err
 		}
 	}
diff --git a/internal/config/mcpjson.go b/internal/config/mcpjson.go
index becefcc14..ebafc11d8 100644
--- a/internal/config/mcpjson.go
+++ b/internal/config/mcpjson.go
@@ -450,5 +450,9 @@ func writeMCPJSON(path string, root map[string]json.RawMessage) error {
 		os.Remove(tmpPath)
 		return fmt.Errorf("mcp config %s: close temp: %w", path, err)
 	}
-	return fileutil.ReplaceFile(tmpPath, path)
+	if err := fileutil.ReplaceFile(tmpPath, path); err != nil {
+		os.Remove(tmpPath)
+		return err
+	}
+	return nil
 }
diff --git a/internal/fileutil/atomicwrite.go b/internal/fileutil/atomicwrite.go
index ed50a1815..b322f2077 100644
--- a/internal/fileutil/atomicwrite.go
+++ b/internal/fileutil/atomicwrite.go
@@ -19,7 +19,11 @@ var (
 // the complete new file, never a truncated one. perm applies to the final file.
 func AtomicWriteFile(path string, data []byte, perm os.FileMode) error {
 	dir := filepath.Dir(path)
-	if err := os.MkdirAll(dir, 0o755); err != nil {
+	dirPerm := os.FileMode(0o755)
+	if perm&0o077 == 0 {
+		dirPerm = 0o700
+	}
+	if err := os.MkdirAll(dir, dirPerm); err != nil {
 		return fmt.Errorf("create dir for %s: %w", path, err)
 	}
 	tmp, err := os.CreateTemp(dir, ".atomic-*.tmp")
@@ -45,7 +49,11 @@ func AtomicWriteFile(path string, data []byte, perm os.FileMode) error {
 		os.Remove(tmpPath)
 		return fmt.Errorf("chmod tmp for %s: %w", path, err)
 	}
-	return ReplaceFile(tmpPath, path)
+	if err := ReplaceFile(tmpPath, path); err != nil {
+		os.Remove(tmpPath)
+		return err
+	}
+	return nil
 }
 
 // ReplaceFile renames tmp onto dest, falling back to a copy when the rename

From ce0b46089fd511895e851664668a22f9102d51d5 Mon Sep 17 00:00:00 2001
From: Diarica <3501108459@qq.com>
Date: Mon, 15 Jun 2026 09:42:35 +0800
Subject: [PATCH 03/10] fix: don't truncate MCP paths containing spaces

Two code paths were splitting MCP server commands on whitespace
without the shouldSplitPluginCommand guard that NormalizePluginCommandLine
has, which truncated any command path with spaces.

1. Frontend AddServerForm / EditServerForm (CapabilitiesPanel.tsx):
   split(/\s+/) on the command field before sending to the Go backend.
   A path like "C:\Program Files\MyApp\server.exe" became command
   "C:\Program" with args ["Files\MyApp\server.exe"].

2. Legacy config parser (mcpjson.go parseLegacyMCPSpec):
   Always split the body on spaces.  The v0.x config.json format uses
   "name=command args..." lines; for paths with spaces like
   "ida=C:\PersonalData\Software\Learn\IDA Pro\ida_bridge.bat", this
   truncated the command to "C:\PersonalData\Software\Learn\IDA".

Fix: in the frontend, pass the full command string through and let
NormalizePluginCommandLine handle the split on the Go side.  In
parseLegacyMCPSpec, only split when shouldSplitPluginCommand returns
true (known runner like npx/node, or quoted command), otherwise keep
the full body as the command.

Fixes e.g. IDA Pro MCP server configuration where the binary path
contains spaces.
---
 desktop/frontend/src/components/CapabilitiesPanel.tsx | 10 ++++------
 internal/config/mcpjson.go                            |  5 ++++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/desktop/frontend/src/components/CapabilitiesPanel.tsx b/desktop/frontend/src/components/CapabilitiesPanel.tsx
index 7836cd4ca..ccd5594af 100644
--- a/desktop/frontend/src/components/CapabilitiesPanel.tsx
+++ b/desktop/frontend/src/components/CapabilitiesPanel.tsx
@@ -1101,13 +1101,12 @@ function EditServerForm({
   const ready = isStdio ? command.trim() !== "" : url.trim() !== "";
 
   const submit = () => {
-    const parts = command.trim().split(/\s+/).filter(Boolean);
     const envText = env.trim();
     onSave({
       name: s.name,
       transport,
-      command: isStdio ? (parts[0] ?? "") : "",
-      args: isStdio ? parts.slice(1) : [],
+      command: isStdio ? command.trim() : "",
+      args: [],
       url: isStdio ? "" : url.trim(),
       env: envText === "" ? null : parseEnvText(envText),
     });
@@ -1407,7 +1406,6 @@ function AddServerForm({
   const ready = name.trim() !== "" && (isStdio ? command.trim() !== "" : url.trim() !== "");
 
   const submit = () => {
-    const parts = command.trim().split(/\s+/).filter(Boolean);
     const envMap: Record<string, string> = {};
     for (const line of env.split("\n")) {
       const eq = line.indexOf("=");
@@ -1416,8 +1414,8 @@ function AddServerForm({
     onAdd({
       name: name.trim(),
       transport,
-      command: isStdio ? (parts[0] ?? "") : "",
-      args: isStdio ? parts.slice(1) : [],
+      command: isStdio ? command.trim() : "",
+      args: [],
       url: isStdio ? "" : url.trim(),
       env: envMap,
     });
diff --git a/internal/config/mcpjson.go b/internal/config/mcpjson.go
index ebafc11d8..e06f12f8d 100644
--- a/internal/config/mcpjson.go
+++ b/internal/config/mcpjson.go
@@ -172,7 +172,10 @@ func parseLegacyMCPSpec(raw string) (PluginEntry, bool) {
 	if !ok || len(parts) == 0 {
 		return PluginEntry{}, false
 	}
-	return PluginEntry{Name: name, Command: parts[0], Args: parts[1:]}, true
+	if shouldSplitPluginCommand(body, parts[0]) {
+		return PluginEntry{Name: name, Command: parts[0], Args: parts[1:]}, true
+	}
+	return PluginEntry{Name: name, Command: body}, true
 }
 
 // anonymousMCPName names a v0.x spec that carried no name= prefix (its tools

From 425445e73db19803883ccf685b2759c762c56881 Mon Sep 17 00:00:00 2001
From: cyq <15000851237@163.com>
Date: Thu, 18 Jun 2026 12:44:14 +0800
Subject: [PATCH 04/10] fix(plugin): reinitialize expired HTTP MCP sessions

---
 internal/plugin/plugin.go              | 12 ++++
 internal/plugin/transport_http.go      | 33 ++++++++-
 internal/plugin/transport_http_test.go | 99 ++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go
index 71a029b16..6ced5f096 100644
--- a/internal/plugin/plugin.go
+++ b/internal/plugin/plugin.go
@@ -896,6 +896,13 @@ func newTransport(ctx context.Context, s Spec) (transport, error) {
 }
 
 func (c *Client) call(ctx context.Context, method string, params any) (json.RawMessage, error) {
+	res, err := c.t.call(ctx, method, params)
+	if err == nil || method == "initialize" || !isHTTPSessionExpired(err) {
+		return res, err
+	}
+	if initErr := c.initialize(ctx); initErr != nil {
+		return nil, fmt.Errorf("%w; reinitialize failed: %v", err, initErr)
+	}
 	return c.t.call(ctx, method, params)
 }
 
@@ -905,6 +912,11 @@ func (c *Client) notify(ctx context.Context, method string, params any) error {
 
 func (c *Client) close() { c.t.close() }
 
+func isHTTPSessionExpired(err error) bool {
+	var expired *httpSessionExpiredError
+	return errors.As(err, &expired)
+}
+
 func (c *Client) initialize(ctx context.Context) error {
 	res, err := c.call(ctx, "initialize", map[string]any{
 		"protocolVersion": protocolVersion,
diff --git a/internal/plugin/transport_http.go b/internal/plugin/transport_http.go
index 7a1c873e0..905e3f26e 100644
--- a/internal/plugin/transport_http.go
+++ b/internal/plugin/transport_http.go
@@ -69,7 +69,15 @@ func (t *httpTransport) call(ctx context.Context, method string, params any) (js
 
 	if resp.StatusCode/100 != 2 {
 		b, _ := io.ReadAll(io.LimitReader(resp.Body, 4096))
-		return nil, fmt.Errorf("plugin %q: %s: http %d: %s", t.name, method, resp.StatusCode, strings.TrimSpace(string(b)))
+		msg := strings.TrimSpace(string(b))
+		if isHTTPSessionExpiredResponse(resp.StatusCode, b) {
+			t.session = ""
+			return nil, fmt.Errorf("plugin %q: %s: %w", t.name, method, &httpSessionExpiredError{
+				status: resp.StatusCode,
+				body:   msg,
+			})
+		}
+		return nil, fmt.Errorf("plugin %q: %s: http %d: %s", t.name, method, resp.StatusCode, msg)
 	}
 
 	if strings.HasPrefix(resp.Header.Get("Content-Type"), "text/event-stream") {
@@ -127,6 +135,29 @@ func (t *httpTransport) captureSession(resp *http.Response) {
 	}
 }
 
+type httpSessionExpiredError struct {
+	status int
+	body   string
+}
+
+func (e *httpSessionExpiredError) Error() string {
+	if e.body == "" {
+		return fmt.Sprintf("http %d: MCP session expired", e.status)
+	}
+	return fmt.Sprintf("http %d: %s", e.status, e.body)
+}
+
+func isHTTPSessionExpiredResponse(status int, body []byte) bool {
+	if status != http.StatusNotFound {
+		return false
+	}
+	var resp rpcResponse
+	if err := json.Unmarshal(bytes.TrimSpace(body), &resp); err != nil || resp.Error == nil {
+		return false
+	}
+	return resp.Error.Code == -32001 && strings.Contains(strings.ToLower(resp.Error.Message), "session not found")
+}
+
 // readSSEResponse scans an SSE stream for the JSON-RPC response matching id,
 // skipping server notifications and any other-id messages. Per the SSE spec,
 // consecutive data: lines within one event are joined with "\n" and an event is
diff --git a/internal/plugin/transport_http_test.go b/internal/plugin/transport_http_test.go
index 4ef285a3a..e37c9dcc6 100644
--- a/internal/plugin/transport_http_test.go
+++ b/internal/plugin/transport_http_test.go
@@ -7,6 +7,7 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"strings"
+	"sync/atomic"
 	"testing"
 	"time"
 
@@ -121,6 +122,94 @@ func runHTTPTransportTest(t *testing.T, sse bool) {
 func TestHTTPTransportJSON(t *testing.T) { runHTTPTransportTest(t, false) }
 func TestHTTPTransportSSE(t *testing.T)  { runHTTPTransportTest(t, true) }
 
+func TestHTTPTransportReinitializesExpiredSession(t *testing.T) {
+	var initializeCount atomic.Int32
+	var toolCallCount atomic.Int32
+
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		var req struct {
+			ID     *int            `json:"id"`
+			Method string          `json:"method"`
+			Params json.RawMessage `json:"params"`
+		}
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			http.Error(w, "bad body", http.StatusBadRequest)
+			return
+		}
+
+		if req.Method == "initialize" {
+			n := initializeCount.Add(1)
+			w.Header().Set("Mcp-Session-Id", fmt.Sprintf("sess-%d", n))
+			writeHTTPRPCResult(w, req.ID, map[string]any{
+				"protocolVersion": protocolVersion,
+				"serverInfo":      map[string]any{"name": "h", "version": "0"},
+			})
+			return
+		}
+
+		expectedSession := fmt.Sprintf("sess-%d", initializeCount.Load())
+		if got := r.Header.Get("Mcp-Session-Id"); got != expectedSession {
+			http.Error(w, "missing session id", http.StatusBadRequest)
+			return
+		}
+
+		if req.ID == nil { // notifications/initialized
+			w.WriteHeader(http.StatusAccepted)
+			return
+		}
+
+		switch req.Method {
+		case "tools/list":
+			writeHTTPRPCResult(w, req.ID, map[string]any{"tools": []map[string]any{{
+				"name":        "greet",
+				"description": "Greet someone.",
+				"inputSchema": map[string]any{"type": "object"},
+			}}})
+		case "tools/call":
+			n := toolCallCount.Add(1)
+			if n == 1 {
+				w.Header().Set("Content-Type", "application/json")
+				w.WriteHeader(http.StatusNotFound)
+				fmt.Fprintf(w, `{"jsonrpc":"2.0","id":%d,"error":{"code":-32001,"message":"Session not found"}}`, *req.ID)
+				return
+			}
+			if got := r.Header.Get("Mcp-Session-Id"); got != "sess-2" {
+				http.Error(w, "retry did not use the new session", http.StatusBadRequest)
+				return
+			}
+			writeHTTPRPCResult(w, req.ID, map[string]any{
+				"content": []map[string]any{{"type": "text", "text": "hello retry"}},
+			})
+		default:
+			http.Error(w, "unknown method", http.StatusBadRequest)
+		}
+	}))
+	defer srv.Close()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	host, tools, err := StartAll(ctx, []Spec{{Name: "h", Type: "http", URL: srv.URL}})
+	if err != nil {
+		t.Fatalf("StartAll: %v", err)
+	}
+	defer host.Close()
+
+	got, err := tools[0].Execute(ctx, json.RawMessage(`{"name":"sam"}`))
+	if err != nil {
+		t.Fatalf("Execute after expired session: %v", err)
+	}
+	if got != "hello retry" {
+		t.Errorf("Execute = %q, want %q", got, "hello retry")
+	}
+	if got := initializeCount.Load(); got != 2 {
+		t.Errorf("initialize count = %d, want 2", got)
+	}
+	if got := toolCallCount.Load(); got != 2 {
+		t.Errorf("tools/call count = %d, want 2", got)
+	}
+}
+
 // TestHTTPTransportRPCError checks a JSON-RPC error response surfaces as an
 // error rather than an empty result.
 func TestHTTPTransportRPCError(t *testing.T) {
@@ -154,6 +243,16 @@ func TestSSETransportUnsupported(t *testing.T) {
 	}
 }
 
+func writeHTTPRPCResult(w http.ResponseWriter, id *int, result any) {
+	if id == nil {
+		w.WriteHeader(http.StatusAccepted)
+		return
+	}
+	resp := map[string]any{"jsonrpc": "2.0", "id": *id, "result": result}
+	w.Header().Set("Content-Type", "application/json")
+	_ = json.NewEncoder(w).Encode(resp)
+}
+
 func names(ts []tool.Tool) []string {
 	out := make([]string, len(ts))
 	for i, t := range ts {

From 5e4c70eb292ab6f042f2de198d9022fa3dfdda27 Mon Sep 17 00:00:00 2001
From: SivanCola <32437197+SivanCola@users.noreply.github.com>
Date: Sat, 20 Jun 2026 00:05:38 +0800
Subject: [PATCH 05/10] Fix HTTP MCP session reinit race

Keep startup capability flags immutable during runtime HTTP session refreshes so expired-session recovery does not race with concurrent capability readers.

Co-authored-by: SivanCola <32437197+SivanCola@users.noreply.github.com>
---
 internal/plugin/plugin.go              | 10 +++++++++-
 internal/plugin/transport_http_test.go | 21 +++++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go
index 6ced5f096..04cee3422 100644
--- a/internal/plugin/plugin.go
+++ b/internal/plugin/plugin.go
@@ -900,7 +900,7 @@ func (c *Client) call(ctx context.Context, method string, params any) (json.RawM
 	if err == nil || method == "initialize" || !isHTTPSessionExpired(err) {
 		return res, err
 	}
-	if initErr := c.initialize(ctx); initErr != nil {
+	if initErr := c.initializeSession(ctx, false); initErr != nil {
 		return nil, fmt.Errorf("%w; reinitialize failed: %v", err, initErr)
 	}
 	return c.t.call(ctx, method, params)
@@ -918,6 +918,10 @@ func isHTTPSessionExpired(err error) bool {
 }
 
 func (c *Client) initialize(ctx context.Context) error {
+	return c.initializeSession(ctx, true)
+}
+
+func (c *Client) initializeSession(ctx context.Context, recordCapabilities bool) error {
 	res, err := c.call(ctx, "initialize", map[string]any{
 		"protocolVersion": protocolVersion,
 		"capabilities":    map[string]any{},
@@ -926,6 +930,10 @@ func (c *Client) initialize(ctx context.Context) error {
 	if err != nil {
 		return err
 	}
+	if !recordCapabilities {
+		// Runtime session refresh must not rewrite startup-only capability flags.
+		return c.notify(ctx, "notifications/initialized", map[string]any{})
+	}
 	// Record which optional capabilities the server advertises. Presence of the
 	// key (even with an empty object) signals support.
 	var ir struct {
diff --git a/internal/plugin/transport_http_test.go b/internal/plugin/transport_http_test.go
index e37c9dcc6..10200d22c 100644
--- a/internal/plugin/transport_http_test.go
+++ b/internal/plugin/transport_http_test.go
@@ -194,6 +194,27 @@ func TestHTTPTransportReinitializesExpiredSession(t *testing.T) {
 		t.Fatalf("StartAll: %v", err)
 	}
 	defer host.Close()
+	host.mu.RLock()
+	client := host.clients[0]
+	host.mu.RUnlock()
+
+	done := make(chan struct{})
+	readerDone := make(chan struct{})
+	go func() {
+		defer close(readerDone)
+		for {
+			select {
+			case <-done:
+				return
+			default:
+				_, _ = client.hasPrompts, client.hasResources
+			}
+		}
+	}()
+	defer func() {
+		close(done)
+		<-readerDone
+	}()
 
 	got, err := tools[0].Execute(ctx, json.RawMessage(`{"name":"sam"}`))
 	if err != nil {

From 3791f7a37f9da18977eefc1de1461cbbfcf2782d Mon Sep 17 00:00:00 2001
From: SivanCola <32437197+SivanCola@users.noreply.github.com>
Date: Sat, 20 Jun 2026 00:15:23 +0800
Subject: [PATCH 06/10] Clean integrated config migration test formatting

Keep the integrated migration test file gofmt-clean after combining the MCP-related PRs.

Co-authored-by: SivanCola <32437197+SivanCola@users.noreply.github.com>
---
 internal/config/migrate_test.go | 2 --
 1 file changed, 2 deletions(-)

diff --git a/internal/config/migrate_test.go b/internal/config/migrate_test.go
index cb54a3850..8e4c17bc1 100644
--- a/internal/config/migrate_test.go
+++ b/internal/config/migrate_test.go
@@ -30,7 +30,6 @@ func writeLegacy(t *testing.T, src, body string) {
 		t.Fatal(err)
 	}
 }
-
 func TestMigrateImportsKeyPluginsAndLang(t *testing.T) {
 	src, dest, home := legacyHome(t)
 	writeLegacy(t, src, `{
@@ -813,4 +812,3 @@ func TestMigrateSupportData(t *testing.T) {
 		}
 	}
 }
-

From e4f157c3bb410623bf77ad1846b90ac6328f69ec Mon Sep 17 00:00:00 2001
From: SivanCola <32437197+SivanCola@users.noreply.github.com>
Date: Sat, 20 Jun 2026 00:21:54 +0800
Subject: [PATCH 07/10] Format integrated Go changes

Run gofmt on the integrated bot and control changes so the CI formatting gate passes.

Co-authored-by: SivanCola <32437197+SivanCola@users.noreply.github.com>
---
 internal/bot/gateway.go               | 6 +++---
 internal/control/approval_e2e_test.go | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/internal/bot/gateway.go b/internal/bot/gateway.go
index 5870a7518..31aa12649 100644
--- a/internal/bot/gateway.go
+++ b/internal/bot/gateway.go
@@ -19,9 +19,9 @@ import (
 
 // GatewayConfig 是 BotGateway 的配置。
 type GatewayConfig struct {
-	Model              string
-	ToolApprovalMode   string
-	MaxSteps           int
+	Model            string
+	ToolApprovalMode string
+	MaxSteps         int
 	// ApprovalTimeout bounds how long a tool-approval/ask prompt blocks a bot
 	// session waiting for a remote user's reply. Zero falls back to
 	// defaultBotApprovalTimeout so an abandoned prompt can't wedge the bot forever
diff --git a/internal/control/approval_e2e_test.go b/internal/control/approval_e2e_test.go
index 0382d2c27..ded4768e2 100644
--- a/internal/control/approval_e2e_test.go
+++ b/internal/control/approval_e2e_test.go
@@ -97,8 +97,8 @@ func TestApprovalToolWideEndToEnd(t *testing.T) {
 // (#4626, #4402). Ask shares the same wait context as tool-approval prompts.
 func TestApprovalTimeoutDeniesWhenUnanswered(t *testing.T) {
 	c := New(Options{
-		Policy:         permission.New("ask", nil, nil, nil),
-		Sink:           event.Discard,
+		Policy:          permission.New("ask", nil, nil, nil),
+		Sink:            event.Discard,
 		ApprovalTimeout: 40 * time.Millisecond,
 	})
 	c.EnableInteractiveApproval()

From 27a9ea7c6067146f1e53ac40819efce405afc44b Mon Sep 17 00:00:00 2001
From: SivanCola <32437197+SivanCola@users.noreply.github.com>
Date: Sat, 20 Jun 2026 00:27:57 +0800
Subject: [PATCH 08/10] Fix atomic write permission test on Windows

Skip Unix permission-bit assertions on Windows, where os.FileMode only reflects limited permission state.

Co-authored-by: SivanCola <32437197+SivanCola@users.noreply.github.com>
---
 internal/fileutil/atomicwrite_test.go | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/internal/fileutil/atomicwrite_test.go b/internal/fileutil/atomicwrite_test.go
index c35b13a8d..76a40d7f2 100644
--- a/internal/fileutil/atomicwrite_test.go
+++ b/internal/fileutil/atomicwrite_test.go
@@ -3,6 +3,7 @@ package fileutil
 import (
 	"os"
 	"path/filepath"
+	"runtime"
 	"testing"
 	"time"
 )
@@ -108,7 +109,7 @@ func TestAtomicWriteFileReplacesExisting(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	if perm := info.Mode().Perm(); perm != 0o600 {
+	if perm := info.Mode().Perm(); runtime.GOOS != "windows" && perm != 0o600 {
 		t.Fatalf("perm = %o, want 600", perm)
 	}
 	// No leftover tmp files in the directory.

From 9e17a6079a35870620532cc119a3f2e13d3bd304 Mon Sep 17 00:00:00 2001
From: SivanCola <32437197+SivanCola@users.noreply.github.com>
Date: Sat, 20 Jun 2026 01:12:46 +0800
Subject: [PATCH 09/10] Fix lifecycle MCP spawn coalescing

---
 internal/plugin/lazy_test.go | 41 ++++++++++++++++++++++++++++++++++++
 internal/plugin/plugin.go    | 26 ++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/internal/plugin/lazy_test.go b/internal/plugin/lazy_test.go
index b73a4dd2e..be2ed9821 100644
--- a/internal/plugin/lazy_test.go
+++ b/internal/plugin/lazy_test.go
@@ -184,6 +184,47 @@ func TestLazyCacheHitReusesExistingSharedHostClient(t *testing.T) {
 	}
 }
 
+func TestAddWithLifecycleCoalescesConcurrentSameServer(t *testing.T) {
+	spec := helperSpec()
+	spec.Env["GO_WANT_HELPER_INIT_MS"] = "200"
+
+	host := NewHost()
+	defer host.Close()
+	lifeCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	start := make(chan struct{})
+	errs := make([]error, 2)
+	toolCounts := make([]int, 2)
+	var wg sync.WaitGroup
+	for i := range errs {
+		wg.Add(1)
+		go func(i int) {
+			defer wg.Done()
+			<-start
+			callCtx, cancelCall := context.WithTimeout(lifeCtx, 5*time.Second)
+			defer cancelCall()
+			tools, err := host.AddWithLifecycle(lifeCtx, callCtx, spec)
+			errs[i] = err
+			toolCounts[i] = len(tools)
+		}(i)
+	}
+	close(start)
+	wg.Wait()
+
+	for i, err := range errs {
+		if err != nil {
+			t.Fatalf("AddWithLifecycle call %d failed: %v (all errors: %v)", i, err, errs)
+		}
+		if toolCounts[i] != 2 {
+			t.Fatalf("AddWithLifecycle call %d returned %d tools, want 2", i, toolCounts[i])
+		}
+	}
+	if got := host.ServerNames(); len(got) != 1 || got[0] != "mock" {
+		t.Fatalf("host should contain exactly one connected server, got %v", got)
+	}
+}
+
 func TestLazyToolsetAppliesSpecReadOnlyOverrideToCachedTools(t *testing.T) {
 	redirectCache(t)
 	spec := helperSpec()
diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go
index 04cee3422..2cdb3621c 100644
--- a/internal/plugin/plugin.go
+++ b/internal/plugin/plugin.go
@@ -758,7 +758,31 @@ func (h *Host) AddWithLifecycle(lifeCtx, callCtx context.Context, s Spec) ([]too
 	if h.has(s.Name) {
 		return nil, serverAlreadyConnectedError(s.Name)
 	}
-	return h.addConnectedWithLifecycle(lifeCtx, callCtx, s)
+	attempt, owner := h.beginSpawn(s.Name)
+	if !owner {
+		select {
+		case <-attempt.done:
+			if attempt.err != nil {
+				return nil, attempt.err
+			}
+			return append([]tool.Tool(nil), attempt.tools...), nil
+		case <-callCtx.Done():
+			return nil, callCtx.Err()
+		case <-lifeCtx.Done():
+			return nil, lifeCtx.Err()
+		}
+	}
+	var tools []tool.Tool
+	var err error
+	defer func() { h.endSpawn(s.Name, tools, err) }()
+	// Double-check after acquiring the spawn token: another caller may have
+	// connected the server between our h.has check and beginSpawn.
+	if h.has(s.Name) {
+		err = serverAlreadyConnectedError(s.Name)
+		return nil, err
+	}
+	tools, err = h.addConnectedWithLifecycle(lifeCtx, callCtx, s)
+	return tools, err
 }
 
 func (h *Host) addConnected(ctx context.Context, s Spec) ([]tool.Tool, error) {

From 87d9d12d88d97a139ae23f9c21db83c7f9f73803 Mon Sep 17 00:00:00 2001
From: SivanCola <32437197+SivanCola@users.noreply.github.com>
Date: Sat, 20 Jun 2026 01:23:24 +0800
Subject: [PATCH 10/10] Address MCP integration review findings

---
 internal/config/config.go       |  2 +-
 internal/config/edit.go         |  9 ++++++-
 internal/config/edit_test.go    | 11 +++++++++
 internal/config/mcp_command.go  | 11 ++++++---
 internal/config/mcpjson_test.go | 17 +++++++++++++
 internal/config/migrate.go      | 34 ++++++++++++++++++++++---
 internal/config/migrate_test.go | 29 ++++++++++++++++++++++
 internal/plugin/lazy.go         |  8 ++++++
 internal/plugin/lazy_test.go    | 44 +++++++++++++++++++++++++++++++++
 9 files changed, 156 insertions(+), 9 deletions(-)

diff --git a/internal/config/config.go b/internal/config/config.go
index c002551a6..aa8543fc6 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -2786,7 +2786,7 @@ func SourcePathForRoot(root string) string {
 // main config into an unparseable state that leaves the app with no usable
 // models (#4615, #4708).
 func (c *Config) WriteFile(path string) error {
-	return fileutil.AtomicWriteFile(path, []byte(RenderTOMLForScope(c, renderScopeForPath(path))), 0o644)
+	return fileutil.AtomicWriteFile(path, []byte(RenderTOMLForScope(c, renderScopeForPath(path))), configFilePerm(path))
 }
 
 // Provider returns the named provider entry.
diff --git a/internal/config/edit.go b/internal/config/edit.go
index 7a097f6c8..20080818f 100644
--- a/internal/config/edit.go
+++ b/internal/config/edit.go
@@ -828,7 +828,14 @@ func writeConfigFile(path, body string) error {
 	if strings.TrimSpace(path) == "" {
 		return fmt.Errorf("save: empty config path")
 	}
-	return fileutil.AtomicWriteFile(path, []byte(body), 0o644)
+	return fileutil.AtomicWriteFile(path, []byte(body), configFilePerm(path))
+}
+
+func configFilePerm(path string) os.FileMode {
+	if isUserConfigPath(path) {
+		return 0o600
+	}
+	return 0o644
 }
 
 func renderScopeForPath(path string) RenderScope {
diff --git a/internal/config/edit_test.go b/internal/config/edit_test.go
index 2012ffae8..805488e0d 100644
--- a/internal/config/edit_test.go
+++ b/internal/config/edit_test.go
@@ -4,6 +4,7 @@ import (
 	"os"
 	"path/filepath"
 	"reflect"
+	"runtime"
 	"slices"
 	"strings"
 	"testing"
@@ -951,6 +952,11 @@ func TestSaveToScopesUserAndProjectFiles(t *testing.T) {
 	if !strings.Contains(string(userBody), "[desktop]") {
 		t.Fatalf("user config should include desktop preferences:\n%s", userBody)
 	}
+	if info, err := os.Stat(userPath); err != nil {
+		t.Fatalf("stat user config: %v", err)
+	} else if runtime.GOOS != "windows" && info.Mode().Perm() != 0o600 {
+		t.Fatalf("user config mode = %o, want 600", info.Mode().Perm())
+	}
 
 	projectPath := filepath.Join(t.TempDir(), "reasonix.toml")
 	if err := c.SaveTo(projectPath); err != nil {
@@ -963,6 +969,11 @@ func TestSaveToScopesUserAndProjectFiles(t *testing.T) {
 	if strings.Contains(string(projectBody), "[desktop]") || strings.Contains(string(projectBody), "close_behavior") {
 		t.Fatalf("project config should not include desktop preferences:\n%s", projectBody)
 	}
+	if info, err := os.Stat(projectPath); err != nil {
+		t.Fatalf("stat project config: %v", err)
+	} else if runtime.GOOS != "windows" && info.Mode().Perm() != 0o644 {
+		t.Fatalf("project config mode = %o, want 644", info.Mode().Perm())
+	}
 }
 
 func TestLoadForRootKeepsOfficialProviderAliasesDistinct(t *testing.T) {
diff --git a/internal/config/mcp_command.go b/internal/config/mcp_command.go
index 1eb6ee506..2d68608cf 100644
--- a/internal/config/mcp_command.go
+++ b/internal/config/mcp_command.go
@@ -8,8 +8,9 @@ import (
 
 // NormalizePluginCommandLine repairs the common MCP copy/paste mistake where a
 // tutorial's full command line is placed in command while args is left empty.
-// Valid commands that are just paths with spaces are left untouched unless they
-// are quoted or start with a known MCP runner such as npx/uvx/node.
+// Valid commands that are just paths with spaces are left untouched when they
+// look path-like; ordinary custom commands such as "custom-mcp --stdio" still
+// split so the executable and arguments survive GUI/legacy normalization.
 func NormalizePluginCommandLine(e PluginEntry) (PluginEntry, bool) {
 	if pluginEntryTransport(e) != "stdio" || len(e.Args) > 0 {
 		e.Command = strings.TrimSpace(e.Command)
@@ -56,7 +57,11 @@ func shouldSplitPluginCommand(original, first string) bool {
 	if strings.HasPrefix(trimmed, `"`) || strings.HasPrefix(trimmed, `'`) {
 		return true
 	}
-	return knownMCPCommandRunner(first)
+	return knownMCPCommandRunner(first) || !hasPathSeparator(first)
+}
+
+func hasPathSeparator(s string) bool {
+	return strings.ContainsAny(s, `/\`)
 }
 
 func knownMCPCommandRunner(command string) bool {
diff --git a/internal/config/mcpjson_test.go b/internal/config/mcpjson_test.go
index 5a5ef5b5e..2776e9e7b 100644
--- a/internal/config/mcpjson_test.go
+++ b/internal/config/mcpjson_test.go
@@ -62,6 +62,13 @@ func TestNormalizePluginCommandLine(t *testing.T) {
 			wantArgs:    []string{"-y", "@playwright/mcp"},
 			wantChanged: true,
 		},
+		{
+			name:        "custom command pasted with args",
+			in:          PluginEntry{Name: "custom", Command: "custom-mcp --stdio"},
+			wantCommand: "custom-mcp",
+			wantArgs:    []string{"--stdio"},
+			wantChanged: true,
+		},
 		{
 			name:        "quoted command path",
 			in:          PluginEntry{Name: "quoted", Command: `"C:\Program Files\nodejs\npx.cmd" -y @example/mcp`},
@@ -105,6 +112,16 @@ func TestNormalizePluginCommandLine(t *testing.T) {
 	}
 }
 
+func TestParseLegacyMCPSpecSplitsCustomCommandArgs(t *testing.T) {
+	got, ok := parseLegacyMCPSpec("fs=custom-mcp --stdio")
+	if !ok {
+		t.Fatal("parseLegacyMCPSpec returned false")
+	}
+	if got.Name != "fs" || got.Command != "custom-mcp" || strings.Join(got.Args, "\x00") != "--stdio" {
+		t.Fatalf("legacy custom MCP spec = %+v, want name fs command custom-mcp args [--stdio]", got)
+	}
+}
+
 func TestUpsertPluginNormalizesPastedCommandLine(t *testing.T) {
 	cfg := &Config{}
 	if err := cfg.UpsertPlugin(PluginEntry{Name: "playwright", Command: "npx -y @playwright/mcp"}); err != nil {
diff --git a/internal/config/migrate.go b/internal/config/migrate.go
index 67b9df4e1..aace4c5cb 100644
--- a/internal/config/migrate.go
+++ b/internal/config/migrate.go
@@ -613,17 +613,29 @@ func migrateSupportData(legacyDir, newDir string) []string {
 }
 
 func copyFile(src, dst string) error {
+	info, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
 	in, err := os.Open(src)
 	if err != nil {
 		return err
 	}
 	defer in.Close()
 
-	if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
+	parentMode := os.FileMode(0o755)
+	if info.Mode().Perm()&0o077 == 0 {
+		parentMode = 0o700
+	}
+	if err := os.MkdirAll(filepath.Dir(dst), parentMode); err != nil {
 		return err
 	}
 
-	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
+	perm := info.Mode().Perm()
+	if perm == 0 {
+		perm = 0o600
+	}
+	out, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
 	if err != nil {
 		return err
 	}
@@ -632,16 +644,30 @@ func copyFile(src, dst string) error {
 	if _, err = io.Copy(out, in); err != nil {
 		return err
 	}
-	return out.Sync()
+	if err := out.Sync(); err != nil {
+		return err
+	}
+	return os.Chmod(dst, perm)
 }
 
 func copyDir(src, dst string) error {
+	info, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
 	entries, err := os.ReadDir(src)
 	if err != nil {
 		return err
 	}
 
-	if err := os.MkdirAll(dst, 0o755); err != nil {
+	perm := info.Mode().Perm()
+	if perm == 0 {
+		perm = 0o700
+	}
+	if err := os.MkdirAll(dst, perm); err != nil {
+		return err
+	}
+	if err := os.Chmod(dst, perm); err != nil {
 		return err
 	}
 
diff --git a/internal/config/migrate_test.go b/internal/config/migrate_test.go
index 8e4c17bc1..7e9452b74 100644
--- a/internal/config/migrate_test.go
+++ b/internal/config/migrate_test.go
@@ -787,6 +787,17 @@ func TestMigrateSupportData(t *testing.T) {
 			t.Fatal(err)
 		}
 	}
+	if runtime.GOOS != "windows" {
+		if err := os.Chmod(filepath.Join(legacyDir, "sessions"), 0o700); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.Chmod(filepath.Join(legacyDir, "sessions", "s1.json"), 0o600); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.Chmod(filepath.Join(legacyDir, "hooks.json"), 0o600); err != nil {
+			t.Fatal(err)
+		}
+	}
 
 	res, err := MigrateLegacyIfNeeded()
 	if err != nil {
@@ -811,4 +822,22 @@ func TestMigrateSupportData(t *testing.T) {
 			t.Errorf("file %s content mismatch: got %q, want %q", rel, string(data), expectedContent)
 		}
 	}
+	if runtime.GOOS != "windows" {
+		for _, check := range []struct {
+			rel  string
+			perm os.FileMode
+		}{
+			{rel: "sessions", perm: 0o700},
+			{rel: "sessions/s1.json", perm: 0o600},
+			{rel: "hooks.json", perm: 0o600},
+		} {
+			info, err := os.Stat(filepath.Join(newDir, check.rel))
+			if err != nil {
+				t.Fatalf("stat migrated %s: %v", check.rel, err)
+			}
+			if got := info.Mode().Perm(); got != check.perm {
+				t.Fatalf("migrated %s mode = %o, want %o", check.rel, got, check.perm)
+			}
+		}
+	}
 }
diff --git a/internal/plugin/lazy.go b/internal/plugin/lazy.go
index 69a3be004..be72b39e1 100644
--- a/internal/plugin/lazy.go
+++ b/internal/plugin/lazy.go
@@ -233,6 +233,14 @@ func (lt *lazyTool) Execute(ctx context.Context, args json.RawMessage) (string,
 		sp.mu.Lock()
 		defer sp.mu.Unlock()
 		if err != nil {
+			if errors.Is(err, context.DeadlineExceeded) {
+				// A slow cold start can succeed on a later turn after npm/node
+				// caches warm up or a remote MCP endpoint responds. Do not pin
+				// the session into spawnFailed for a transient startup budget miss.
+				sp.state = spawnIdle
+				sp.spawnErr = nil
+				return "", fmt.Errorf("MCP server %q startup timed out — retry this tool on a later turn", sp.spec.Name)
+			}
 			if errors.Is(err, ErrSpawningInFlight) {
 				// Another tab is already spawning this server on the shared
 				// host, but this lazySpawn has no goroutine that can publish
diff --git a/internal/plugin/lazy_test.go b/internal/plugin/lazy_test.go
index be2ed9821..44caf5b61 100644
--- a/internal/plugin/lazy_test.go
+++ b/internal/plugin/lazy_test.go
@@ -225,6 +225,50 @@ func TestAddWithLifecycleCoalescesConcurrentSameServer(t *testing.T) {
 	}
 }
 
+func TestLazyCacheHitStartupTimeoutCanRetry(t *testing.T) {
+	redirectCache(t)
+	spec := helperSpec()
+	spec.Env["GO_WANT_HELPER_INIT_MS"] = fmt.Sprint(int(defaultStartTimeout/time.Millisecond) + 200)
+	writeMockCache(t, spec)
+
+	cs, ok := LoadCachedSchema(spec.Name, SpecFingerprint(spec))
+	if !ok {
+		t.Fatal("LoadCachedSchema: miss right after save (sanity)")
+	}
+
+	host := NewHost()
+	defer host.Close()
+	reg := tool.NewRegistry()
+	ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second)
+	defer cancel()
+
+	tools := LazyToolset(spec, cs, host, reg, ctx, false)
+	for _, lt := range tools {
+		reg.Add(lt)
+	}
+	echo, ok := reg.Get("mcp__mock__echo")
+	if !ok {
+		t.Fatal("registry missing mcp__mock__echo after LazyToolset")
+	}
+	lazyEcho, ok := echo.(*lazyTool)
+	if !ok {
+		t.Fatalf("pre-Execute echo should be a *lazyTool, got %T", echo)
+	}
+
+	if _, err := echo.Execute(ctx, json.RawMessage(`{"msg":"slow"}`)); err == nil || !strings.Contains(err.Error(), "startup timed out") {
+		t.Fatalf("first Execute error = %v, want startup timed out", err)
+	}
+
+	lazyEcho.shared.spec.Env["GO_WANT_HELPER_INIT_MS"] = "0"
+	out, err := echo.Execute(ctx, json.RawMessage(`{"msg":"retry"}`))
+	if err != nil {
+		t.Fatalf("second Execute after timeout should retry: %v", err)
+	}
+	if out != "echo: retry" {
+		t.Fatalf("Execute result = %q, want %q", out, "echo: retry")
+	}
+}
+
 func TestLazyToolsetAppliesSpecReadOnlyOverrideToCachedTools(t *testing.T) {
 	redirectCache(t)
 	spec := helperSpec()