From 3aadc93924dcbd556ed8e28d207164e60d00c0e5 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Thu, 9 Oct 2025 13:34:17 +0100 Subject: [PATCH 01/15] feat: sandboxing for unified exec --- codex-rs/core/src/codex.rs | 10 - codex-rs/core/src/executor/mod.rs | 2 + codex-rs/core/src/executor/runner.rs | 44 +- codex-rs/core/src/executor/sandbox.rs | 26 + codex-rs/core/src/landlock.rs | 2 +- codex-rs/core/src/seatbelt.rs | 4 +- .../core/src/tools/handlers/unified_exec.rs | 21 +- codex-rs/core/src/unified_exec/errors.rs | 4 + codex-rs/core/src/unified_exec/mod.rs | 557 +++++++++++++----- 9 files changed, 498 insertions(+), 172 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index cae47cb322..55ad2d9a9b 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1075,16 +1075,6 @@ impl Session { .map_err(FunctionCallError::RespondToModel) } - pub(crate) async fn run_unified_exec_request( - &self, - request: crate::unified_exec::UnifiedExecRequest<'_>, - ) -> Result { - self.services - .unified_exec_manager - .handle_request(request) - .await - } - pub async fn interrupt_task(self: &Arc) { info!("interrupt received: abort current task, if any"); self.abort_all_tasks(TurnAbortReason::Interrupted).await; diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs index 97d7b29294..ddfb722b95 100644 --- a/codex-rs/core/src/executor/mod.rs +++ b/codex-rs/core/src/executor/mod.rs @@ -8,6 +8,8 @@ pub(crate) use runner::ExecutionRequest; pub(crate) use runner::Executor; pub(crate) use runner::ExecutorConfig; pub(crate) use runner::normalize_exec_result; +pub(crate) use sandbox::request_retry_without_sandbox; +pub(crate) use sandbox::select_sandbox; pub(crate) mod linkers { use crate::exec::ExecParams; diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index f475aad67e..9a931f1f92 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use std::path::PathBuf; use std::sync::Arc; use std::sync::RwLock; @@ -17,6 +18,7 @@ use crate::exec::StdoutStream; use crate::exec::StreamOutput; use crate::exec::process_exec_tool_call; use crate::executor::errors::ExecError; +use crate::executor::sandbox::request_retry_without_sandbox; use crate::executor::sandbox::select_sandbox; use crate::function_tool::FunctionCallError; use crate::protocol::AskForApproval; @@ -24,7 +26,6 @@ use crate::protocol::ReviewDecision; use crate::protocol::SandboxPolicy; use crate::shell; use crate::tools::context::ExecCommandContext; -use codex_otel::otel_event_manager::ToolDecisionSource; #[derive(Clone, Debug)] pub(crate) struct ExecutorConfig { @@ -45,6 +46,10 @@ impl ExecutorConfig { codex_linux_sandbox_exe, } } + + pub(crate) fn codex_linux_sandbox_exe(&self) -> Option { + self.codex_linux_sandbox_exe.clone() + } } /// Coordinates sandbox selection, backend-specific preparation, and command @@ -62,6 +67,18 @@ impl Executor { } } + pub(crate) fn approval_cache_snapshot(&self) -> HashSet> { + self.approval_cache.snapshot() + } + + pub(crate) fn record_session_approval(&self, command: Vec) { + self.approval_cache.insert(command); + } + + pub(crate) fn config_snapshot(&self) -> Option { + self.config.read().ok().map(|cfg| cfg.clone()) + } + /// Updates the sandbox policy and working directory used for future /// executions without recreating the executor. pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) { @@ -173,22 +190,17 @@ impl Executor { format!("Execution failed: {sandbox_error}"), ) .await; - let decision = session - .request_command_approval( - context.sub_id.to_string(), - context.call_id.to_string(), - request.approval_command.clone(), - request.params.cwd.clone(), - Some("command failed; retry without sandbox?".to_string()), - ) - .await; - - context.otel_event_manager.tool_decision( - &context.tool_name, + let decision = request_retry_without_sandbox( + session, + &context.sub_id, &context.call_id, - decision, - ToolDecisionSource::User, - ); + request.approval_command.clone(), + request.params.cwd.clone(), + Some("command failed; retry without sandbox?".to_string()), + &context.tool_name, + &context.otel_event_manager, + ) + .await; match decision { ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { if matches!(decision, ReviewDecision::ApprovedForSession) { diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 5c01ff69b4..dd15e62794 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -13,6 +13,7 @@ use codex_otel::otel_event_manager::ToolDecisionSource; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; use std::collections::HashSet; +use std::path::PathBuf; /// Sandbox placement options selected for an execution run, including whether /// to escalate after failures and whether approvals should persist. @@ -50,6 +51,31 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> ) } +pub(crate) async fn request_retry_without_sandbox( + session: &Session, + sub_id: &str, + call_id: &str, + approval_command: Vec, + cwd: PathBuf, + prompt: Option, + tool_name: &str, + otel_event_manager: &OtelEventManager, +) -> ReviewDecision { + let decision = session + .request_command_approval( + sub_id.to_string(), + call_id.to_string(), + approval_command, + cwd, + prompt, + ) + .await; + + otel_event_manager.tool_decision(tool_name, call_id, decision, ToolDecisionSource::User); + + decision +} + /// Determines how a command should be sandboxed, prompting the user when /// policy requires explicit approval. #[allow(clippy::too_many_arguments)] diff --git a/codex-rs/core/src/landlock.rs b/codex-rs/core/src/landlock.rs index 264ea747ca..f5b765efd6 100644 --- a/codex-rs/core/src/landlock.rs +++ b/codex-rs/core/src/landlock.rs @@ -40,7 +40,7 @@ where } /// Converts the sandbox policy into the CLI invocation for `codex-linux-sandbox`. -fn create_linux_sandbox_command_args( +pub(crate) fn create_linux_sandbox_command_args( command: Vec, sandbox_policy: &SandboxPolicy, sandbox_policy_cwd: &Path, diff --git a/codex-rs/core/src/seatbelt.rs b/codex-rs/core/src/seatbelt.rs index 09e93668bc..abd88d41bf 100644 --- a/codex-rs/core/src/seatbelt.rs +++ b/codex-rs/core/src/seatbelt.rs @@ -14,7 +14,7 @@ const MACOS_SEATBELT_BASE_POLICY: &str = include_str!("seatbelt_base_policy.sbpl /// to defend against an attacker trying to inject a malicious version on the /// PATH. If /usr/bin/sandbox-exec has been tampered with, then the attacker /// already has root access. -const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec"; +pub(crate) const MACOS_PATH_TO_SEATBELT_EXECUTABLE: &str = "/usr/bin/sandbox-exec"; pub async fn spawn_command_under_seatbelt( command: Vec, @@ -39,7 +39,7 @@ pub async fn spawn_command_under_seatbelt( .await } -fn create_seatbelt_command_args( +pub(crate) fn create_seatbelt_command_args( command: Vec, sandbox_policy: &SandboxPolicy, sandbox_policy_cwd: &Path, diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index ce47dded3c..8665eeba91 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -35,7 +35,13 @@ impl ToolHandler for UnifiedExecHandler { async fn handle(&self, invocation: ToolInvocation) -> Result { let ToolInvocation { - session, payload, .. + session, + turn, + sub_id, + call_id, + tool_name, + payload, + .. } = invocation; let args = match payload { @@ -79,7 +85,18 @@ impl ToolHandler for UnifiedExecHandler { }; let value = session - .run_unified_exec_request(request) + .services + .unified_exec_manager + .handle_request( + request, + crate::unified_exec::UnifiedExecContext { + session: &session, + turn: turn.as_ref(), + sub_id: &sub_id, + call_id: &call_id, + tool_name: &tool_name, + }, + ) .await .map_err(|err| { FunctionCallError::RespondToModel(format!("unified exec failed: {err:?}")) diff --git a/codex-rs/core/src/unified_exec/errors.rs b/codex-rs/core/src/unified_exec/errors.rs index 6bf5bf7ec5..a162e0f0e7 100644 --- a/codex-rs/core/src/unified_exec/errors.rs +++ b/codex-rs/core/src/unified_exec/errors.rs @@ -13,6 +13,10 @@ pub(crate) enum UnifiedExecError { WriteToStdin, #[error("missing command line for unified exec request")] MissingCommandLine, + #[error("missing codex-linux-sandbox executable path")] + MissingLinuxSandboxExecutable, + #[error("unified exec command rejected by user")] + UserRejected, } impl UnifiedExecError { diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 48b82e3e8c..d2d962060c 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -1,3 +1,4 @@ +use anyhow::anyhow; use portable_pty::CommandBuilder; use portable_pty::PtySize; use portable_pty::native_pty_system; @@ -5,6 +6,8 @@ use std::collections::HashMap; use std::collections::VecDeque; use std::io::ErrorKind; use std::io::Read; +use std::path::Path; +use std::path::PathBuf; use std::sync::Arc; use std::sync::Mutex as StdMutex; use std::sync::atomic::AtomicBool; @@ -17,7 +20,22 @@ use tokio::task::JoinHandle; use tokio::time::Duration; use tokio::time::Instant; +use crate::codex::Session; +use crate::codex::TurnContext; +use crate::exec::ExecParams; +use crate::exec::SandboxType; use crate::exec_command::ExecCommandSession; +use crate::executor::ExecutionMode; +use crate::executor::ExecutionRequest; +use crate::executor::request_retry_without_sandbox; +use crate::executor::select_sandbox; +use crate::landlock::create_linux_sandbox_command_args; +use crate::protocol::ReviewDecision; +use crate::protocol::SandboxPolicy; +use crate::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE; +use crate::seatbelt::create_seatbelt_command_args; +use crate::spawn::CODEX_SANDBOX_ENV_VAR; +use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use crate::truncate::truncate_middle; mod errors; @@ -28,6 +46,14 @@ const DEFAULT_TIMEOUT_MS: u64 = 1_000; const MAX_TIMEOUT_MS: u64 = 60_000; const UNIFIED_EXEC_OUTPUT_MAX_BYTES: usize = 128 * 1024; // 128 KiB +pub(crate) struct UnifiedExecContext<'a> { + pub session: &'a Session, + pub turn: &'a TurnContext, + pub sub_id: &'a str, + pub call_id: &'a str, + pub tool_name: &'a str, +} + #[derive(Debug)] pub(crate) struct UnifiedExecRequest<'a> { pub session_id: Option, @@ -99,6 +125,47 @@ impl OutputBufferState { type OutputBuffer = Arc>; type OutputHandles = (OutputBuffer, Arc); +fn build_launch_for_sandbox( + sandbox: SandboxType, + command: &[String], + sandbox_policy: &SandboxPolicy, + sandbox_policy_cwd: &Path, + codex_linux_sandbox_exe: Option<&PathBuf>, +) -> Result<(String, Vec, HashMap), UnifiedExecError> { + let mut env = HashMap::new(); + if !sandbox_policy.has_full_network_access() { + env.insert( + CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(), + "1".to_string(), + ); + } + + match sandbox { + SandboxType::None => { + let (program, args) = command + .split_first() + .ok_or(UnifiedExecError::MissingCommandLine)?; + Ok((program.clone(), args.to_vec(), env)) + } + SandboxType::MacosSeatbelt => { + env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string()); + let args = + create_seatbelt_command_args(command.to_vec(), sandbox_policy, sandbox_policy_cwd); + Ok((MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string(), args, env)) + } + SandboxType::LinuxSeccomp => { + let exe = + codex_linux_sandbox_exe.ok_or(UnifiedExecError::MissingLinuxSandboxExecutable)?; + let args = create_linux_sandbox_command_args( + command.to_vec(), + sandbox_policy, + sandbox_policy_cwd, + ); + Ok((exe.to_string_lossy().to_string(), args, env)) + } + } +} + impl ManagedUnifiedExecSession { fn new( session: ExecCommandSession, @@ -149,9 +216,143 @@ impl Drop for ManagedUnifiedExecSession { } impl UnifiedExecSessionManager { + async fn open_session_with_sandbox( + &self, + command: Vec, + context: &UnifiedExecContext<'_>, + ) -> Result< + ( + ExecCommandSession, + tokio::sync::broadcast::Receiver>, + ), + UnifiedExecError, + > { + let execution_request = ExecutionRequest { + params: ExecParams { + command: command.clone(), + cwd: context.turn.cwd.clone(), + timeout_ms: None, + env: HashMap::new(), + with_escalated_permissions: None, + justification: None, + }, + approval_command: command.clone(), + mode: ExecutionMode::Shell, + stdout_stream: None, + use_shell_profile: false, + }; + + let executor = &context.session.services.executor; + let approval_cache = executor.approval_cache_snapshot(); + let config = executor + .config_snapshot() + .ok_or_else(|| UnifiedExecError::create_session(anyhow!("executor config poisoned")))?; + let codex_linux_sandbox_exe = config.codex_linux_sandbox_exe(); + let otel_event_manager = context.turn.client.get_otel_event_manager(); + let sandbox_decision = select_sandbox( + &execution_request, + context.turn.approval_policy, + approval_cache, + &config, + context.session, + context.sub_id, + context.call_id, + &otel_event_manager, + ) + .await + .map_err(|err| UnifiedExecError::create_session(anyhow!(err.to_string())))?; + + if sandbox_decision.record_session_approval { + context + .session + .services + .executor + .record_session_approval(execution_request.approval_command.clone()); + } + + let (program, args, env) = build_launch_for_sandbox( + sandbox_decision.initial_sandbox, + &command, + &context.turn.sandbox_policy, + &context.turn.cwd, + codex_linux_sandbox_exe.as_ref(), + )?; + + match create_unified_exec_session(&program, &args, &env).await { + Ok(result) => Ok(result), + Err(err) if sandbox_decision.escalate_on_failure => { + self.retry_without_sandbox(&command, context, err, &otel_event_manager) + .await + } + Err(err) => Err(err), + } + } + + async fn retry_without_sandbox( + &self, + command: &[String], + context: &UnifiedExecContext<'_>, + previous_error: UnifiedExecError, + otel_event_manager: &codex_otel::otel_event_manager::OtelEventManager, + ) -> Result< + ( + ExecCommandSession, + tokio::sync::broadcast::Receiver>, + ), + UnifiedExecError, + > { + context + .session + .notify_background_event( + context.sub_id, + format!("Execution failed: {previous_error}"), + ) + .await; + + let decision = request_retry_without_sandbox( + context.session, + context.sub_id, + context.call_id, + command.to_vec(), + context.turn.cwd.clone(), + Some("command failed; retry without sandbox?".to_string()), + context.tool_name, + otel_event_manager, + ) + .await; + + match decision { + ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { + if matches!(decision, ReviewDecision::ApprovedForSession) { + context + .session + .services + .executor + .record_session_approval(command.to_vec()); + } + + context + .session + .notify_background_event(context.sub_id, "retrying command without sandbox") + .await; + + let (program, args, env) = build_launch_for_sandbox( + SandboxType::None, + command, + &context.turn.sandbox_policy, + &context.turn.cwd, + None, + )?; + create_unified_exec_session(&program, &args, &env).await + } + ReviewDecision::Denied | ReviewDecision::Abort => Err(UnifiedExecError::UserRejected), + } + } + pub async fn handle_request( &self, request: UnifiedExecRequest<'_>, + context: UnifiedExecContext<'_>, ) -> Result { let (timeout_ms, timeout_warning) = match request.timeout_ms { Some(requested) if requested > MAX_TIMEOUT_MS => ( @@ -196,7 +397,8 @@ impl UnifiedExecSessionManager { } else { let command = request.input_chunks.to_vec(); let new_id = self.next_session_id.fetch_add(1, Ordering::SeqCst); - let (session, initial_output_rx) = create_unified_exec_session(&command).await?; + let (session, initial_output_rx) = + self.open_session_with_sandbox(command, &context).await?; let managed_session = ManagedUnifiedExecSession::new(session, initial_output_rx); let (buffer, notify) = managed_session.output_handles(); writer_tx = managed_session.writer_sender(); @@ -299,7 +501,9 @@ impl UnifiedExecSessionManager { } async fn create_unified_exec_session( - command: &[String], + program: &str, + args: &[String], + env: &HashMap, ) -> Result< ( ExecCommandSession, @@ -307,7 +511,7 @@ async fn create_unified_exec_session( ), UnifiedExecError, > { - if command.is_empty() { + if program.is_empty() { return Err(UnifiedExecError::MissingCommandLine); } @@ -323,9 +527,12 @@ async fn create_unified_exec_session( .map_err(UnifiedExecError::create_session)?; // Safe thanks to the check at the top of the function. - let mut command_builder = CommandBuilder::new(command[0].clone()); - for arg in &command[1..] { - command_builder.arg(arg); + let mut command_builder = CommandBuilder::new(program.to_string()); + for arg in args { + command_builder.arg(arg.clone()); + } + for (key, value) in env { + command_builder.env(key.clone(), value.clone()); } let mut child = pair @@ -404,8 +611,55 @@ async fn create_unified_exec_session( #[cfg(test)] mod tests { use super::*; + use crate::codex::Session; + use crate::codex::TurnContext; + use crate::codex::make_session_and_context; + use crate::protocol::AskForApproval; + use crate::protocol::SandboxPolicy; #[cfg(unix)] use core_test_support::skip_if_sandbox; + use std::sync::Arc; + + fn test_session_and_turn() -> (Arc, Arc) { + let (session, mut turn) = make_session_and_context(); + turn.approval_policy = AskForApproval::Never; + turn.sandbox_policy = SandboxPolicy::DangerFullAccess; + session + .services + .executor + .update_environment(turn.sandbox_policy.clone(), turn.cwd.clone()); + (Arc::new(session), Arc::new(turn)) + } + + async fn run_unified_exec_request( + session: &Arc, + turn: &Arc, + session_id: Option, + input: Vec, + timeout_ms: Option, + ) -> Result { + let request_input = input; + let request = UnifiedExecRequest { + session_id, + input_chunks: &request_input, + timeout_ms, + }; + + session + .services + .unified_exec_manager + .handle_request( + request, + UnifiedExecContext { + session: &session, + turn: turn.as_ref(), + sub_id: "sub", + call_id: "call", + tool_name: "unified_exec", + }, + ) + .await + } #[test] fn push_chunk_trims_only_excess_bytes() { @@ -429,35 +683,38 @@ mod tests { async fn unified_exec_persists_across_requests_jif() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let open_shell = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let open_shell = run_unified_exec_request( + &session, + &turn, + None, + vec!["bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let session_id = open_shell.session_id.expect("expected session_id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &[ - "export".to_string(), - "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), - ], - timeout_ms: Some(2_500), - }) - .await?; - - let out_2 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec![ + "export".to_string(), + "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), + ], + Some(2_500), + ) + .await?; + + let out_2 = run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], + Some(2_500), + ) + .await?; assert!(out_2.output.contains("codex")); Ok(()) @@ -468,44 +725,48 @@ mod tests { async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let shell_a = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["/bin/bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let shell_a = run_unified_exec_request( + &session, + &turn, + None, + vec!["/bin/bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let session_a = shell_a.session_id.expect("expected session id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_a), - input_chunks: &["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; - - let out_2 = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &[ - "echo".to_string(), - "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(), - ], - timeout_ms: Some(2_500), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_a), + vec!["export CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string()], + Some(2_500), + ) + .await?; + + let out_2 = run_unified_exec_request( + &session, + &turn, + None, + vec![ + "echo".to_string(), + "$CODEX_INTERACTIVE_SHELL_VAR\n".to_string(), + ], + Some(2_500), + ) + .await?; assert!(!out_2.output.contains("codex")); - let out_3 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_a), - input_chunks: &["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let out_3 = run_unified_exec_request( + &session, + &turn, + Some(session_a), + vec!["echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], + Some(2_500), + ) + .await?; assert!(out_3.output.contains("codex")); Ok(()) @@ -516,47 +777,45 @@ mod tests { async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let open_shell = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let open_shell = run_unified_exec_request( + &session, + &turn, + None, + vec!["bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let session_id = open_shell.session_id.expect("expected session id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &[ - "export".to_string(), - "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), - ], - timeout_ms: Some(2_500), - }) - .await?; - - let out_2 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], - timeout_ms: Some(10), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec![ + "export".to_string(), + "CODEX_INTERACTIVE_SHELL_VAR=codex\n".to_string(), + ], + Some(2_500), + ) + .await?; + + let out_2 = run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec!["sleep 5 && echo $CODEX_INTERACTIVE_SHELL_VAR\n".to_string()], + Some(10), + ) + .await?; assert!(!out_2.output.contains("codex")); tokio::time::sleep(Duration::from_secs(7)).await; - let empty = Vec::new(); - let out_3 = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &empty, - timeout_ms: Some(100), - }) - .await?; + let out_3 = + run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100)) + .await?; assert!(out_3.output.contains("codex")); @@ -567,15 +826,16 @@ mod tests { #[tokio::test] #[ignore] // Ignored while we have a better way to test this. async fn requests_with_large_timeout_are_capped() -> Result<(), UnifiedExecError> { - let manager = UnifiedExecSessionManager::default(); - - let result = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["echo".to_string(), "codex".to_string()], - timeout_ms: Some(120_000), - }) - .await?; + let (session, turn) = test_session_and_turn(); + + let result = run_unified_exec_request( + &session, + &turn, + None, + vec!["echo".to_string(), "codex".to_string()], + Some(120_000), + ) + .await?; assert!(result.output.starts_with( "Warning: requested timeout 120000ms exceeds maximum of 60000ms; clamping to 60000ms.\n" @@ -589,19 +849,28 @@ mod tests { #[tokio::test] #[ignore] // Ignored while we have a better way to test this. async fn completed_commands_do_not_persist_sessions() -> Result<(), UnifiedExecError> { - let manager = UnifiedExecSessionManager::default(); - let result = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["/bin/echo".to_string(), "codex".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let (session, turn) = test_session_and_turn(); + let result = run_unified_exec_request( + &session, + &turn, + None, + vec!["/bin/echo".to_string(), "codex".to_string()], + Some(2_500), + ) + .await?; assert!(result.session_id.is_none()); assert!(result.output.contains("codex")); - assert!(manager.sessions.lock().await.is_empty()); + assert!( + session + .services + .unified_exec_manager + .sessions + .lock() + .await + .is_empty() + ); Ok(()) } @@ -611,35 +880,33 @@ mod tests { async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); - let manager = UnifiedExecSessionManager::default(); + let (session, turn) = test_session_and_turn(); - let open_shell = manager - .handle_request(UnifiedExecRequest { - session_id: None, - input_chunks: &["/bin/bash".to_string(), "-i".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + let open_shell = run_unified_exec_request( + &session, + &turn, + None, + vec!["/bin/bash".to_string(), "-i".to_string()], + Some(2_500), + ) + .await?; let session_id = open_shell.session_id.expect("expected session id"); - manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &["exit\n".to_string()], - timeout_ms: Some(2_500), - }) - .await?; + run_unified_exec_request( + &session, + &turn, + Some(session_id), + vec!["exit\n".to_string()], + Some(2_500), + ) + .await?; tokio::time::sleep(Duration::from_millis(200)).await; - let err = manager - .handle_request(UnifiedExecRequest { - session_id: Some(session_id), - input_chunks: &[], - timeout_ms: Some(100), - }) - .await - .expect_err("expected unknown session error"); + let err = + run_unified_exec_request(&session, &turn, Some(session_id), Vec::new(), Some(100)) + .await + .expect_err("expected unknown session error"); match err { UnifiedExecError::UnknownSessionId { session_id: err_id } => { @@ -648,7 +915,15 @@ mod tests { other => panic!("expected UnknownSessionId, got {other:?}"), } - assert!(!manager.sessions.lock().await.contains_key(&session_id)); + assert!( + !session + .services + .unified_exec_manager + .sessions + .lock() + .await + .contains_key(&session_id) + ); Ok(()) } From 29a23050258ea51d5ccbc849bb0a26826c86f322 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Thu, 9 Oct 2025 13:51:25 +0100 Subject: [PATCH 02/15] V2 --- codex-rs/core/src/executor/mod.rs | 3 + codex-rs/core/src/executor/sandbox.rs | 77 ++++++++++++++++++++++++ codex-rs/core/src/unified_exec/errors.rs | 12 ++++ codex-rs/core/src/unified_exec/mod.rs | 71 ++++------------------ 4 files changed, 103 insertions(+), 60 deletions(-) diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs index ddfb722b95..aee0e62531 100644 --- a/codex-rs/core/src/executor/mod.rs +++ b/codex-rs/core/src/executor/mod.rs @@ -8,6 +8,9 @@ pub(crate) use runner::ExecutionRequest; pub(crate) use runner::Executor; pub(crate) use runner::ExecutorConfig; pub(crate) use runner::normalize_exec_result; +pub(crate) use sandbox::SandboxLaunch; +pub(crate) use sandbox::SandboxLaunchError; +pub(crate) use sandbox::build_launch_for_sandbox; pub(crate) use sandbox::request_retry_without_sandbox; pub(crate) use sandbox::select_sandbox; diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index dd15e62794..a7e26d2c59 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -5,15 +5,92 @@ use crate::executor::ExecutionMode; use crate::executor::ExecutionRequest; use crate::executor::ExecutorConfig; use crate::executor::errors::ExecError; +use crate::landlock::create_linux_sandbox_command_args; +use crate::protocol::SandboxPolicy; use crate::safety::SafetyCheck; use crate::safety::assess_command_safety; use crate::safety::assess_patch_safety; +use crate::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE; +use crate::seatbelt::create_seatbelt_command_args; +use crate::spawn::CODEX_SANDBOX_ENV_VAR; +use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use codex_otel::otel_event_manager::OtelEventManager; use codex_otel::otel_event_manager::ToolDecisionSource; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; +use std::collections::HashMap; use std::collections::HashSet; +use std::path::Path; use std::path::PathBuf; +use thiserror::Error; + +#[derive(Debug)] +pub(crate) struct SandboxLaunch { + pub program: String, + pub args: Vec, + pub env: HashMap, +} + +#[derive(Debug, Error)] +pub(crate) enum SandboxLaunchError { + #[error("missing command line for sandbox launch")] + MissingCommandLine, + #[error("missing codex-linux-sandbox executable path")] + MissingLinuxSandboxExecutable, +} + +pub(crate) fn build_launch_for_sandbox( + sandbox: SandboxType, + command: &[String], + sandbox_policy: &SandboxPolicy, + sandbox_policy_cwd: &Path, + codex_linux_sandbox_exe: Option<&PathBuf>, +) -> Result { + let mut env = HashMap::new(); + if !sandbox_policy.has_full_network_access() { + env.insert( + CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(), + "1".to_string(), + ); + } + + match sandbox { + SandboxType::None => { + let (program, args) = command + .split_first() + .ok_or(SandboxLaunchError::MissingCommandLine)?; + Ok(SandboxLaunch { + program: program.clone(), + args: args.to_vec(), + env, + }) + } + SandboxType::MacosSeatbelt => { + env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string()); + let args = + create_seatbelt_command_args(command.to_vec(), sandbox_policy, sandbox_policy_cwd); + Ok(SandboxLaunch { + program: MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string(), + args, + env, + }) + } + SandboxType::LinuxSeccomp => { + let exe = + codex_linux_sandbox_exe.ok_or(SandboxLaunchError::MissingLinuxSandboxExecutable)?; + let args = create_linux_sandbox_command_args( + command.to_vec(), + sandbox_policy, + sandbox_policy_cwd, + ); + Ok(SandboxLaunch { + program: exe.to_string_lossy().to_string(), + args, + env, + }) + } + } +} /// Sandbox placement options selected for an execution run, including whether /// to escalate after failures and whether approvals should persist. diff --git a/codex-rs/core/src/unified_exec/errors.rs b/codex-rs/core/src/unified_exec/errors.rs index a162e0f0e7..d635e5ad85 100644 --- a/codex-rs/core/src/unified_exec/errors.rs +++ b/codex-rs/core/src/unified_exec/errors.rs @@ -1,3 +1,4 @@ +use crate::executor::SandboxLaunchError; use thiserror::Error; #[derive(Debug, Error)] @@ -24,3 +25,14 @@ impl UnifiedExecError { Self::CreateSession { pty_error: error } } } + +impl From for UnifiedExecError { + fn from(err: SandboxLaunchError) -> Self { + match err { + SandboxLaunchError::MissingCommandLine => UnifiedExecError::MissingCommandLine, + SandboxLaunchError::MissingLinuxSandboxExecutable => { + UnifiedExecError::MissingLinuxSandboxExecutable + } + } + } +} diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index d2d962060c..4fc85388ad 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -6,8 +6,6 @@ use std::collections::HashMap; use std::collections::VecDeque; use std::io::ErrorKind; use std::io::Read; -use std::path::Path; -use std::path::PathBuf; use std::sync::Arc; use std::sync::Mutex as StdMutex; use std::sync::atomic::AtomicBool; @@ -27,15 +25,11 @@ use crate::exec::SandboxType; use crate::exec_command::ExecCommandSession; use crate::executor::ExecutionMode; use crate::executor::ExecutionRequest; +use crate::executor::SandboxLaunch; +use crate::executor::build_launch_for_sandbox; use crate::executor::request_retry_without_sandbox; use crate::executor::select_sandbox; -use crate::landlock::create_linux_sandbox_command_args; use crate::protocol::ReviewDecision; -use crate::protocol::SandboxPolicy; -use crate::seatbelt::MACOS_PATH_TO_SEATBELT_EXECUTABLE; -use crate::seatbelt::create_seatbelt_command_args; -use crate::spawn::CODEX_SANDBOX_ENV_VAR; -use crate::spawn::CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR; use crate::truncate::truncate_middle; mod errors; @@ -125,47 +119,6 @@ impl OutputBufferState { type OutputBuffer = Arc>; type OutputHandles = (OutputBuffer, Arc); -fn build_launch_for_sandbox( - sandbox: SandboxType, - command: &[String], - sandbox_policy: &SandboxPolicy, - sandbox_policy_cwd: &Path, - codex_linux_sandbox_exe: Option<&PathBuf>, -) -> Result<(String, Vec, HashMap), UnifiedExecError> { - let mut env = HashMap::new(); - if !sandbox_policy.has_full_network_access() { - env.insert( - CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(), - "1".to_string(), - ); - } - - match sandbox { - SandboxType::None => { - let (program, args) = command - .split_first() - .ok_or(UnifiedExecError::MissingCommandLine)?; - Ok((program.clone(), args.to_vec(), env)) - } - SandboxType::MacosSeatbelt => { - env.insert(CODEX_SANDBOX_ENV_VAR.to_string(), "seatbelt".to_string()); - let args = - create_seatbelt_command_args(command.to_vec(), sandbox_policy, sandbox_policy_cwd); - Ok((MACOS_PATH_TO_SEATBELT_EXECUTABLE.to_string(), args, env)) - } - SandboxType::LinuxSeccomp => { - let exe = - codex_linux_sandbox_exe.ok_or(UnifiedExecError::MissingLinuxSandboxExecutable)?; - let args = create_linux_sandbox_command_args( - command.to_vec(), - sandbox_policy, - sandbox_policy_cwd, - ); - Ok((exe.to_string_lossy().to_string(), args, env)) - } - } -} - impl ManagedUnifiedExecSession { fn new( session: ExecCommandSession, @@ -270,7 +223,7 @@ impl UnifiedExecSessionManager { .record_session_approval(execution_request.approval_command.clone()); } - let (program, args, env) = build_launch_for_sandbox( + let launch = build_launch_for_sandbox( sandbox_decision.initial_sandbox, &command, &context.turn.sandbox_policy, @@ -278,7 +231,7 @@ impl UnifiedExecSessionManager { codex_linux_sandbox_exe.as_ref(), )?; - match create_unified_exec_session(&program, &args, &env).await { + match create_unified_exec_session(&launch).await { Ok(result) => Ok(result), Err(err) if sandbox_decision.escalate_on_failure => { self.retry_without_sandbox(&command, context, err, &otel_event_manager) @@ -336,14 +289,14 @@ impl UnifiedExecSessionManager { .notify_background_event(context.sub_id, "retrying command without sandbox") .await; - let (program, args, env) = build_launch_for_sandbox( + let launch = build_launch_for_sandbox( SandboxType::None, command, &context.turn.sandbox_policy, &context.turn.cwd, None, )?; - create_unified_exec_session(&program, &args, &env).await + create_unified_exec_session(&launch).await } ReviewDecision::Denied | ReviewDecision::Abort => Err(UnifiedExecError::UserRejected), } @@ -501,9 +454,7 @@ impl UnifiedExecSessionManager { } async fn create_unified_exec_session( - program: &str, - args: &[String], - env: &HashMap, + launch: &SandboxLaunch, ) -> Result< ( ExecCommandSession, @@ -511,7 +462,7 @@ async fn create_unified_exec_session( ), UnifiedExecError, > { - if program.is_empty() { + if launch.program.is_empty() { return Err(UnifiedExecError::MissingCommandLine); } @@ -527,11 +478,11 @@ async fn create_unified_exec_session( .map_err(UnifiedExecError::create_session)?; // Safe thanks to the check at the top of the function. - let mut command_builder = CommandBuilder::new(program.to_string()); - for arg in args { + let mut command_builder = CommandBuilder::new(launch.program.clone()); + for arg in &launch.args { command_builder.arg(arg.clone()); } - for (key, value) in env { + for (key, value) in &launch.env { command_builder.env(key.clone(), value.clone()); } From b47d54d57c417855780e310283cae3c231d406d7 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Thu, 9 Oct 2025 14:15:20 +0100 Subject: [PATCH 03/15] V3 --- codex-rs/core/src/executor/mod.rs | 1 + codex-rs/core/src/executor/runner.rs | 60 +++++++---------- codex-rs/core/src/executor/sandbox.rs | 55 ++++++++++++---- codex-rs/core/src/unified_exec/mod.rs | 92 ++++++++------------------- 4 files changed, 93 insertions(+), 115 deletions(-) diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs index aee0e62531..ac312b41f0 100644 --- a/codex-rs/core/src/executor/mod.rs +++ b/codex-rs/core/src/executor/mod.rs @@ -8,6 +8,7 @@ pub(crate) use runner::ExecutionRequest; pub(crate) use runner::Executor; pub(crate) use runner::ExecutorConfig; pub(crate) use runner::normalize_exec_result; +pub(crate) use sandbox::RetrySandboxContext; pub(crate) use sandbox::SandboxLaunch; pub(crate) use sandbox::SandboxLaunchError; pub(crate) use sandbox::build_launch_for_sandbox; diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 9a931f1f92..8da3107fcd 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -18,11 +18,11 @@ use crate::exec::StdoutStream; use crate::exec::StreamOutput; use crate::exec::process_exec_tool_call; use crate::executor::errors::ExecError; +use crate::executor::sandbox::RetrySandboxContext; use crate::executor::sandbox::request_retry_without_sandbox; use crate::executor::sandbox::select_sandbox; use crate::function_tool::FunctionCallError; use crate::protocol::AskForApproval; -use crate::protocol::ReviewDecision; use crate::protocol::SandboxPolicy; use crate::shell; use crate::tools::context::ExecCommandContext; @@ -184,46 +184,32 @@ impl Executor { stdout_stream: Option, sandbox_error: SandboxErr, ) -> Result { - session - .notify_background_event( - &context.sub_id, - format!("Execution failed: {sandbox_error}"), - ) - .await; - let decision = request_retry_without_sandbox( + let approval = request_retry_without_sandbox( session, - &context.sub_id, - &context.call_id, - request.approval_command.clone(), + format!("Execution failed: {sandbox_error}"), + &request.approval_command, request.params.cwd.clone(), - Some("command failed; retry without sandbox?".to_string()), - &context.tool_name, - &context.otel_event_manager, + RetrySandboxContext { + sub_id: &context.sub_id, + call_id: &context.call_id, + tool_name: &context.tool_name, + otel_event_manager: &context.otel_event_manager, + }, ) .await; - match decision { - ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { - if matches!(decision, ReviewDecision::ApprovedForSession) { - self.approval_cache.insert(request.approval_command.clone()); - } - session - .notify_background_event(&context.sub_id, "retrying command without sandbox") - .await; - - let retry_output = self - .spawn( - request.params.clone(), - SandboxType::None, - config, - stdout_stream, - ) - .await?; - - Ok(retry_output) - } - ReviewDecision::Denied | ReviewDecision::Abort => { - Err(ExecError::rejection("exec command rejected by user")) - } + if approval.is_some() { + let retry_output = self + .spawn( + request.params.clone(), + SandboxType::None, + config, + stdout_stream, + ) + .await?; + + Ok(retry_output) + } else { + Err(ExecError::rejection("exec command rejected by user")) } } diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index a7e26d2c59..349c72c15b 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -92,6 +92,13 @@ pub(crate) fn build_launch_for_sandbox( } } +pub(crate) struct RetrySandboxContext<'a> { + pub sub_id: &'a str, + pub call_id: &'a str, + pub tool_name: &'a str, + pub otel_event_manager: &'a OtelEventManager, +} + /// Sandbox placement options selected for an execution run, including whether /// to escalate after failures and whether approvals should persist. pub(crate) struct SandboxDecision { @@ -130,27 +137,49 @@ fn should_escalate_on_failure(approval: AskForApproval, sandbox: SandboxType) -> pub(crate) async fn request_retry_without_sandbox( session: &Session, - sub_id: &str, - call_id: &str, - approval_command: Vec, + failure_message: impl Into, + command: &[String], cwd: PathBuf, - prompt: Option, - tool_name: &str, - otel_event_manager: &OtelEventManager, -) -> ReviewDecision { + ctx: RetrySandboxContext<'_>, +) -> Option { + session + .notify_background_event(ctx.sub_id, failure_message.into()) + .await; + + let approval_command = command.to_vec(); let decision = session .request_command_approval( - sub_id.to_string(), - call_id.to_string(), - approval_command, + ctx.sub_id.to_string(), + ctx.call_id.to_string(), + approval_command.clone(), cwd, - prompt, + Some("command failed; retry without sandbox?".to_string()), ) .await; - otel_event_manager.tool_decision(tool_name, call_id, decision, ToolDecisionSource::User); + ctx.otel_event_manager.tool_decision( + ctx.tool_name, + ctx.call_id, + decision, + ToolDecisionSource::User, + ); + + match decision { + ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { + if matches!(decision, ReviewDecision::ApprovedForSession) { + session + .services + .executor + .record_session_approval(approval_command); + } - decision + session + .notify_background_event(ctx.sub_id, "retrying command without sandbox") + .await; + Some(decision) + } + ReviewDecision::Denied | ReviewDecision::Abort => None, + } } /// Determines how a command should be sandboxed, prompting the user when diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 4fc85388ad..66326ad712 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -21,15 +21,14 @@ use tokio::time::Instant; use crate::codex::Session; use crate::codex::TurnContext; use crate::exec::ExecParams; -use crate::exec::SandboxType; use crate::exec_command::ExecCommandSession; use crate::executor::ExecutionMode; use crate::executor::ExecutionRequest; +use crate::executor::RetrySandboxContext; use crate::executor::SandboxLaunch; use crate::executor::build_launch_for_sandbox; use crate::executor::request_retry_without_sandbox; use crate::executor::select_sandbox; -use crate::protocol::ReviewDecision; use crate::truncate::truncate_middle; mod errors; @@ -234,71 +233,34 @@ impl UnifiedExecSessionManager { match create_unified_exec_session(&launch).await { Ok(result) => Ok(result), Err(err) if sandbox_decision.escalate_on_failure => { - self.retry_without_sandbox(&command, context, err, &otel_event_manager) - .await - } - Err(err) => Err(err), - } - } - - async fn retry_without_sandbox( - &self, - command: &[String], - context: &UnifiedExecContext<'_>, - previous_error: UnifiedExecError, - otel_event_manager: &codex_otel::otel_event_manager::OtelEventManager, - ) -> Result< - ( - ExecCommandSession, - tokio::sync::broadcast::Receiver>, - ), - UnifiedExecError, - > { - context - .session - .notify_background_event( - context.sub_id, - format!("Execution failed: {previous_error}"), - ) - .await; + let approval = request_retry_without_sandbox( + context.session, + format!("Execution failed: {err}"), + &command, + context.turn.cwd.clone(), + RetrySandboxContext { + sub_id: context.sub_id, + call_id: context.call_id, + tool_name: context.tool_name, + otel_event_manager: &otel_event_manager, + }, + ) + .await; - let decision = request_retry_without_sandbox( - context.session, - context.sub_id, - context.call_id, - command.to_vec(), - context.turn.cwd.clone(), - Some("command failed; retry without sandbox?".to_string()), - context.tool_name, - otel_event_manager, - ) - .await; - - match decision { - ReviewDecision::Approved | ReviewDecision::ApprovedForSession => { - if matches!(decision, ReviewDecision::ApprovedForSession) { - context - .session - .services - .executor - .record_session_approval(command.to_vec()); + if approval.is_some() { + let retry_launch = build_launch_for_sandbox( + crate::exec::SandboxType::None, + &command, + &context.turn.sandbox_policy, + &context.turn.cwd, + None, + )?; + create_unified_exec_session(&retry_launch).await + } else { + Err(UnifiedExecError::UserRejected) } - - context - .session - .notify_background_event(context.sub_id, "retrying command without sandbox") - .await; - - let launch = build_launch_for_sandbox( - SandboxType::None, - command, - &context.turn.sandbox_policy, - &context.turn.cwd, - None, - )?; - create_unified_exec_session(&launch).await } - ReviewDecision::Denied | ReviewDecision::Abort => Err(UnifiedExecError::UserRejected), + Err(err) => Err(err), } } @@ -602,7 +564,7 @@ mod tests { .handle_request( request, UnifiedExecContext { - session: &session, + session, turn: turn.as_ref(), sub_id: "sub", call_id: "call", From 30ae983c3a84188ad40b3f2cfd7dd39d93efed71 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Thu, 9 Oct 2025 14:21:11 +0100 Subject: [PATCH 04/15] Fix 1 --- codex-rs/core/src/tools/handlers/unified_exec.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index 8665eeba91..a0b647e5fb 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -84,6 +84,11 @@ impl ToolHandler for UnifiedExecHandler { timeout_ms, }; + session + .services + .executor + .update_environment(turn.sandbox_policy.clone(), turn.cwd.clone()); + let value = session .services .unified_exec_manager From f5c626b2e0e57e6457c78d645c7545e25c58ac78 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Thu, 9 Oct 2025 14:31:47 +0100 Subject: [PATCH 05/15] Fix 2 --- codex-rs/core/src/unified_exec/mod.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 66326ad712..aae1accb38 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -179,16 +179,17 @@ impl UnifiedExecSessionManager { ), UnifiedExecError, > { + let approval_command = command; let execution_request = ExecutionRequest { params: ExecParams { - command: command.clone(), + command: approval_command.clone(), cwd: context.turn.cwd.clone(), timeout_ms: None, env: HashMap::new(), with_escalated_permissions: None, justification: None, }, - approval_command: command.clone(), + approval_command, mode: ExecutionMode::Shell, stdout_stream: None, use_shell_profile: false, @@ -224,7 +225,7 @@ impl UnifiedExecSessionManager { let launch = build_launch_for_sandbox( sandbox_decision.initial_sandbox, - &command, + &execution_request.approval_command, &context.turn.sandbox_policy, &context.turn.cwd, codex_linux_sandbox_exe.as_ref(), @@ -236,7 +237,7 @@ impl UnifiedExecSessionManager { let approval = request_retry_without_sandbox( context.session, format!("Execution failed: {err}"), - &command, + &execution_request.approval_command, context.turn.cwd.clone(), RetrySandboxContext { sub_id: context.sub_id, @@ -250,7 +251,7 @@ impl UnifiedExecSessionManager { if approval.is_some() { let retry_launch = build_launch_for_sandbox( crate::exec::SandboxType::None, - &command, + &execution_request.approval_command, &context.turn.sandbox_policy, &context.turn.cwd, None, From b3cf2a4b41a91832d7d38fa77c82cc78ff7df693 Mon Sep 17 00:00:00 2001 From: jimmyfraiture Date: Thu, 9 Oct 2025 14:37:45 +0100 Subject: [PATCH 06/15] Fix 3 --- codex-rs/core/src/unified_exec/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index aae1accb38..19fdfe1fda 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -525,15 +525,22 @@ async fn create_unified_exec_session( #[cfg(test)] mod tests { use super::*; + #[cfg(unix)] use crate::codex::Session; + #[cfg(unix)] use crate::codex::TurnContext; + #[cfg(unix)] use crate::codex::make_session_and_context; + #[cfg(unix)] use crate::protocol::AskForApproval; + #[cfg(unix)] use crate::protocol::SandboxPolicy; #[cfg(unix)] use core_test_support::skip_if_sandbox; + #[cfg(unix)] use std::sync::Arc; + #[cfg(unix)] fn test_session_and_turn() -> (Arc, Arc) { let (session, mut turn) = make_session_and_context(); turn.approval_policy = AskForApproval::Never; @@ -545,6 +552,7 @@ mod tests { (Arc::new(session), Arc::new(turn)) } + #[cfg(unix)] async fn run_unified_exec_request( session: &Arc, turn: &Arc, From f61c7aa1600f6e0674621b403eef73a0a06e501b Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 11:15:52 +0100 Subject: [PATCH 07/15] RV1 --- codex-rs/core/src/executor/mod.rs | 5 +- codex-rs/core/src/executor/runner.rs | 220 ++++++++++++++++---------- codex-rs/core/src/unified_exec/mod.rs | 98 ++++-------- 3 files changed, 172 insertions(+), 151 deletions(-) diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs index ac312b41f0..ef5e8b06f5 100644 --- a/codex-rs/core/src/executor/mod.rs +++ b/codex-rs/core/src/executor/mod.rs @@ -4,16 +4,13 @@ mod runner; mod sandbox; pub(crate) use backends::ExecutionMode; +pub(crate) use runner::ExecutionPlan; pub(crate) use runner::ExecutionRequest; pub(crate) use runner::Executor; pub(crate) use runner::ExecutorConfig; pub(crate) use runner::normalize_exec_result; -pub(crate) use sandbox::RetrySandboxContext; pub(crate) use sandbox::SandboxLaunch; pub(crate) use sandbox::SandboxLaunchError; -pub(crate) use sandbox::build_launch_for_sandbox; -pub(crate) use sandbox::request_retry_without_sandbox; -pub(crate) use sandbox::select_sandbox; pub(crate) mod linkers { use crate::exec::ExecParams; diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index e5daf0399e..3be8b8a3df 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -1,4 +1,3 @@ -use std::collections::HashSet; use std::path::PathBuf; use std::sync::Arc; use std::sync::RwLock; @@ -19,7 +18,10 @@ use crate::exec::StreamOutput; use crate::exec::process_exec_tool_call; use crate::executor::errors::ExecError; use crate::executor::sandbox::RetrySandboxContext; -use crate::executor::sandbox::request_retry_without_sandbox; +use crate::executor::sandbox::SandboxDecision; +use crate::executor::sandbox::SandboxLaunch; +use crate::executor::sandbox::SandboxLaunchError; +use crate::executor::sandbox::build_launch_for_sandbox; use crate::executor::sandbox::select_sandbox; use crate::function_tool::FunctionCallError; use crate::protocol::AskForApproval; @@ -46,9 +48,85 @@ impl ExecutorConfig { codex_linux_sandbox_exe, } } +} + +pub(crate) struct ExecutionPlan { + request: ExecutionRequest, + config: ExecutorConfig, + sandbox_decision: SandboxDecision, + stdout_stream: Option, + context: ExecCommandContext, +} + +impl ExecutionPlan { + pub(crate) fn request(&self) -> &ExecutionRequest { + &self.request + } + + pub(crate) fn config(&self) -> &ExecutorConfig { + &self.config + } + + pub(crate) fn stdout_stream(&self) -> Option { + self.stdout_stream.clone() + } + + pub(crate) fn initial_sandbox(&self) -> SandboxType { + self.sandbox_decision.initial_sandbox + } - pub(crate) fn codex_linux_sandbox_exe(&self) -> Option { - self.codex_linux_sandbox_exe.clone() + pub(crate) fn should_retry_without_sandbox(&self) -> bool { + self.sandbox_decision.escalate_on_failure + } + + pub(crate) fn initial_launch(&self) -> Result { + build_launch_for_sandbox( + self.sandbox_decision.initial_sandbox, + &self.request.params.command, + &self.config.sandbox_policy, + &self.config.sandbox_cwd, + self.config.codex_linux_sandbox_exe.as_ref(), + ) + } + + pub(crate) fn retry_launch(&self) -> Result { + build_launch_for_sandbox( + SandboxType::None, + &self.request.params.command, + &self.config.sandbox_policy, + &self.config.sandbox_cwd, + None, + ) + } + + pub(crate) fn approval_command(&self) -> &[String] { + &self.request.approval_command + } + + pub(crate) async fn prompt_retry_without_sandbox( + &self, + session: &Session, + failure_message: impl Into, + ) -> bool { + if !self.should_retry_without_sandbox() { + return false; + } + + let approval = crate::executor::sandbox::request_retry_without_sandbox( + session, + failure_message.into(), + self.approval_command(), + self.request.params.cwd.clone(), + RetrySandboxContext { + sub_id: &self.context.sub_id, + call_id: &self.context.call_id, + tool_name: &self.context.tool_name, + otel_event_manager: &self.context.otel_event_manager, + }, + ) + .await; + + approval.is_some() } } @@ -67,43 +145,22 @@ impl Executor { } } - pub(crate) fn approval_cache_snapshot(&self) -> HashSet> { - self.approval_cache.snapshot() - } - pub(crate) fn record_session_approval(&self, command: Vec) { self.approval_cache.insert(command); } - pub(crate) fn config_snapshot(&self) -> Option { - self.config.read().ok().map(|cfg| cfg.clone()) - } - - /// Updates the sandbox policy and working directory used for future - /// executions without recreating the executor. - pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) { - if let Ok(mut cfg) = self.config.write() { - cfg.sandbox_policy = sandbox_policy; - cfg.sandbox_cwd = sandbox_cwd; - } - } - - /// Runs a prepared execution request end-to-end: prepares parameters, decides on - /// sandbox placement (prompting the user when necessary), launches the command, - /// and lets the backend post-process the final output. - pub(crate) async fn run( + pub(crate) async fn prepare_execution_plan( &self, mut request: ExecutionRequest, session: &Session, approval_policy: AskForApproval, context: &ExecCommandContext, - ) -> Result { + ) -> Result { if matches!(request.mode, ExecutionMode::Shell) { request.params = maybe_translate_shell_command(request.params, session, request.use_shell_profile); } - // Step 1: Normalise parameters via the selected backend. let backend = backend_for_mode(&request.mode); let stdout_stream = if backend.stream_stdout(&request.mode) { request.stdout_stream.clone() @@ -114,14 +171,12 @@ impl Executor { .prepare(request.params, &request.mode) .map_err(ExecError::from)?; - // Step 2: Snapshot sandbox configuration so it stays stable for this run. let config = self .config .read() .map_err(|_| ExecError::rejection("executor config poisoned"))? .clone(); - // Step 3: Decide sandbox placement, prompting for approval when needed. let sandbox_decision = select_sandbox( &request, approval_policy, @@ -137,33 +192,70 @@ impl Executor { self.approval_cache.insert(request.approval_command.clone()); } - // Step 4: Launch the command within the chosen sandbox. + Ok(ExecutionPlan { + request, + config, + sandbox_decision, + stdout_stream, + context: context.clone(), + }) + } + + /// Updates the sandbox policy and working directory used for future + /// executions without recreating the executor. + pub(crate) fn update_environment(&self, sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf) { + if let Ok(mut cfg) = self.config.write() { + cfg.sandbox_policy = sandbox_policy; + cfg.sandbox_cwd = sandbox_cwd; + } + } + + /// Runs a prepared execution request end-to-end: prepares parameters, decides on + /// sandbox placement (prompting the user when necessary), launches the command, + /// and lets the backend post-process the final output. + pub(crate) async fn run( + &self, + request: ExecutionRequest, + session: &Session, + approval_policy: AskForApproval, + context: &ExecCommandContext, + ) -> Result { + let plan = self + .prepare_execution_plan(request, session, approval_policy, context) + .await?; + + let stdout_stream = plan.stdout_stream(); let first_attempt = self .spawn( - request.params.clone(), - sandbox_decision.initial_sandbox, - &config, + plan.request().params.clone(), + plan.initial_sandbox(), + plan.config(), stdout_stream.clone(), ) .await; - // Step 5: Handle sandbox outcomes, optionally escalating to an unsandboxed retry. match first_attempt { Ok(output) => Ok(output), Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => { Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into()) } Err(CodexErr::Sandbox(error)) => { - if sandbox_decision.escalate_on_failure { - self.retry_without_sandbox( - &request, - &config, - session, - context, - stdout_stream, - error, - ) - .await + if plan.should_retry_without_sandbox() { + if plan + .prompt_retry_without_sandbox(session, format!("Execution failed: {error}")) + .await + { + self.spawn( + plan.request().params.clone(), + SandboxType::None, + plan.config(), + stdout_stream, + ) + .await + .map_err(ExecError::from) + } else { + Err(ExecError::rejection("exec command rejected by user")) + } } else { let message = sandbox_failure_message(error); Err(ExecError::rejection(message)) @@ -173,46 +265,6 @@ impl Executor { } } - /// Fallback path invoked when a sandboxed run is denied so the user can - /// approve rerunning without isolation. - async fn retry_without_sandbox( - &self, - request: &ExecutionRequest, - config: &ExecutorConfig, - session: &Session, - context: &ExecCommandContext, - stdout_stream: Option, - sandbox_error: SandboxErr, - ) -> Result { - let approval = request_retry_without_sandbox( - session, - format!("Execution failed: {sandbox_error}"), - &request.approval_command, - request.params.cwd.clone(), - RetrySandboxContext { - sub_id: &context.sub_id, - call_id: &context.call_id, - tool_name: &context.tool_name, - otel_event_manager: &context.otel_event_manager, - }, - ) - .await; - if approval.is_some() { - let retry_output = self - .spawn( - request.params.clone(), - SandboxType::None, - config, - stdout_stream, - ) - .await?; - - Ok(retry_output) - } else { - Err(ExecError::rejection("exec command rejected by user")) - } - } - async fn spawn( &self, params: ExecParams, diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index c0921b452b..c276efce21 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -23,12 +23,10 @@ use crate::codex::TurnContext; use crate::exec::ExecParams; use crate::exec_command::ExecCommandSession; use crate::executor::ExecutionMode; +use crate::executor::ExecutionPlan; use crate::executor::ExecutionRequest; -use crate::executor::RetrySandboxContext; use crate::executor::SandboxLaunch; -use crate::executor::build_launch_for_sandbox; -use crate::executor::request_retry_without_sandbox; -use crate::executor::select_sandbox; +use crate::tools::context::ExecCommandContext; use crate::truncate::truncate_middle; mod errors; @@ -190,10 +188,22 @@ impl UnifiedExecSessionManager { ), UnifiedExecError, > { - let approval_command = command; + let executor = &context.session.services.executor; + let otel_event_manager = context.turn.client.get_otel_event_manager(); + let approval_command = command.clone(); + let exec_context = ExecCommandContext { + sub_id: context.sub_id.to_string(), + call_id: context.call_id.to_string(), + command_for_display: approval_command.clone(), + cwd: context.turn.cwd.clone(), + apply_patch: None, + tool_name: context.tool_name.to_string(), + otel_event_manager, + }; + let execution_request = ExecutionRequest { params: ExecParams { - command: approval_command.clone(), + command, cwd: context.turn.cwd.clone(), timeout_ms: None, env: HashMap::new(), @@ -206,67 +216,29 @@ impl UnifiedExecSessionManager { use_shell_profile: false, }; - let executor = &context.session.services.executor; - let approval_cache = executor.approval_cache_snapshot(); - let config = executor - .config_snapshot() - .ok_or_else(|| UnifiedExecError::create_session(anyhow!("executor config poisoned")))?; - let codex_linux_sandbox_exe = config.codex_linux_sandbox_exe(); - let otel_event_manager = context.turn.client.get_otel_event_manager(); - let sandbox_decision = select_sandbox( - &execution_request, - context.turn.approval_policy, - approval_cache, - &config, - context.session, - context.sub_id, - context.call_id, - &otel_event_manager, - ) - .await - .map_err(|err| UnifiedExecError::create_session(anyhow!(err.to_string())))?; - - if sandbox_decision.record_session_approval { - context - .session - .services - .executor - .record_session_approval(execution_request.approval_command.clone()); - } + let plan: ExecutionPlan = executor + .prepare_execution_plan( + execution_request, + context.session, + context.turn.approval_policy, + &exec_context, + ) + .await + .map_err(|err| UnifiedExecError::create_session(anyhow!(err.to_string())))?; - let launch = build_launch_for_sandbox( - sandbox_decision.initial_sandbox, - &execution_request.approval_command, - &context.turn.sandbox_policy, - &context.turn.cwd, - codex_linux_sandbox_exe.as_ref(), - )?; + let launch = plan.initial_launch()?; match create_unified_exec_session(&launch).await { Ok(result) => Ok(result), - Err(err) if sandbox_decision.escalate_on_failure => { - let approval = request_retry_without_sandbox( - context.session, - format!("Execution failed: {err}"), - &execution_request.approval_command, - context.turn.cwd.clone(), - RetrySandboxContext { - sub_id: context.sub_id, - call_id: context.call_id, - tool_name: context.tool_name, - otel_event_manager: &otel_event_manager, - }, - ) - .await; - - if approval.is_some() { - let retry_launch = build_launch_for_sandbox( - crate::exec::SandboxType::None, - &execution_request.approval_command, - &context.turn.sandbox_policy, - &context.turn.cwd, - None, - )?; + Err(err) if plan.should_retry_without_sandbox() => { + if plan + .prompt_retry_without_sandbox( + context.session, + format!("Execution failed: {err}"), + ) + .await + { + let retry_launch = plan.retry_launch()?; create_unified_exec_session(&retry_launch).await } else { Err(UnifiedExecError::UserRejected) From 703c3825cecf7d0c6d7babd38c686b9d7a929821 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 11:40:07 +0100 Subject: [PATCH 08/15] RV2 --- codex-rs/core/src/exec.rs | 131 +++++++++++------- codex-rs/core/src/executor/mod.rs | 1 + codex-rs/core/src/executor/runner.rs | 49 +++---- .../core/src/tools/handlers/unified_exec.rs | 7 +- codex-rs/core/src/unified_exec/errors.rs | 11 +- codex-rs/core/src/unified_exec/mod.rs | 19 ++- 6 files changed, 115 insertions(+), 103 deletions(-) diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 22d1deeb1c..75e459b102 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -18,13 +18,14 @@ use tokio::process::Child; use crate::error::CodexErr; use crate::error::Result; use crate::error::SandboxErr; -use crate::landlock::spawn_command_under_linux_sandbox; +use crate::executor::SandboxLaunch; +use crate::executor::SandboxLaunchError; +use crate::executor::sandbox::build_launch_for_sandbox; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::ExecCommandOutputDeltaEvent; use crate::protocol::ExecOutputStream; use crate::protocol::SandboxPolicy; -use crate::seatbelt::spawn_command_under_seatbelt; use crate::spawn::StdioPolicy; use crate::spawn::spawn_child_async; @@ -87,57 +88,35 @@ pub async fn process_exec_tool_call( codex_linux_sandbox_exe: &Option, stdout_stream: Option, ) -> Result { - let start = Instant::now(); - - let timeout_duration = params.timeout_duration(); + let launch = build_launch_for_sandbox( + sandbox_type, + ¶ms.command, + sandbox_policy, + sandbox_cwd, + codex_linux_sandbox_exe.as_ref(), + ) + .map_err(sandbox_launch_error_to_codex)?; + execute_sandbox_launch(params, launch, sandbox_type, sandbox_policy, stdout_stream).await +} - let raw_output_result: std::result::Result = match sandbox_type - { - SandboxType::None => exec(params, sandbox_policy, stdout_stream.clone()).await, - SandboxType::MacosSeatbelt => { - let ExecParams { - command, - cwd: command_cwd, - env, - .. - } = params; - let child = spawn_command_under_seatbelt( - command, - command_cwd, - sandbox_policy, - sandbox_cwd, - StdioPolicy::RedirectForShellTool, - env, - ) - .await?; - consume_truncated_output(child, timeout_duration, stdout_stream.clone()).await - } - SandboxType::LinuxSeccomp => { - let ExecParams { - command, - cwd: command_cwd, - env, - .. - } = params; - - let codex_linux_sandbox_exe = codex_linux_sandbox_exe - .as_ref() - .ok_or(CodexErr::LandlockSandboxExecutableNotProvided)?; - let child = spawn_command_under_linux_sandbox( - codex_linux_sandbox_exe, - command, - command_cwd, - sandbox_policy, - sandbox_cwd, - StdioPolicy::RedirectForShellTool, - env, - ) - .await?; - - consume_truncated_output(child, timeout_duration, stdout_stream).await - } - }; +pub(crate) async fn execute_sandbox_launch( + params: ExecParams, + launch: SandboxLaunch, + sandbox_type: SandboxType, + sandbox_policy: &SandboxPolicy, + stdout_stream: Option, +) -> Result { + let start = Instant::now(); + let raw_output_result = spawn_with_launch(params, launch, sandbox_policy, stdout_stream).await; let duration = start.elapsed(); + finalize_exec_result(raw_output_result, sandbox_type, duration) +} + +fn finalize_exec_result( + raw_output_result: std::result::Result, + sandbox_type: SandboxType, + duration: Duration, +) -> Result { match raw_output_result { Ok(raw_output) => { #[allow(unused_mut)] @@ -192,6 +171,56 @@ pub async fn process_exec_tool_call( } } +pub(crate) fn sandbox_launch_error_to_codex(err: SandboxLaunchError) -> CodexErr { + match err { + SandboxLaunchError::MissingCommandLine => CodexErr::Io(io::Error::new( + io::ErrorKind::InvalidInput, + "command args are empty", + )), + SandboxLaunchError::MissingLinuxSandboxExecutable => { + CodexErr::LandlockSandboxExecutableNotProvided + } + } +} + +async fn spawn_with_launch( + params: ExecParams, + launch: SandboxLaunch, + sandbox_policy: &SandboxPolicy, + stdout_stream: Option, +) -> std::result::Result { + let ExecParams { + command: _, + cwd, + timeout_ms, + mut env, + with_escalated_permissions, + justification, + } = params; + + let SandboxLaunch { + program, + args, + env: launch_env, + } = launch; + env.extend(launch_env); + + let mut command = Vec::with_capacity(1 + args.len()); + command.push(program); + command.extend(args); + + let updated_params = ExecParams { + command, + cwd, + timeout_ms, + env, + with_escalated_permissions, + justification, + }; + + exec(updated_params, sandbox_policy, stdout_stream).await +} + /// We don't have a fully deterministic way to tell if our command failed /// because of the sandbox - a command in the user's zshrc file might hit an /// error, but the command itself might fail or succeed for other reasons. diff --git a/codex-rs/core/src/executor/mod.rs b/codex-rs/core/src/executor/mod.rs index ef5e8b06f5..4beb926903 100644 --- a/codex-rs/core/src/executor/mod.rs +++ b/codex-rs/core/src/executor/mod.rs @@ -11,6 +11,7 @@ pub(crate) use runner::ExecutorConfig; pub(crate) use runner::normalize_exec_result; pub(crate) use sandbox::SandboxLaunch; pub(crate) use sandbox::SandboxLaunchError; +pub(crate) use sandbox::build_launch_for_sandbox; pub(crate) mod linkers { use crate::exec::ExecParams; diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 3be8b8a3df..b28042b874 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -15,7 +15,8 @@ use crate::exec::ExecToolCallOutput; use crate::exec::SandboxType; use crate::exec::StdoutStream; use crate::exec::StreamOutput; -use crate::exec::process_exec_tool_call; +use crate::exec::execute_sandbox_launch; +use crate::exec::sandbox_launch_error_to_codex; use crate::executor::errors::ExecError; use crate::executor::sandbox::RetrySandboxContext; use crate::executor::sandbox::SandboxDecision; @@ -225,14 +226,18 @@ impl Executor { .await?; let stdout_stream = plan.stdout_stream(); - let first_attempt = self - .spawn( - plan.request().params.clone(), - plan.initial_sandbox(), - plan.config(), - stdout_stream.clone(), - ) - .await; + let sandbox_policy = plan.config().sandbox_policy.clone(); + let initial_launch = plan + .initial_launch() + .map_err(|err| ExecError::Codex(sandbox_launch_error_to_codex(err)))?; + let first_attempt = execute_sandbox_launch( + plan.request().params.clone(), + initial_launch, + plan.initial_sandbox(), + &sandbox_policy, + stdout_stream.clone(), + ) + .await; match first_attempt { Ok(output) => Ok(output), @@ -245,10 +250,14 @@ impl Executor { .prompt_retry_without_sandbox(session, format!("Execution failed: {error}")) .await { - self.spawn( + let retry_launch = plan + .retry_launch() + .map_err(|err| ExecError::Codex(sandbox_launch_error_to_codex(err)))?; + execute_sandbox_launch( plan.request().params.clone(), + retry_launch, SandboxType::None, - plan.config(), + &sandbox_policy, stdout_stream, ) .await @@ -264,24 +273,6 @@ impl Executor { Err(err) => Err(err.into()), } } - - async fn spawn( - &self, - params: ExecParams, - sandbox: SandboxType, - config: &ExecutorConfig, - stdout_stream: Option, - ) -> Result { - process_exec_tool_call( - params, - sandbox, - &config.sandbox_policy, - &config.sandbox_cwd, - &config.codex_linux_sandbox_exe, - stdout_stream, - ) - .await - } } fn maybe_translate_shell_command( diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index a0b647e5fb..074082771e 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -79,16 +79,10 @@ impl ToolHandler for UnifiedExecHandler { }; let request = UnifiedExecRequest { - session_id: parsed_session_id, input_chunks: &input, timeout_ms, }; - session - .services - .executor - .update_environment(turn.sandbox_policy.clone(), turn.cwd.clone()); - let value = session .services .unified_exec_manager @@ -100,6 +94,7 @@ impl ToolHandler for UnifiedExecHandler { sub_id: &sub_id, call_id: &call_id, tool_name: &tool_name, + session_id: parsed_session_id, }, ) .await diff --git a/codex-rs/core/src/unified_exec/errors.rs b/codex-rs/core/src/unified_exec/errors.rs index d635e5ad85..55ee019661 100644 --- a/codex-rs/core/src/unified_exec/errors.rs +++ b/codex-rs/core/src/unified_exec/errors.rs @@ -3,11 +3,8 @@ use thiserror::Error; #[derive(Debug, Error)] pub(crate) enum UnifiedExecError { - #[error("Failed to create unified exec session: {pty_error}")] - CreateSession { - #[source] - pty_error: anyhow::Error, - }, + #[error("Failed to create unified exec session: {message}")] + CreateSession { message: String }, #[error("Unknown session id {session_id}")] UnknownSessionId { session_id: i32 }, #[error("failed to write to stdin")] @@ -21,8 +18,8 @@ pub(crate) enum UnifiedExecError { } impl UnifiedExecError { - pub(crate) fn create_session(error: anyhow::Error) -> Self { - Self::CreateSession { pty_error: error } + pub(crate) fn create_session(message: String) -> Self { + Self::CreateSession { message } } } diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index c276efce21..cb2b25011a 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -1,4 +1,3 @@ -use anyhow::anyhow; use portable_pty::CommandBuilder; use portable_pty::PtySize; use portable_pty::native_pty_system; @@ -43,11 +42,11 @@ pub(crate) struct UnifiedExecContext<'a> { pub sub_id: &'a str, pub call_id: &'a str, pub tool_name: &'a str, + pub session_id: Option, } #[derive(Debug)] pub(crate) struct UnifiedExecRequest<'a> { - pub session_id: Option, pub input_chunks: &'a [String], pub timeout_ms: Option, } @@ -224,7 +223,7 @@ impl UnifiedExecSessionManager { &exec_context, ) .await - .map_err(|err| UnifiedExecError::create_session(anyhow!(err.to_string())))?; + .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; let launch = plan.initial_launch()?; @@ -270,7 +269,7 @@ impl UnifiedExecSessionManager { let output_buffer; let output_notify; - if let Some(existing_id) = request.session_id { + if let Some(existing_id) = context.session_id { let mut sessions = self.sessions.lock().await; match sessions.get(&existing_id) { Some(session) => { @@ -307,7 +306,7 @@ impl UnifiedExecSessionManager { new_session = Some(managed_session); }; - if request.session_id.is_some() { + if context.session_id.is_some() { let joined_input = request.input_chunks.join(" "); if !joined_input.is_empty() && writer_tx.send(joined_input.into_bytes()).await.is_err() { @@ -366,7 +365,7 @@ impl UnifiedExecSessionManager { let should_store_session = if let Some(session) = new_session.as_ref() { !session.has_exited() - } else if request.session_id.is_some() { + } else if context.session_id.is_some() { let mut sessions = self.sessions.lock().await; if let Some(existing) = sessions.get(&session_id) { if existing.has_exited() { @@ -421,7 +420,7 @@ async fn create_unified_exec_session( pixel_width: 0, pixel_height: 0, }) - .map_err(UnifiedExecError::create_session)?; + .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; // Safe thanks to the check at the top of the function. let mut command_builder = CommandBuilder::new(launch.program.clone()); @@ -435,7 +434,7 @@ async fn create_unified_exec_session( let mut child = pair .slave .spawn_command(command_builder) - .map_err(UnifiedExecError::create_session)?; + .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; let killer = child.clone_killer(); let (writer_tx, mut writer_rx) = mpsc::channel::>(128); @@ -444,7 +443,7 @@ async fn create_unified_exec_session( let mut reader = pair .master .try_clone_reader() - .map_err(UnifiedExecError::create_session)?; + .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; let output_tx_clone = output_tx.clone(); let reader_handle = tokio::task::spawn_blocking(move || { let mut buf = [0u8; 8192]; @@ -467,7 +466,7 @@ async fn create_unified_exec_session( let writer = pair .master .take_writer() - .map_err(UnifiedExecError::create_session)?; + .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; let writer = Arc::new(StdMutex::new(writer)); let writer_handle = tokio::spawn({ let writer = writer.clone(); From 1e2f6f21e242d4c7e1034269b0654aa0cf8d959c Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 11:46:06 +0100 Subject: [PATCH 09/15] RV3 --- codex-rs/core/src/exec.rs | 2 +- codex-rs/core/src/unified_exec/mod.rs | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 75e459b102..0e831f1e91 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -20,7 +20,7 @@ use crate::error::Result; use crate::error::SandboxErr; use crate::executor::SandboxLaunch; use crate::executor::SandboxLaunchError; -use crate::executor::sandbox::build_launch_for_sandbox; +use crate::executor::build_launch_for_sandbox; use crate::protocol::Event; use crate::protocol::EventMsg; use crate::protocol::ExecCommandOutputDeltaEvent; diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index cb2b25011a..c60ad64755 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -527,10 +527,6 @@ mod tests { let (session, mut turn) = make_session_and_context(); turn.approval_policy = AskForApproval::Never; turn.sandbox_policy = SandboxPolicy::DangerFullAccess; - session - .services - .executor - .update_environment(turn.sandbox_policy.clone(), turn.cwd.clone()); (Arc::new(session), Arc::new(turn)) } @@ -544,7 +540,6 @@ mod tests { ) -> Result { let request_input = input; let request = UnifiedExecRequest { - session_id, input_chunks: &request_input, timeout_ms, }; @@ -560,6 +555,7 @@ mod tests { sub_id: "sub", call_id: "call", tool_name: "unified_exec", + session_id, }, ) .await From 53f07c304b1bb7b03646eea999b70279a5e9c610 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 11:49:55 +0100 Subject: [PATCH 10/15] RV4 --- codex-rs/core/src/exec.rs | 33 +++++++++++++++++----------- codex-rs/core/src/executor/runner.rs | 5 ++--- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 0e831f1e91..3a0220e49f 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -95,7 +95,7 @@ pub async fn process_exec_tool_call( sandbox_cwd, codex_linux_sandbox_exe.as_ref(), ) - .map_err(sandbox_launch_error_to_codex)?; + .map_err(CodexErr::from)?; execute_sandbox_launch(params, launch, sandbox_type, sandbox_policy, stdout_stream).await } @@ -171,18 +171,6 @@ fn finalize_exec_result( } } -pub(crate) fn sandbox_launch_error_to_codex(err: SandboxLaunchError) -> CodexErr { - match err { - SandboxLaunchError::MissingCommandLine => CodexErr::Io(io::Error::new( - io::ErrorKind::InvalidInput, - "command args are empty", - )), - SandboxLaunchError::MissingLinuxSandboxExecutable => { - CodexErr::LandlockSandboxExecutableNotProvided - } - } -} - async fn spawn_with_launch( params: ExecParams, launch: SandboxLaunch, @@ -221,6 +209,25 @@ async fn spawn_with_launch( exec(updated_params, sandbox_policy, stdout_stream).await } +pub(crate) mod errors { + use super::CodexErr; + use super::SandboxLaunchError; + + impl From for CodexErr { + fn from(err: SandboxLaunchError) -> Self { + match err { + SandboxLaunchError::MissingCommandLine => CodexErr::Io(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "command args are empty", + )), + SandboxLaunchError::MissingLinuxSandboxExecutable => { + CodexErr::LandlockSandboxExecutableNotProvided + } + } + } + } +} + /// We don't have a fully deterministic way to tell if our command failed /// because of the sandbox - a command in the user's zshrc file might hit an /// error, but the command itself might fail or succeed for other reasons. diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index b28042b874..9bb5ce5311 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -16,7 +16,6 @@ use crate::exec::SandboxType; use crate::exec::StdoutStream; use crate::exec::StreamOutput; use crate::exec::execute_sandbox_launch; -use crate::exec::sandbox_launch_error_to_codex; use crate::executor::errors::ExecError; use crate::executor::sandbox::RetrySandboxContext; use crate::executor::sandbox::SandboxDecision; @@ -229,7 +228,7 @@ impl Executor { let sandbox_policy = plan.config().sandbox_policy.clone(); let initial_launch = plan .initial_launch() - .map_err(|err| ExecError::Codex(sandbox_launch_error_to_codex(err)))?; + .map_err(|err| ExecError::Codex(err.into()))?; let first_attempt = execute_sandbox_launch( plan.request().params.clone(), initial_launch, @@ -252,7 +251,7 @@ impl Executor { { let retry_launch = plan .retry_launch() - .map_err(|err| ExecError::Codex(sandbox_launch_error_to_codex(err)))?; + .map_err(|err| ExecError::Codex(err.into()))?; execute_sandbox_launch( plan.request().params.clone(), retry_launch, From b622a1c85081e4b16cf503cb459c4b09e42698aa Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 12:10:32 +0100 Subject: [PATCH 11/15] RV5 --- codex-rs/core/src/unified_exec/mod.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index c60ad64755..e34cf699fe 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -215,6 +215,12 @@ impl UnifiedExecSessionManager { use_shell_profile: false, }; + // Ensure the executor's environment reflects this turn before planning + executor.update_environment( + context.turn.sandbox_policy.clone(), + context.turn.cwd.clone(), + ); + let plan: ExecutionPlan = executor .prepare_execution_plan( execution_request, From a55fc6a88f2b6c26984a4b32f6aa8914927ee93e Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 12:19:53 +0100 Subject: [PATCH 12/15] RV6 --- codex-rs/core/src/executor/runner.rs | 104 +++++++++++++++++--------- codex-rs/core/src/unified_exec/mod.rs | 24 +----- 2 files changed, 71 insertions(+), 57 deletions(-) diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 9bb5ce5311..d0fff17f96 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -1,3 +1,4 @@ +use std::future::Future; use std::path::PathBuf; use std::sync::Arc; use std::sync::RwLock; @@ -128,6 +129,50 @@ impl ExecutionPlan { approval.is_some() } + + /// Runs the provided attempt with the initially selected sandbox. + /// If it fails and policy allows, prompt to retry without sandbox and run again. + pub(crate) async fn attempt_with_retry( + &self, + session: &Session, + attempt: Attempt, + ) -> Result + where + Attempt: Fn(SandboxLaunch) -> Fut, + Fut: Future>, + E: From + std::fmt::Display, + { + self.attempt_with_retry_if(session, attempt, |_| true).await + } + + /// Like `attempt_with_retry`, but only retries if `should_retry(err)` returns true. + pub(crate) async fn attempt_with_retry_if( + &self, + session: &Session, + attempt: Attempt, + should_retry: Should, + ) -> Result + where + Attempt: Fn(SandboxLaunch) -> Fut, + Fut: Future>, + Should: Fn(&E) -> bool, + E: From + std::fmt::Display, + { + let initial_launch = self.initial_launch().map_err(E::from)?; + match attempt(initial_launch).await { + Ok(result) => Ok(result), + Err(err) if self.should_retry_without_sandbox() && should_retry(&err) => { + let failure = format!("Execution failed: {err}"); + if self.prompt_retry_without_sandbox(session, failure).await { + let retry_launch = self.retry_launch().map_err(E::from)?; + attempt(retry_launch).await + } else { + Err(err) + } + } + Err(err) => Err(err), + } + } } /// Coordinates sandbox selection, backend-specific preparation, and command @@ -226,48 +271,33 @@ impl Executor { let stdout_stream = plan.stdout_stream(); let sandbox_policy = plan.config().sandbox_policy.clone(); - let initial_launch = plan - .initial_launch() - .map_err(|err| ExecError::Codex(err.into()))?; - let first_attempt = execute_sandbox_launch( - plan.request().params.clone(), - initial_launch, - plan.initial_sandbox(), - &sandbox_policy, - stdout_stream.clone(), - ) - .await; - match first_attempt { + // Drive attempts via the shared helper, but do not retry on timeouts. + let result: Result = plan + .attempt_with_retry_if( + session, + |launch| { + let params = plan.request().params.clone(); + let sandbox = plan.initial_sandbox(); + let policy = sandbox_policy.clone(); + let stream = stdout_stream.clone(); + async move { execute_sandbox_launch(params, launch, sandbox, &policy, stream).await } + }, + |err: &CodexErr| { !matches!(err, CodexErr::Sandbox(SandboxErr::Timeout { .. })) }, + ) + .await; + + match result { Ok(output) => Ok(output), Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => { - Err(CodexErr::Sandbox(SandboxErr::Timeout { output }).into()) + Err(ExecError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { + output, + }))) } Err(CodexErr::Sandbox(error)) => { - if plan.should_retry_without_sandbox() { - if plan - .prompt_retry_without_sandbox(session, format!("Execution failed: {error}")) - .await - { - let retry_launch = plan - .retry_launch() - .map_err(|err| ExecError::Codex(err.into()))?; - execute_sandbox_launch( - plan.request().params.clone(), - retry_launch, - SandboxType::None, - &sandbox_policy, - stdout_stream, - ) - .await - .map_err(ExecError::from) - } else { - Err(ExecError::rejection("exec command rejected by user")) - } - } else { - let message = sandbox_failure_message(error); - Err(ExecError::rejection(message)) - } + // Convert non-timeout sandbox errors into user-facing rejection messages. + let message = sandbox_failure_message(error); + Err(ExecError::rejection(message)) } Err(err) => Err(err.into()), } diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index e34cf699fe..ebb977424f 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -231,26 +231,10 @@ impl UnifiedExecSessionManager { .await .map_err(|err| UnifiedExecError::create_session(err.to_string()))?; - let launch = plan.initial_launch()?; - - match create_unified_exec_session(&launch).await { - Ok(result) => Ok(result), - Err(err) if plan.should_retry_without_sandbox() => { - if plan - .prompt_retry_without_sandbox( - context.session, - format!("Execution failed: {err}"), - ) - .await - { - let retry_launch = plan.retry_launch()?; - create_unified_exec_session(&retry_launch).await - } else { - Err(UnifiedExecError::UserRejected) - } - } - Err(err) => Err(err), - } + plan.attempt_with_retry(context.session, |launch| async move { + create_unified_exec_session(&launch).await + }) + .await } pub async fn handle_request( From 9865a0cfe127240a0f6c6fbc13da93cd31495267 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 12:26:08 +0100 Subject: [PATCH 13/15] RV7 --- codex-rs/core/src/executor/runner.rs | 2 +- codex-rs/core/src/unified_exec/errors.rs | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index 1f0e83a5a7..f3ade5b34f 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -86,7 +86,7 @@ impl ExecutionPlan { &self.request.params.command, &self.config.sandbox_policy, &self.config.sandbox_cwd, - self.config.codex_linux_sandbox_exe.as_ref(), + self.config.codex_exe.as_ref(), ) } diff --git a/codex-rs/core/src/unified_exec/errors.rs b/codex-rs/core/src/unified_exec/errors.rs index 55ee019661..ba38194c6e 100644 --- a/codex-rs/core/src/unified_exec/errors.rs +++ b/codex-rs/core/src/unified_exec/errors.rs @@ -13,8 +13,6 @@ pub(crate) enum UnifiedExecError { MissingCommandLine, #[error("missing codex-linux-sandbox executable path")] MissingLinuxSandboxExecutable, - #[error("unified exec command rejected by user")] - UserRejected, } impl UnifiedExecError { From b7e834a02b7164dd363bb71e677a080ad46b72e9 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 12:55:22 +0100 Subject: [PATCH 14/15] Fix tests --- codex-rs/core/src/codex.rs | 3 +++ codex-rs/core/src/executor/runner.rs | 22 +++++++++++++++++++++- codex-rs/core/src/executor/sandbox.rs | 20 +++++++++++++++----- codex-rs/core/src/unified_exec/mod.rs | 17 ++--------------- 4 files changed, 41 insertions(+), 21 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 4e6d065366..39a2711f22 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -472,6 +472,7 @@ impl Session { turn_context.sandbox_policy.clone(), turn_context.cwd.clone(), config.codex_linux_sandbox_exe.clone(), + None, )), }; @@ -2761,6 +2762,7 @@ mod tests { turn_context.sandbox_policy.clone(), turn_context.cwd.clone(), None, + None, )), }; let session = Session { @@ -2829,6 +2831,7 @@ mod tests { config.sandbox_policy.clone(), config.cwd.clone(), None, + None, )), }; let session = Arc::new(Session { diff --git a/codex-rs/core/src/executor/runner.rs b/codex-rs/core/src/executor/runner.rs index f3ade5b34f..74536605ef 100644 --- a/codex-rs/core/src/executor/runner.rs +++ b/codex-rs/core/src/executor/runner.rs @@ -34,6 +34,9 @@ use crate::tools::context::ExecCommandContext; pub(crate) struct ExecutorConfig { pub(crate) sandbox_policy: SandboxPolicy, pub(crate) sandbox_cwd: PathBuf, + // Path to codex-linux-sandbox executable (Linux-only). Used by initial_launch when selecting Linux sandbox. + pub(crate) codex_linux_sandbox_exe: Option, + // Path to the codex binary itself (used by apply_patch backend to self-invoke when needed). pub(crate) codex_exe: Option, } @@ -41,16 +44,33 @@ impl ExecutorConfig { pub(crate) fn new( sandbox_policy: SandboxPolicy, sandbox_cwd: PathBuf, + codex_linux_sandbox_exe: Option, codex_exe: Option, ) -> Self { + let codex_exe = codex_exe.or_else(|| derive_codex_exe(&codex_linux_sandbox_exe)); Self { sandbox_policy, sandbox_cwd, + codex_linux_sandbox_exe, codex_exe, } } } +fn derive_codex_exe(sandbox_exe: &Option) -> Option { + sandbox_exe.as_ref().and_then(|path| { + let stem_matches_sandbox = path + .file_stem() + .and_then(|stem| stem.to_str()) + .is_some_and(|stem| stem == "codex-linux-sandbox"); + if stem_matches_sandbox { + None + } else { + Some(path.clone()) + } + }) +} + pub(crate) struct ExecutionPlan { request: ExecutionRequest, config: ExecutorConfig, @@ -86,7 +106,7 @@ impl ExecutionPlan { &self.request.params.command, &self.config.sandbox_policy, &self.config.sandbox_cwd, - self.config.codex_exe.as_ref(), + self.config.codex_linux_sandbox_exe.as_ref(), ) } diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 349c72c15b..313222dd24 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -339,7 +339,7 @@ mod tests { action, user_explicitly_approved_this_action: true, }; - let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); + let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None, None); let request = ExecutionRequest { params: ExecParams { command: vec!["apply_patch".into()], @@ -382,7 +382,12 @@ mod tests { action, user_explicitly_approved_this_action: false, }; - let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None); + let cfg = ExecutorConfig::new( + SandboxPolicy::DangerFullAccess, + std::env::temp_dir(), + None, + None, + ); let request = ExecutionRequest { params: ExecParams { command: vec!["apply_patch".into()], @@ -426,7 +431,7 @@ mod tests { action, user_explicitly_approved_this_action: false, }; - let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); + let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None, None); let request = ExecutionRequest { params: ExecParams { command: vec!["apply_patch".into()], @@ -465,7 +470,12 @@ mod tests { #[tokio::test] async fn select_shell_autoapprove_in_danger_mode() { let (session, ctx) = make_session_and_context(); - let cfg = ExecutorConfig::new(SandboxPolicy::DangerFullAccess, std::env::temp_dir(), None); + let cfg = ExecutorConfig::new( + SandboxPolicy::DangerFullAccess, + std::env::temp_dir(), + None, + None, + ); let request = ExecutionRequest { params: ExecParams { command: vec!["some-unknown".into()], @@ -501,7 +511,7 @@ mod tests { #[tokio::test] async fn select_shell_escalates_on_failure_with_platform_sandbox() { let (session, ctx) = make_session_and_context(); - let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None); + let cfg = ExecutorConfig::new(SandboxPolicy::ReadOnly, std::env::temp_dir(), None, None); let request = ExecutionRequest { params: ExecParams { // Unknown command => untrusted but not flagged dangerous diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index ebb977424f..06f76f96bf 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -495,24 +495,18 @@ async fn create_unified_exec_session( } #[cfg(test)] +#[cfg(unix)] mod tests { use super::*; - #[cfg(unix)] + use crate::codex::Session; - #[cfg(unix)] use crate::codex::TurnContext; - #[cfg(unix)] use crate::codex::make_session_and_context; - #[cfg(unix)] use crate::protocol::AskForApproval; - #[cfg(unix)] use crate::protocol::SandboxPolicy; - #[cfg(unix)] use core_test_support::skip_if_sandbox; - #[cfg(unix)] use std::sync::Arc; - #[cfg(unix)] fn test_session_and_turn() -> (Arc, Arc) { let (session, mut turn) = make_session_and_context(); turn.approval_policy = AskForApproval::Never; @@ -520,7 +514,6 @@ mod tests { (Arc::new(session), Arc::new(turn)) } - #[cfg(unix)] async fn run_unified_exec_request( session: &Arc, turn: &Arc, @@ -568,7 +561,6 @@ mod tests { assert_eq!(buffer.chunks.pop_back().unwrap(), vec![b'b']); } - #[cfg(unix)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn unified_exec_persists_across_requests_jif() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); @@ -610,7 +602,6 @@ mod tests { Ok(()) } - #[cfg(unix)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn multi_unified_exec_sessions() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); @@ -662,7 +653,6 @@ mod tests { Ok(()) } - #[cfg(unix)] #[tokio::test] async fn unified_exec_timeouts() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); @@ -712,7 +702,6 @@ mod tests { Ok(()) } - #[cfg(unix)] #[tokio::test] #[ignore] // Ignored while we have a better way to test this. async fn requests_with_large_timeout_are_capped() -> Result<(), UnifiedExecError> { @@ -735,7 +724,6 @@ mod tests { Ok(()) } - #[cfg(unix)] #[tokio::test] #[ignore] // Ignored while we have a better way to test this. async fn completed_commands_do_not_persist_sessions() -> Result<(), UnifiedExecError> { @@ -765,7 +753,6 @@ mod tests { Ok(()) } - #[cfg(unix)] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn reusing_completed_session_returns_unknown_session() -> Result<(), UnifiedExecError> { skip_if_sandbox!(Ok(())); From 65be622e9fac9eb3cb4fda8333ec1be47caac0a0 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Wed, 15 Oct 2025 15:18:26 +0100 Subject: [PATCH 15/15] Fix tests 2 --- codex-rs/core/src/executor/sandbox.rs | 5 ++--- codex-rs/core/src/safety.rs | 28 +++++++++++++-------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/codex-rs/core/src/executor/sandbox.rs b/codex-rs/core/src/executor/sandbox.rs index 313222dd24..eee65a5b4f 100644 --- a/codex-rs/core/src/executor/sandbox.rs +++ b/codex-rs/core/src/executor/sandbox.rs @@ -415,9 +415,8 @@ mod tests { ) .await .expect("ok"); - // On platforms with a sandbox, DangerFullAccess still prefers it - let expected = crate::safety::get_platform_sandbox().unwrap_or(SandboxType::None); - assert_eq!(decision.initial_sandbox, expected); + // DangerFullAccess bypasses sandboxing entirely. + assert_eq!(decision.initial_sandbox, SandboxType::None); assert_eq!(decision.escalate_on_failure, false); } diff --git a/codex-rs/core/src/safety.rs b/codex-rs/core/src/safety.rs index 0ed0f929ff..bf5c2c1319 100644 --- a/codex-rs/core/src/safety.rs +++ b/codex-rs/core/src/safety.rs @@ -55,23 +55,23 @@ pub fn assess_patch_safety( if is_write_patch_constrained_to_writable_paths(action, sandbox_policy, cwd) || policy == AskForApproval::OnFailure { - // Only auto‑approve when we can actually enforce a sandbox. Otherwise - // fall back to asking the user because the patch may touch arbitrary - // paths outside the project. - match get_platform_sandbox() { - Some(sandbox_type) => SafetyCheck::AutoApprove { - sandbox_type, + if matches!(sandbox_policy, SandboxPolicy::DangerFullAccess) { + // DangerFullAccess is intended to bypass sandboxing entirely. + SafetyCheck::AutoApprove { + sandbox_type: SandboxType::None, user_explicitly_approved: false, - }, - None if sandbox_policy == &SandboxPolicy::DangerFullAccess => { - // If the user has explicitly requested DangerFullAccess, then - // we can auto-approve even without a sandbox. - SafetyCheck::AutoApprove { - sandbox_type: SandboxType::None, + } + } else { + // Only auto‑approve when we can actually enforce a sandbox. Otherwise + // fall back to asking the user because the patch may touch arbitrary + // paths outside the project. + match get_platform_sandbox() { + Some(sandbox_type) => SafetyCheck::AutoApprove { + sandbox_type, user_explicitly_approved: false, - } + }, + None => SafetyCheck::AskUser, } - None => SafetyCheck::AskUser, } } else if policy == AskForApproval::Never { SafetyCheck::Reject {