diff --git a/nexus/mgs-updates/tests/host_phase1_hash.rs b/nexus/mgs-updates/tests/host_phase1_hash.rs index 0723c9f2e2..cbb87126db 100644 --- a/nexus/mgs-updates/tests/host_phase1_hash.rs +++ b/nexus/mgs-updates/tests/host_phase1_hash.rs @@ -19,6 +19,7 @@ use omicron_test_utils::dev::poll::CondCheckError; use omicron_test_utils::dev::poll::wait_for_condition; use sha2::Digest as _; use sha2::Sha256; +use sp_sim::HostFlashHashPolicy; use sp_sim::SimulatedSp; use std::time::Duration; use uuid::Uuid; @@ -79,6 +80,14 @@ async fn test_host_phase1_hashing() { sp_component, }; + // We want explicit (i.e., not-timer-based) control over when hashing + // completes. + let hashing_complete_sender = { + let (policy, sender) = HostFlashHashPolicy::channel(); + sp_sim.set_phase1_hash_policy(policy).await; + sender + }; + // We haven't yet started hashing; we should get the error we expect for // both slots. for firmware_slot in [0, 1] { @@ -97,11 +106,6 @@ async fn test_host_phase1_hashing() { } } - // We want explicit (i.e., not-timer-based) control over when hashing - // completes. - let hashing_complete_sender = - sp_sim.set_phase1_hash_policy_explicit_control().await; - // Start hashing firmware slot 0. mgs_client .sp_component_hash_firmware_start(sp_type, sp_slot, sp_component, 0) diff --git a/sp-sim/src/gimlet.rs b/sp-sim/src/gimlet.rs index 4dc2e19d20..e1f4773578 100644 --- a/sp-sim/src/gimlet.rs +++ b/sp-sim/src/gimlet.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::HostFlashHashCompletionSender; +use crate::HostFlashHashPolicy; use crate::Responsiveness; use crate::SimulatedSp; use crate::config::GimletConfig; @@ -207,7 +207,11 @@ impl SimulatedSp for Gimlet { } impl Gimlet { - pub async fn spawn(gimlet: &GimletConfig, log: Logger) -> Result { + pub async fn spawn( + gimlet: &GimletConfig, + phase1_hash_policy: HostFlashHashPolicy, + log: Logger, + ) -> Result { info!(log, "setting up simulated gimlet"); let attached_mgs = Arc::new(Mutex::new(None)); @@ -265,6 +269,7 @@ impl Gimlet { let mut update_state = SimSpUpdate::new( BaseboardKind::Gimlet, gimlet.common.no_stage0_caboose, + phase1_hash_policy, ); let ereport_state = { let mut cfg = gimlet.common.ereport_config.clone(); @@ -406,23 +411,20 @@ impl Gimlet { *self.last_request_handled.lock().unwrap() } - /// Instead of host phase 1 hashing completing after a few seconds, return a - /// handle that can be used to explicitly trigger completion. + /// Set the policy for simulating host phase 1 flash hashing. /// /// # Panics /// /// Panics if this `Gimlet` was created with only an RoT instead of a full /// SP + RoT complex. - pub async fn set_phase1_hash_policy_explicit_control( - &self, - ) -> HostFlashHashCompletionSender { + pub async fn set_phase1_hash_policy(&self, policy: HostFlashHashPolicy) { self.handler .as_ref() .expect("gimlet was created with SP config") .lock() .await .update_state - .set_phase1_hash_policy_explicit_control() + .set_phase1_hash_policy(policy) } } diff --git a/sp-sim/src/lib.rs b/sp-sim/src/lib.rs index d439f9477f..a4d0a9082e 100644 --- a/sp-sim/src/lib.rs +++ b/sp-sim/src/lib.rs @@ -27,6 +27,7 @@ use std::net::SocketAddrV6; use tokio::sync::mpsc; use tokio::sync::watch; pub use update::HostFlashHashCompletionSender; +pub use update::HostFlashHashPolicy; pub const SIM_ROT_BOARD: &str = "SimRot"; pub const SIM_ROT_STAGE0_BOARD: &str = "SimRotStage0"; @@ -156,6 +157,11 @@ impl SimRack { gimlets.push( Gimlet::spawn( gimlet, + // We could expose this in the config file if we want + // callers to be able configure timer-based hashing instead? + // For now, just use the fastest version (assume contents + // are always hashed). + HostFlashHashPolicy::assume_already_hashed(), log.new(slog::o!("slot" => format!("gimlet {}", i))), ) .await?, diff --git a/sp-sim/src/sidecar.rs b/sp-sim/src/sidecar.rs index f0be739afc..73abe51dbd 100644 --- a/sp-sim/src/sidecar.rs +++ b/sp-sim/src/sidecar.rs @@ -2,6 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::HostFlashHashPolicy; use crate::Responsiveness; use crate::SimulatedSp; use crate::config::Config; @@ -531,6 +532,8 @@ impl Handler { update_state: SimSpUpdate::new( BaseboardKind::Sidecar, no_stage0_caboose, + // sidecar doesn't have phase 1 flash; any policy is fine + HostFlashHashPolicy::assume_already_hashed(), ), reset_pending: None, should_fail_to_respond_signal: None, diff --git a/sp-sim/src/update.rs b/sp-sim/src/update.rs index 032f1491c5..92e582f5b1 100644 --- a/sp-sim/src/update.rs +++ b/sp-sim/src/update.rs @@ -29,10 +29,6 @@ use sha3::Digest; use sha3::Sha3_256; use tokio::sync::mpsc; -// How long do we take to hash host flash? Real SPs take a handful of seconds; -// we'll pick something similar. -const TIME_TO_HASH_HOST_PHASE_1: Duration = Duration::from_secs(5); - pub(crate) struct SimSpUpdate { /// tracks the state of any ongoing simulated update /// @@ -53,7 +49,7 @@ pub(crate) struct SimSpUpdate { /// us to default to `TIME_TO_HASH_HOST_PHASE_1` (e.g., for running sp-sim /// as a part of `omicron-dev`) while giving tests that want explicit /// control the ability to precisely trigger completion of hashing. - phase1_hash_policy: HostFlashHashPolicy, + phase1_hash_policy: HostFlashHashPolicyInner, /// records whether a change to the stage0 "active slot" has been requested pending_stage0_update: bool, @@ -78,6 +74,7 @@ impl SimSpUpdate { pub(crate) fn new( baseboard_kind: BaseboardKind, no_stage0_caboose: bool, + phase1_hash_policy: HostFlashHashPolicy, ) -> Self { const SP_GITC0: &str = "ffffffff"; const SP_GITC1: &str = "fefefefe"; @@ -194,9 +191,7 @@ impl SimSpUpdate { last_rot_update_data: None, last_host_phase1_update_data: BTreeMap::new(), phase1_hash_state: BTreeMap::new(), - phase1_hash_policy: HostFlashHashPolicy::Timer( - TIME_TO_HASH_HOST_PHASE_1, - ), + phase1_hash_policy: phase1_hash_policy.0, pending_stage0_update: false, @@ -211,14 +206,11 @@ impl SimSpUpdate { } } - /// Instead of host phase 1 hashing completing after a few seconds, return a - /// handle that can be used to explicitly trigger completion. - pub(crate) fn set_phase1_hash_policy_explicit_control( + pub(crate) fn set_phase1_hash_policy( &mut self, - ) -> HostFlashHashCompletionSender { - let (tx, rx) = mpsc::unbounded_channel(); - self.phase1_hash_policy = HostFlashHashPolicy::Channel(rx); - HostFlashHashCompletionSender(tx) + policy: HostFlashHashPolicy, + ) { + self.phase1_hash_policy = policy.0; } pub(crate) fn sp_update_prepare( @@ -493,28 +485,23 @@ impl SimSpUpdate { &mut self, slot: u16, ) -> Result<(), SpError> { + self.check_host_flash_state_and_policy(slot); match self .phase1_hash_state - .entry(slot) - .or_insert(HostFlashHashState::NeverHashed) + .get_mut(&slot) + .expect("check_host_flash_state_and_policy always inserts") { - // No current hash; record our start time so we can emulate hashing - // taking a few seconds. + // Already hashed; this is a no-op. + HostFlashHashState::Hashed(_) => Ok(()), + // No current hash; record our start time. state @ (HostFlashHashState::NeverHashed | HostFlashHashState::HashInvalidated) => { *state = HostFlashHashState::HashStarted(Instant::now()); Ok(()) } - // Already hashed; this is a no-op. - HostFlashHashState::Hashed(_) => Ok(()), - // Still hashing; check and see if it's done. This is either an - // error (if we're still hashing) or a no-op (if we're done). - HostFlashHashState::HashStarted(started) => { - let started = *started; - self.finalize_host_flash_hash_if_sufficient_time_elapsed( - slot, started, - )?; - Ok(()) + // Still hashing; this is an error. + HostFlashHashState::HashStarted(_) => { + Err(SpError::Hf(HfError::HashInProgress)) } } } @@ -523,51 +510,68 @@ impl SimSpUpdate { &mut self, slot: u16, ) -> Result<[u8; 32], SpError> { + self.check_host_flash_state_and_policy(slot); match self .phase1_hash_state - .entry(slot) - .or_insert(HostFlashHashState::NeverHashed) + .get_mut(&slot) + .expect("check_host_flash_state_and_policy always inserts") { + HostFlashHashState::Hashed(hash) => Ok(*hash), HostFlashHashState::NeverHashed => { Err(SpError::Hf(HfError::HashUncalculated)) } - HostFlashHashState::HashStarted(started) => { - let started = *started; - self.finalize_host_flash_hash_if_sufficient_time_elapsed( - slot, started, - ) + HostFlashHashState::HashStarted(_) => { + Err(SpError::Hf(HfError::HashInProgress)) } - HostFlashHashState::Hashed(hash) => Ok(*hash), HostFlashHashState::HashInvalidated => { Err(SpError::Hf(HfError::RecalculateHash)) } } } - fn finalize_host_flash_hash_if_sufficient_time_elapsed( - &mut self, - slot: u16, - started: Instant, - ) -> Result<[u8; 32], SpError> { - match &mut self.phase1_hash_policy { - HostFlashHashPolicy::Timer(duration) => { - if started.elapsed() < *duration { - return Err(SpError::Hf(HfError::HashInProgress)); - } - } - HostFlashHashPolicy::Channel(rx) => { - if rx.try_recv().is_err() { - return Err(SpError::Hf(HfError::HashInProgress)); - } - } + fn check_host_flash_state_and_policy(&mut self, slot: u16) { + let state = self + .phase1_hash_state + .entry(slot) + .or_insert(HostFlashHashState::NeverHashed); + + // If we've already hashed this slot, we're done. + if matches!(state, HostFlashHashState::Hashed(_)) { + return; } - let data = self.last_host_phase1_update_data(slot); - let data = data.as_deref().unwrap_or(&[]); - let hash = Sha256::digest(&data).into(); - self.phase1_hash_state.insert(slot, HostFlashHashState::Hashed(hash)); + // Should we hash the flash now? It depends on our state + policy. + let should_hash = match (&mut self.phase1_hash_policy, state) { + // If we want to always assume contents are hashed, compute that + // hash _unless_ the contents have changed (in which case a client + // would need to send us an explicit "start hashing" request). + ( + HostFlashHashPolicyInner::AssumeAlreadyHashed, + HostFlashHashState::HashInvalidated, + ) => false, + (HostFlashHashPolicyInner::AssumeAlreadyHashed, _) => true, + // If we're timer based, only hash if the timer has elapsed. + ( + HostFlashHashPolicyInner::Timer(timeout), + HostFlashHashState::HashStarted(started), + ) => *timeout >= started.elapsed(), + (HostFlashHashPolicyInner::Timer(_), _) => false, + // If we're channel based, only hash if we've gotten a request to + // start hashing and there's a message in the channel. + ( + HostFlashHashPolicyInner::Channel(rx), + HostFlashHashState::HashStarted(_), + ) => rx.try_recv().is_ok(), + (HostFlashHashPolicyInner::Channel(_), _) => false, + }; - Ok(hash) + if should_hash { + let data = self.last_host_phase1_update_data(slot); + let data = data.as_deref().unwrap_or(&[]); + let hash = Sha256::digest(&data).into(); + self.phase1_hash_state + .insert(slot, HostFlashHashState::Hashed(hash)); + } } pub(crate) fn get_component_caboose_value( @@ -784,14 +788,48 @@ fn fake_fwid_compute(data: &[u8]) -> Fwid { #[derive(Debug, Clone, Copy)] enum HostFlashHashState { + Hashed([u8; 32]), NeverHashed, HashStarted(Instant), - Hashed([u8; 32]), HashInvalidated, } +/// Policy controlling how `sp-sim` behaves when asked to flash its host phase 1 +/// contents. +#[derive(Debug)] +pub struct HostFlashHashPolicy(HostFlashHashPolicyInner); + +impl HostFlashHashPolicy { + /// Always return computed hashes when asked. + /// + /// This emulates an SP that has previously computed its phase 1 flash hash + /// and whose contents haven't changed. Most Nexus tests should use this + /// policy by default to allow inventory collections to complete promptly. + pub fn assume_already_hashed() -> Self { + Self(HostFlashHashPolicyInner::AssumeAlreadyHashed) + } + + /// Return `HashInProgress` for `timeout` after hashing has started before + /// completing it successfully. + pub fn timer(timeout: Duration) -> Self { + Self(HostFlashHashPolicyInner::Timer(timeout)) + } + + /// Returns a channel that allows the caller to control when hashing + /// completes. + pub fn channel() -> (Self, HostFlashHashCompletionSender) { + let (tx, rx) = mpsc::unbounded_channel(); + ( + Self(HostFlashHashPolicyInner::Channel(rx)), + HostFlashHashCompletionSender(tx), + ) + } +} + #[derive(Debug)] -enum HostFlashHashPolicy { +enum HostFlashHashPolicyInner { + /// always assume hashing has already been computed + AssumeAlreadyHashed, /// complete hashing after `Duration` has elapsed Timer(Duration), /// complete hashing if there's a message in this channel