From eb068406d39323b715f257a93c10b9cfa4261a35 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Wed, 23 Apr 2025 15:41:57 -0700 Subject: [PATCH 01/25] cherry-pick gdma changes --- Cargo.lock | 8 + Cargo.toml | 1 + vm/devices/net/mana_driver/Cargo.toml | 1 + vm/devices/net/mana_driver/src/gdma_driver.rs | 223 +++++++++++++++++- vm/devices/net/mana_driver/src/lib.rs | 1 + vm/devices/net/mana_driver/src/mana.rs | 2 +- vm/devices/net/mana_driver/src/queues.rs | 118 +++++++++ vm/devices/net/mana_driver/src/resources.rs | 40 ++++ .../net/mana_driver/src/save_restore.rs | 96 ++++++++ vm/devices/net/mana_driver/src/tests.rs | 46 +++- vm/devices/net/mana_save_restore/Cargo.toml | 13 + vm/devices/net/mana_save_restore/src/lib.rs | 95 ++++++++ .../user_driver_emulated_mock/src/lib.rs | 14 +- 13 files changed, 649 insertions(+), 9 deletions(-) create mode 100644 vm/devices/net/mana_driver/src/save_restore.rs create mode 100644 vm/devices/net/mana_save_restore/Cargo.toml create mode 100644 vm/devices/net/mana_save_restore/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index d2907f8140..e7f1cf936e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3865,6 +3865,7 @@ dependencies = [ "gdma_defs", "getrandom 0.3.2", "inspect", + "mana_save_restore", "mesh", "net_backend", "net_backend_resources", @@ -3880,6 +3881,13 @@ dependencies = [ "zerocopy 0.8.24", ] +[[package]] +name = "mana_save_restore" +version = "0.0.0" +dependencies = [ + "mesh", +] + [[package]] name = "managed" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 1083398cb4..6c95be669e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -229,6 +229,7 @@ gdma_defs = { path = "vm/devices/net/gdma_defs" } gdma_resources = { path = "vm/devices/net/gdma_resources" } linux_net_bindings = { path = "vm/devices/net/linux_net_bindings" } mana_driver = { path = "vm/devices/net/mana_driver" } +mana_save_restore = { path = "vm/devices/net/mana_save_restore" } vfio_sys = { path = "vm/devices/user_driver/vfio_sys" } net_backend = { path = "vm/devices/net/net_backend" } net_backend_resources = { path = "vm/devices/net/net_backend_resources" } diff --git a/vm/devices/net/mana_driver/Cargo.toml b/vm/devices/net/mana_driver/Cargo.toml index b6a468a6b2..c8854e054a 100644 --- a/vm/devices/net/mana_driver/Cargo.toml +++ b/vm/devices/net/mana_driver/Cargo.toml @@ -10,6 +10,7 @@ rust-version.workspace = true gdma_defs.workspace = true net_backend.workspace = true net_backend_resources.workspace = true +mana_save_restore.workspace = true chipset_device.workspace = true user_driver.workspace = true diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 4038454766..cd6ab7b81c 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -9,6 +9,8 @@ use crate::queues::Eq; use crate::queues::Wq; use crate::resources::Resource; use crate::resources::ResourceArena; +use crate::save_restore::GdmaDriverSavedState; +use crate::save_restore::InterruptSavedState; use anyhow::Context; use futures::FutureExt; use gdma_defs::Cqe; @@ -67,6 +69,8 @@ use gdma_defs::Sge; use gdma_defs::SmcMessageType; use gdma_defs::SmcProtoHdr; use inspect::Inspect; +use mana_save_restore::save_restore::DoorbellSavedState; +use mana_save_restore::save_restore::SavedMemoryState; use pal_async::driver::Driver; use std::collections::HashMap; use std::mem::ManuallyDrop; @@ -118,6 +122,13 @@ impl Doorbell for Bar0 { safe_intrinsics::store_fence(); self.mem.write_u64(offset as usize, value); } + + fn 
save(&self, doorbell_id: Option) -> DoorbellSavedState { + DoorbellSavedState { + doorbell_id: doorbell_id.unwrap(), + page_count: self.page_count(), + } + } } #[derive(Inspect)] @@ -148,6 +159,8 @@ pub struct GdmaDriver { hwc_warning_time_in_ms: u32, hwc_timeout_in_ms: u32, hwc_failure: bool, + db_id: u32, + saving: bool, } const EQ_PAGE: usize = 0; @@ -163,9 +176,17 @@ const RWQE_SIZE: u32 = 32; impl Drop for GdmaDriver { fn drop(&mut self) { + tracing::debug!(?self.saving, ?self.hwc_failure, "dropping gdma driver"); + + // Don't destroy anything if we're saving its state for restoration. + if self.saving { + return; + } + if self.hwc_failure { return; } + let data = self .bar0 .mem @@ -230,7 +251,12 @@ impl GdmaDriver { self.bar0.clone() as _ } - pub async fn new(driver: &impl Driver, mut device: T, num_vps: u32) -> anyhow::Result { + pub async fn new( + driver: &impl Driver, + mut device: T, + num_vps: u32, + dma_buffer: Option, + ) -> anyhow::Result { let bar0_mapping = device.map_bar(0)?; let bar0_len = bar0_mapping.len(); if bar0_len < size_of::() { @@ -280,11 +306,14 @@ impl GdmaDriver { ); } - let dma_client = device.dma_client(); - - let dma_buffer = dma_client - .allocate_dma_buffer(NUM_PAGES * PAGE_SIZE) - .context("failed to allocate DMA buffer")?; + let dma_buffer = if let Some(dma_buffer) = dma_buffer { + dma_buffer + } else { + let dma_client = device.dma_client(); + dma_client + .allocate_dma_buffer(NUM_PAGES * PAGE_SIZE) + .context("failed to allocate DMA buffer")? + }; let pages = dma_buffer.pfns(); @@ -475,6 +504,8 @@ impl GdmaDriver { hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS, hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS, hwc_failure: false, + saving: false, + db_id, }; this.push_rqe(); @@ -499,6 +530,183 @@ impl GdmaDriver { Ok(this) } + #[allow(dead_code)] + pub async fn save(mut self) -> anyhow::Result { + self.saving = true; + + let doorbell = self.bar0.save(Some(self.db_id as u64)); + + let mut interrupt_config = Vec::new(); + for (index, interrupt) in self.interrupts.iter().enumerate() { + if interrupt.is_some() { + interrupt_config.push(InterruptSavedState { + msix_index: index as u32, + cpu: index as u32, + }); + } + } + + Ok(GdmaDriverSavedState { + mem: SavedMemoryState { + base_pfn: self.dma_buffer.pfns()[0], + len: self.dma_buffer.len(), + }, + eq: self.eq.save(), + cq: self.cq.save(), + rq: self.rq.save(), + sq: self.sq.save(), + db_id: doorbell.doorbell_id, + gpa_mkey: self.gpa_mkey, + pdid: self._pdid, + cq_armed: self.cq_armed, + eq_armed: self.eq_armed, + hwc_subscribed: self.hwc_subscribed, + eq_id_msix: self.eq_id_msix.clone(), + hwc_activity_id: self.hwc_activity_id, + num_msix: self.num_msix, + min_queue_avail: self.min_queue_avail, + link_toggle: self.link_toggle.clone(), + interrupt_config, + }) + } + + #[allow(dead_code)] + pub async fn restore( + saved_state: GdmaDriverSavedState, + mut device: T, + dma_buffer: MemoryBlock, + ) -> anyhow::Result { + tracing::info!("restoring gdma driver"); + + let bar0_mapping = device.map_bar(0)?; + let bar0_len = bar0_mapping.len(); + if bar0_len < size_of::() { + anyhow::bail!("bar0 ({} bytes) too small for reg map", bar0_mapping.len()); + } + + let mut map = RegMap::new_zeroed(); + for i in 0..size_of_val(&map) / 4 { + let v = bar0_mapping.read_u32(i * 4); + // Unmapped device memory will return -1 on reads, so check the first 32 + // bits for this condition to get a clear error message early. 
+ if i == 0 && v == !0 { + anyhow::bail!("bar0 read returned -1, device is not present"); + } + map.as_mut_bytes()[i * 4..(i + 1) * 4].copy_from_slice(&v.to_ne_bytes()); + } + + tracing::debug!(?map, "register map on restore"); + + // Log on unknown major version numbers. This is not necessarily an + // error, so continue. + if map.major_version_number != 0 && map.major_version_number != 1 { + tracing::warn!( + major = map.major_version_number, + minor = map.minor_version_number, + micro = map.micro_version_number, + "unrecognized major version" + ); + } + + if map.vf_gdma_sriov_shared_sz != 32 { + anyhow::bail!( + "unexpected shared memory size: {}", + map.vf_gdma_sriov_shared_sz + ); + } + + if (bar0_len as u64).saturating_sub(map.vf_gdma_sriov_shared_reg_start) + < map.vf_gdma_sriov_shared_sz as u64 + { + anyhow::bail!( + "bar0 ({} bytes) too small for shared memory at {}", + bar0_mapping.len(), + map.vf_gdma_sriov_shared_reg_start + ); + } + + let doorbell_shift = map.vf_db_page_sz.trailing_zeros(); + let bar0 = Arc::new(Bar0 { + mem: bar0_mapping, + map, + doorbell_shift, + }); + + let eq = Eq::restore( + dma_buffer.subblock(0, PAGE_SIZE), + saved_state.eq, + DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?, + )?; + + let db_id = saved_state.db_id; + let cq = Cq::restore( + dma_buffer.subblock(CQ_PAGE * PAGE_SIZE, PAGE_SIZE), + saved_state.cq, + DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?, + )?; + + let rq = Wq::restore_rq( + dma_buffer.subblock(RQ_PAGE * PAGE_SIZE, PAGE_SIZE), + saved_state.rq, + DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?, + )?; + + let sq = Wq::restore_sq( + dma_buffer.subblock(SQ_PAGE * PAGE_SIZE, PAGE_SIZE), + saved_state.sq, + DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?, + )?; + + let mut interrupts = vec![None; saved_state.num_msix as usize]; + for int_state in &saved_state.interrupt_config { + let interrupt = device.map_interrupt(int_state.msix_index, int_state.cpu)?; + + interrupts[int_state.msix_index as usize] = Some(interrupt); + } + + let mut this = Self { + device: Some(device), + bar0, + dma_buffer, + interrupts, + eq, + cq, + rq, + sq, + test_events: 0, + eq_armed: saved_state.eq_armed, + cq_armed: saved_state.cq_armed, + gpa_mkey: saved_state.gpa_mkey, + _pdid: saved_state.pdid, + eq_id_msix: saved_state.eq_id_msix, + num_msix: saved_state.num_msix, + min_queue_avail: saved_state.min_queue_avail, + hwc_activity_id: saved_state.hwc_activity_id, + link_toggle: saved_state.link_toggle, + hwc_subscribed: saved_state.hwc_subscribed, + hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS, + hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS, + hwc_failure: false, + saving: false, + db_id: db_id as u32, + }; + + if saved_state.hwc_subscribed { + this.hwc_subscribe(); + } + + if saved_state.eq_armed { + this.eq.arm(); + } + + if saved_state.cq_armed { + this.cq.arm(); + } + + tracing::info!("exiting restore"); + Ok(this) + } + async fn report_hwc_timeout( &mut self, last_cmd_failed: bool, @@ -1017,6 +1225,7 @@ impl GdmaDriver { ) })?; } + Ok(()) } @@ -1061,11 +1270,13 @@ impl GdmaDriver { &mut self, dev_id: GdmaDevId, ) -> anyhow::Result { + tracing::info!("registering device"); self.request(GdmaRequestType::GDMA_REGISTER_DEVICE.0, dev_id, ()) .await } pub async fn deregister_device(&mut self, dev_id: GdmaDevId) -> anyhow::Result<()> { + tracing::info!("deregistering device"); self.hwc_timeout_in_ms = HWC_TIMEOUT_FOR_SHUTDOWN_IN_MS; self.request(GdmaRequestType::GDMA_DEREGISTER_DEVICE.0, dev_id, ()) .await diff --git 
a/vm/devices/net/mana_driver/src/lib.rs b/vm/devices/net/mana_driver/src/lib.rs index a510b4b65e..2572de3a1a 100644 --- a/vm/devices/net/mana_driver/src/lib.rs +++ b/vm/devices/net/mana_driver/src/lib.rs @@ -10,5 +10,6 @@ mod gdma_driver; pub mod mana; pub mod queues; mod resources; +pub mod save_restore; #[cfg(test)] mod tests; diff --git a/vm/devices/net/mana_driver/src/mana.rs b/vm/devices/net/mana_driver/src/mana.rs index 8bfa10cd12..3af920e270 100644 --- a/vm/devices/net/mana_driver/src/mana.rs +++ b/vm/devices/net/mana_driver/src/mana.rs @@ -77,7 +77,7 @@ impl ManaDevice { num_vps: u32, max_queues_per_vport: u16, ) -> anyhow::Result { - let mut gdma = GdmaDriver::new(driver, device, num_vps).await?; + let mut gdma = GdmaDriver::new(driver, device, num_vps, None).await?; gdma.test_eq().await?; gdma.verify_vf_driver_version().await?; diff --git a/vm/devices/net/mana_driver/src/queues.rs b/vm/devices/net/mana_driver/src/queues.rs index f0c696ab32..7679b60bfc 100644 --- a/vm/devices/net/mana_driver/src/queues.rs +++ b/vm/devices/net/mana_driver/src/queues.rs @@ -22,6 +22,9 @@ use gdma_defs::WqDoorbellValue; use gdma_defs::WqeHeader; use gdma_defs::WqeParams; use inspect::Inspect; +use mana_save_restore::save_restore::CqEqSavedState; +use mana_save_restore::save_restore::DoorbellSavedState; +use mana_save_restore::save_restore::WqSavedState; use std::marker::PhantomData; use std::sync::Arc; use std::sync::atomic::Ordering::Acquire; @@ -37,6 +40,8 @@ pub trait Doorbell: Send + Sync { fn page_count(&self) -> u32; /// Write a doorbell value at page `page`, offset `address`. fn write(&self, page: u32, address: u32, value: u64); + /// Save the doorbell state. + fn save(&self, doorbell_id: Option) -> DoorbellSavedState; } struct NullDoorbell; @@ -47,6 +52,13 @@ impl Doorbell for NullDoorbell { } fn write(&self, _page: u32, _address: u32, _value: u64) {} + + fn save(&self, _doorbell_id: Option) -> DoorbellSavedState { + DoorbellSavedState { + doorbell_id: 0, + page_count: 0, + } + } } /// A single GDMA doorbell page. @@ -114,6 +126,25 @@ impl CqEq { pub fn new_cq(mem: MemoryBlock, doorbell: DoorbellPage, id: u32) -> Self { Self::new(GdmaQueueType::GDMA_CQ, DB_CQ, mem, doorbell, id) } + + /// Restores an existing completion queue. + pub fn restore( + mem: MemoryBlock, + state: CqEqSavedState, + doorbell: DoorbellPage, + ) -> anyhow::Result { + Ok(Self { + doorbell, + doorbell_addr: state.doorbell_addr, + queue_type: GdmaQueueType::GDMA_CQ, + mem, + id: state.id, + next: state.next, + size: state.size, + shift: state.shift, + _phantom: PhantomData, + }) + } } impl CqEq { @@ -121,6 +152,25 @@ impl CqEq { pub fn new_eq(mem: MemoryBlock, doorbell: DoorbellPage, id: u32) -> Self { Self::new(GdmaQueueType::GDMA_EQ, DB_EQ, mem, doorbell, id) } + + /// Restores an existing event queue. + pub fn restore( + mem: MemoryBlock, + state: CqEqSavedState, + doorbell: DoorbellPage, + ) -> anyhow::Result { + Ok(Self { + doorbell, + doorbell_addr: state.doorbell_addr, + queue_type: GdmaQueueType::GDMA_EQ, + mem, + id: state.id, + next: state.next, + size: state.size, + shift: state.shift, + _phantom: PhantomData, + }) + } } impl CqEq { @@ -147,6 +197,21 @@ impl CqEq { } } + /// Save the state of the queue for restoration after servicing. 
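+    /// Queue memory itself is not captured here; it is part of the driver's
+    /// DMA buffer, whose pages are recorded separately in `SavedMemoryState`.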
+ pub fn save(&self) -> CqEqSavedState { + CqEqSavedState { + doorbell: DoorbellSavedState { + doorbell_id: self.doorbell.doorbell_id as u64, + page_count: self.doorbell.doorbell.page_count(), + }, + doorbell_addr: self.doorbell_addr, + id: self.id, + next: self.next, + size: self.size, + shift: self.shift, + } + } + /// Updates the queue ID. pub(crate) fn set_id(&mut self, id: u32) { self.id = id; @@ -284,6 +349,59 @@ impl Wq { } } + /// Save the state of the Wq for restoration after servicing + pub fn save(&self) -> WqSavedState { + WqSavedState { + doorbell: DoorbellSavedState { + doorbell_id: self.doorbell.doorbell_id as u64, + page_count: self.doorbell.doorbell.page_count(), + }, + doorbell_addr: self.doorbell_addr, + id: self.id, + head: self.head, + tail: self.tail, + mask: self.mask, + } + } + + /// Restores an existing receive work queue. + pub fn restore_rq( + mem: MemoryBlock, + state: WqSavedState, + doorbell: DoorbellPage, + ) -> anyhow::Result { + Ok(Self { + doorbell, + doorbell_addr: state.doorbell_addr, + queue_type: GdmaQueueType::GDMA_RQ, + mem, + id: state.id, + head: state.head, + tail: state.tail, + mask: state.mask, + uncommitted_count: 0, + }) + } + + /// Restores an existing send work queue. + pub fn restore_sq( + mem: MemoryBlock, + state: WqSavedState, + doorbell: DoorbellPage, + ) -> anyhow::Result { + Ok(Self { + doorbell, + doorbell_addr: state.doorbell_addr, + queue_type: GdmaQueueType::GDMA_SQ, + mem, + id: state.id, + head: state.head, + tail: state.tail, + mask: state.mask, + uncommitted_count: 0, + }) + } + /// Returns the queue ID. pub fn id(&self) -> u32 { self.id diff --git a/vm/devices/net/mana_driver/src/resources.rs b/vm/devices/net/mana_driver/src/resources.rs index 9066c17b3e..58bea4e798 100644 --- a/vm/devices/net/mana_driver/src/resources.rs +++ b/vm/devices/net/mana_driver/src/resources.rs @@ -5,6 +5,7 @@ use crate::bnic_driver::BnicDriver; use crate::gdma_driver::GdmaDriver; use gdma_defs::GdmaDevId; use gdma_defs::GdmaQueueType; +use std::fmt::Debug; use std::mem::ManuallyDrop; use user_driver::DeviceBacking; use user_driver::memory::MemoryBlock; @@ -38,6 +39,45 @@ pub(crate) enum Resource { }, } +impl Debug for Resource { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Resource::MemoryBlock(_) => f.debug_tuple("MemoryBlock").finish(), + Resource::DmaRegion { + dev_id, + gdma_region, + } => f + .debug_struct("DmaRegion") + .field("dev_id", dev_id) + .field("gdma_region", gdma_region) + .finish(), + Resource::Eq { dev_id, eq_id } => f + .debug_struct("Eq") + .field("dev_id", dev_id) + .field("eq_id", eq_id) + .finish(), + Resource::BnicQueue { + dev_id, + wq_type, + wq_obj, + } => f + .debug_struct("BnicQueue") + .field("dev_id", dev_id) + .field("wq_type", wq_type) + .field("wq_obj", wq_obj) + .finish(), + } + } +} + +impl Debug for ResourceArena { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("ResourceArena") + .field("resources", &self.resources) + .finish() + } +} + impl ResourceArena { /// Creates a new empty resource arena. pub fn new() -> Self { diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs new file mode 100644 index 0000000000..8baef1cbde --- /dev/null +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -0,0 +1,96 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Types to save and restore the state of a MANA device. 
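+//!
+//! These types derive `mesh` Protobuf so the saved driver state can be carried
+//! across a servicing operation and handed back to `GdmaDriver::restore`.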
+ +use mana_save_restore::save_restore::CqEqSavedState; +use mana_save_restore::save_restore::SavedMemoryState; +use mana_save_restore::save_restore::WqSavedState; +use mesh::payload::Protobuf; +use std::collections::HashMap; + +/// Top level saved state for the GDMA driver's saved state +#[derive(Protobuf, Clone, Debug)] +#[mesh(package = "mana_driver")] +pub struct GdmaDriverSavedState { + /// Memory to be restored by a DMA client + #[mesh(1)] + pub mem: SavedMemoryState, + + /// EQ to be restored + #[mesh(2)] + pub eq: CqEqSavedState, + + /// CQ to be restored + #[mesh(3)] + pub cq: CqEqSavedState, + + /// RQ to be restored + #[mesh(4)] + pub rq: WqSavedState, + + /// SQ to be restored + #[mesh(5)] + pub sq: WqSavedState, + + /// Doorbell id + #[mesh(6)] + pub db_id: u64, + + /// Guest physical address memory key + #[mesh(7)] + pub gpa_mkey: u32, + + /// Protection domain id + #[mesh(8)] + pub pdid: u32, + + /// Whether the driver is subscribed to hwc + #[mesh(9)] + pub hwc_subscribed: bool, + + /// Whether the eq is armed or not + #[mesh(10)] + pub eq_armed: bool, + + /// Whether the cq is armed or not + #[mesh(11)] + pub cq_armed: bool, + + /// Event queue id to msix mapping + #[mesh(12)] + pub eq_id_msix: HashMap, + + /// The id of the hwc activity + #[mesh(13)] + pub hwc_activity_id: u32, + + /// How many msix vectors are available + #[mesh(14)] + pub num_msix: u32, + + /// Minimum number of queues available + #[mesh(15)] + pub min_queue_avail: u32, + + /// Saved interrupts for restoration + #[mesh(16)] + pub interrupt_config: Vec, + + /// Link status by vport index + #[mesh(17)] + pub link_toggle: Vec<(u32, bool)>, +} + +/// Saved state of an interrupt for restoration during servicing +#[derive(Protobuf, Clone, Debug)] +#[mesh(package = "mana_driver")] +pub struct InterruptSavedState { + /// The index in the msix table for this interrupt + #[mesh(1)] + pub msix_index: u32, + + /// Which CPU this interrupt is assigned to + #[mesh(2)] + pub cpu: u32, +} diff --git a/vm/devices/net/mana_driver/src/tests.rs b/vm/devices/net/mana_driver/src/tests.rs index 77b2d38c84..019cd9ffa0 100644 --- a/vm/devices/net/mana_driver/src/tests.rs +++ b/vm/devices/net/mana_driver/src/tests.rs @@ -42,8 +42,12 @@ async fn test_gdma(driver: DefaultDriver) { ); let dma_client = mem.dma_client(); let device = EmulatedDevice::new(device, msi_set, dma_client); + let dma_client = device.dma_client(); + let buffer = dma_client.allocate_dma_buffer(6 * PAGE_SIZE).unwrap(); - let mut gdma = GdmaDriver::new(&driver, device, 1).await.unwrap(); + let mut gdma = GdmaDriver::new(&driver, device, 1, Some(buffer)) + .await + .unwrap(); gdma.test_eq().await.unwrap(); gdma.verify_vf_driver_version().await.unwrap(); let dev_id = gdma @@ -159,3 +163,43 @@ async fn test_gdma(driver: DefaultDriver) { .unwrap(); arena.destroy(&mut gdma).await; } + +#[async_test] +async fn test_gdma_save_restore(driver: DefaultDriver) { + let mem = DeviceTestMemory::new(128, false, "test_gdma"); + let mut msi_set = MsiInterruptSet::new(); + let device = gdma::GdmaDevice::new( + &VmTaskDriverSource::new(SingleDriverBackend::new(driver.clone())), + mem.guest_memory(), + &mut msi_set, + vec![VportConfig { + mac_address: [1, 2, 3, 4, 5, 6].into(), + endpoint: Box::new(NullEndpoint::new()), + }], + &mut ExternallyManagedMmioIntercepts, + ); + let dma_client = mem.dma_client(); + + let device = EmulatedDevice::new(device, msi_set, dma_client); + let cloned_device = device.clone(); + + let dma_client = device.dma_client(); + let gdma_buffer = 
dma_client.allocate_dma_buffer(6 * PAGE_SIZE).unwrap(); + + let saved_state = { + let mut gdma = GdmaDriver::new(&driver, device, 1, Some(gdma_buffer.clone())) + .await + .unwrap(); + + gdma.test_eq().await.unwrap(); + gdma.verify_vf_driver_version().await.unwrap(); + gdma.save().await.unwrap() + }; + + let mut new_gdma = GdmaDriver::restore(saved_state, cloned_device, gdma_buffer) + .await + .unwrap(); + + // Validate that the new driver still works after restoration. + new_gdma.test_eq().await.unwrap(); +} diff --git a/vm/devices/net/mana_save_restore/Cargo.toml b/vm/devices/net/mana_save_restore/Cargo.toml new file mode 100644 index 0000000000..cc9d0d0e00 --- /dev/null +++ b/vm/devices/net/mana_save_restore/Cargo.toml @@ -0,0 +1,13 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +[package] +name = "mana_save_restore" +edition.workspace = true +rust-version.workspace = true + +[dependencies] +mesh.workspace = true + +[lints] +workspace = true diff --git a/vm/devices/net/mana_save_restore/src/lib.rs b/vm/devices/net/mana_save_restore/src/lib.rs new file mode 100644 index 0000000000..8064eb799b --- /dev/null +++ b/vm/devices/net/mana_save_restore/src/lib.rs @@ -0,0 +1,95 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Mana save/restore module + +/// Module containing structures for saving and restoring MANA device state +pub mod save_restore { + use mesh::payload::Protobuf; + + /// The saved state of a completion queue or event queue for restoration + /// during servicing + #[derive(Clone, Protobuf, Debug)] + #[mesh(package = "mana_driver")] + pub struct CqEqSavedState { + /// The doorbell state of the queue, which is how the device is notified + #[mesh(1)] + pub doorbell: DoorbellSavedState, + + /// The address of the doorbell register + #[mesh(2)] + pub doorbell_addr: u32, + + /// The id of the queue + #[mesh(3)] + pub id: u32, + + /// The index of the next entry in the queue + #[mesh(4)] + pub next: u32, + + /// The total size of the queue + #[mesh(5)] + pub size: u32, + + /// The bit shift value for the queue + #[mesh(6)] + pub shift: u32, + } + + /// Saved state of a doorbell for restoration during servicing + #[derive(Clone, Protobuf, Debug)] + #[mesh(package = "mana_driver")] + pub struct DoorbellSavedState { + /// The doorbell's id + #[mesh(1)] + pub doorbell_id: u64, + + /// The number of pages allocated for the doorbell + #[mesh(2)] + pub page_count: u32, + } + + /// Saved state of a work queue for restoration during servicing + #[derive(Debug, Protobuf, Clone)] + #[mesh(package = "mana_driver")] + pub struct WqSavedState { + /// The doorbell state of the queue, which is how the device is notified + #[mesh(1)] + pub doorbell: DoorbellSavedState, + + /// The address of the doorbell + #[mesh(2)] + pub doorbell_addr: u32, + + /// The id of the queue + #[mesh(3)] + pub id: u32, + + /// The head of the queue + #[mesh(4)] + pub head: u32, + + /// The tail of the queue + #[mesh(5)] + pub tail: u32, + + /// The bitmask for wrapping queue indices + #[mesh(6)] + pub mask: u32, + } + + /// Saved state for a memory region used by the driver + /// to be restored by a DMA client during servicing + #[derive(Debug, Protobuf, Clone)] + #[mesh(package = "mana_driver")] + pub struct SavedMemoryState { + /// The base page frame number of the memory region + #[mesh(1)] + pub base_pfn: u64, + + /// How long the memory region is + #[mesh(2)] + pub len: usize, + } +} diff --git a/vm/devices/user_driver_emulated_mock/src/lib.rs 
b/vm/devices/user_driver_emulated_mock/src/lib.rs index fb68810cd5..0f04b9f2b5 100644 --- a/vm/devices/user_driver_emulated_mock/src/lib.rs +++ b/vm/devices/user_driver_emulated_mock/src/lib.rs @@ -39,7 +39,7 @@ use user_driver::memory::PAGE_SIZE64; /// allowing the user to control device behaviour to a certain extent. Can be used with devices such as the `NvmeController` pub struct EmulatedDevice { device: Arc>, - controller: MsiController, + controller: Arc, dma_client: Arc, bar0_len: usize, } @@ -77,6 +77,17 @@ impl MsiInterruptTarget for MsiController { } } +impl Clone for EmulatedDevice { + fn clone(&self) -> Self { + Self { + device: self.device.clone(), + controller: self.controller.clone(), + dma_client: self.dma_client.clone(), + bar0_len: self.bar0_len, + } + } +} + impl EmulatedDevice { /// Creates a new emulated device, wrapping `device` of type T, using the provided MSI Interrupt Set. Dma_client should point to memory /// shared with the device. @@ -84,6 +95,7 @@ impl EmulatedDevice { // Connect an interrupt controller. let controller = MsiController::new(msi_set.len()); msi_set.connect(&controller); + let controller = Arc::new(controller); let bars = device.probe_bar_masks(); let bar0_len = !(bars[0] & !0xf) as usize + 1; From e9e293f307a85e18cd677f5546bad4b50acdd174 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 24 Apr 2025 10:18:34 -0700 Subject: [PATCH 02/25] some cleanup --- vm/devices/net/mana_driver/src/gdma_driver.rs | 4 -- vm/devices/net/mana_driver/src/resources.rs | 39 ------------------- 2 files changed, 43 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index cd6ab7b81c..935e07fda3 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -703,7 +703,6 @@ impl GdmaDriver { this.cq.arm(); } - tracing::info!("exiting restore"); Ok(this) } @@ -1225,7 +1224,6 @@ impl GdmaDriver { ) })?; } - Ok(()) } @@ -1270,13 +1268,11 @@ impl GdmaDriver { &mut self, dev_id: GdmaDevId, ) -> anyhow::Result { - tracing::info!("registering device"); self.request(GdmaRequestType::GDMA_REGISTER_DEVICE.0, dev_id, ()) .await } pub async fn deregister_device(&mut self, dev_id: GdmaDevId) -> anyhow::Result<()> { - tracing::info!("deregistering device"); self.hwc_timeout_in_ms = HWC_TIMEOUT_FOR_SHUTDOWN_IN_MS; self.request(GdmaRequestType::GDMA_DEREGISTER_DEVICE.0, dev_id, ()) .await diff --git a/vm/devices/net/mana_driver/src/resources.rs b/vm/devices/net/mana_driver/src/resources.rs index 58bea4e798..2d64e24d04 100644 --- a/vm/devices/net/mana_driver/src/resources.rs +++ b/vm/devices/net/mana_driver/src/resources.rs @@ -39,45 +39,6 @@ pub(crate) enum Resource { }, } -impl Debug for Resource { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Resource::MemoryBlock(_) => f.debug_tuple("MemoryBlock").finish(), - Resource::DmaRegion { - dev_id, - gdma_region, - } => f - .debug_struct("DmaRegion") - .field("dev_id", dev_id) - .field("gdma_region", gdma_region) - .finish(), - Resource::Eq { dev_id, eq_id } => f - .debug_struct("Eq") - .field("dev_id", dev_id) - .field("eq_id", eq_id) - .finish(), - Resource::BnicQueue { - dev_id, - wq_type, - wq_obj, - } => f - .debug_struct("BnicQueue") - .field("dev_id", dev_id) - .field("wq_type", wq_type) - .field("wq_obj", wq_obj) - .finish(), - } - } -} - -impl Debug for ResourceArena { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - 
f.debug_struct("ResourceArena") - .field("resources", &self.resources) - .finish() - } -} - impl ResourceArena { /// Creates a new empty resource arena. pub fn new() -> Self { From 1d5479dbf697598f20ffc46c0b835b0929ebc98a Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 24 Apr 2025 10:19:58 -0700 Subject: [PATCH 03/25] unused import --- vm/devices/net/mana_driver/src/resources.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/vm/devices/net/mana_driver/src/resources.rs b/vm/devices/net/mana_driver/src/resources.rs index 2d64e24d04..9066c17b3e 100644 --- a/vm/devices/net/mana_driver/src/resources.rs +++ b/vm/devices/net/mana_driver/src/resources.rs @@ -5,7 +5,6 @@ use crate::bnic_driver::BnicDriver; use crate::gdma_driver::GdmaDriver; use gdma_defs::GdmaDevId; use gdma_defs::GdmaQueueType; -use std::fmt::Debug; use std::mem::ManuallyDrop; use user_driver::DeviceBacking; use user_driver::memory::MemoryBlock; From 93dfba985c4226f909a387d2ec35552057088b9d Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 24 Apr 2025 12:12:11 -0700 Subject: [PATCH 04/25] get rid of crate --- Cargo.lock | 8 -- Cargo.toml | 1 - vm/devices/net/mana_driver/Cargo.toml | 1 - vm/devices/net/mana_driver/src/gdma_driver.rs | 4 +- vm/devices/net/mana_driver/src/queues.rs | 6 +- .../net/mana_driver/src/save_restore.rs | 89 ++++++++++++++++- vm/devices/net/mana_save_restore/Cargo.toml | 13 --- vm/devices/net/mana_save_restore/src/lib.rs | 95 ------------------- 8 files changed, 91 insertions(+), 126 deletions(-) delete mode 100644 vm/devices/net/mana_save_restore/Cargo.toml delete mode 100644 vm/devices/net/mana_save_restore/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index e7f1cf936e..d2907f8140 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3865,7 +3865,6 @@ dependencies = [ "gdma_defs", "getrandom 0.3.2", "inspect", - "mana_save_restore", "mesh", "net_backend", "net_backend_resources", @@ -3881,13 +3880,6 @@ dependencies = [ "zerocopy 0.8.24", ] -[[package]] -name = "mana_save_restore" -version = "0.0.0" -dependencies = [ - "mesh", -] - [[package]] name = "managed" version = "0.8.0" diff --git a/Cargo.toml b/Cargo.toml index 6c95be669e..1083398cb4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -229,7 +229,6 @@ gdma_defs = { path = "vm/devices/net/gdma_defs" } gdma_resources = { path = "vm/devices/net/gdma_resources" } linux_net_bindings = { path = "vm/devices/net/linux_net_bindings" } mana_driver = { path = "vm/devices/net/mana_driver" } -mana_save_restore = { path = "vm/devices/net/mana_save_restore" } vfio_sys = { path = "vm/devices/user_driver/vfio_sys" } net_backend = { path = "vm/devices/net/net_backend" } net_backend_resources = { path = "vm/devices/net/net_backend_resources" } diff --git a/vm/devices/net/mana_driver/Cargo.toml b/vm/devices/net/mana_driver/Cargo.toml index c8854e054a..b6a468a6b2 100644 --- a/vm/devices/net/mana_driver/Cargo.toml +++ b/vm/devices/net/mana_driver/Cargo.toml @@ -10,7 +10,6 @@ rust-version.workspace = true gdma_defs.workspace = true net_backend.workspace = true net_backend_resources.workspace = true -mana_save_restore.workspace = true chipset_device.workspace = true user_driver.workspace = true diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 935e07fda3..20e64d6e53 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -9,8 +9,10 @@ use crate::queues::Eq; use crate::queues::Wq; use crate::resources::Resource; use crate::resources::ResourceArena; +use 
crate::save_restore::DoorbellSavedState; use crate::save_restore::GdmaDriverSavedState; use crate::save_restore::InterruptSavedState; +use crate::save_restore::SavedMemoryState; use anyhow::Context; use futures::FutureExt; use gdma_defs::Cqe; @@ -69,8 +71,6 @@ use gdma_defs::Sge; use gdma_defs::SmcMessageType; use gdma_defs::SmcProtoHdr; use inspect::Inspect; -use mana_save_restore::save_restore::DoorbellSavedState; -use mana_save_restore::save_restore::SavedMemoryState; use pal_async::driver::Driver; use std::collections::HashMap; use std::mem::ManuallyDrop; diff --git a/vm/devices/net/mana_driver/src/queues.rs b/vm/devices/net/mana_driver/src/queues.rs index 7679b60bfc..272ba16915 100644 --- a/vm/devices/net/mana_driver/src/queues.rs +++ b/vm/devices/net/mana_driver/src/queues.rs @@ -3,6 +3,9 @@ //! Types to access work, completion, and event queues. +use crate::save_restore::CqEqSavedState; +use crate::save_restore::DoorbellSavedState; +use crate::save_restore::WqSavedState; use gdma_defs::CLIENT_OOB_8; use gdma_defs::CLIENT_OOB_24; use gdma_defs::CLIENT_OOB_32; @@ -22,9 +25,6 @@ use gdma_defs::WqDoorbellValue; use gdma_defs::WqeHeader; use gdma_defs::WqeParams; use inspect::Inspect; -use mana_save_restore::save_restore::CqEqSavedState; -use mana_save_restore::save_restore::DoorbellSavedState; -use mana_save_restore::save_restore::WqSavedState; use std::marker::PhantomData; use std::sync::Arc; use std::sync::atomic::Ordering::Acquire; diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index 8baef1cbde..2246706fc8 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -3,9 +3,6 @@ //! Types to save and restore the state of a MANA device. 
-use mana_save_restore::save_restore::CqEqSavedState; -use mana_save_restore::save_restore::SavedMemoryState; -use mana_save_restore::save_restore::WqSavedState; use mesh::payload::Protobuf; use std::collections::HashMap; @@ -94,3 +91,89 @@ pub struct InterruptSavedState { #[mesh(2)] pub cpu: u32, } + +/// The saved state of a completion queue or event queue for restoration +/// during servicing +#[derive(Clone, Protobuf, Debug)] +#[mesh(package = "mana_driver")] +pub struct CqEqSavedState { + /// The doorbell state of the queue, which is how the device is notified + #[mesh(1)] + pub doorbell: DoorbellSavedState, + + /// The address of the doorbell register + #[mesh(2)] + pub doorbell_addr: u32, + + /// The id of the queue + #[mesh(3)] + pub id: u32, + + /// The index of the next entry in the queue + #[mesh(4)] + pub next: u32, + + /// The total size of the queue + #[mesh(5)] + pub size: u32, + + /// The bit shift value for the queue + #[mesh(6)] + pub shift: u32, +} + +/// Saved state of a doorbell for restoration during servicing +#[derive(Clone, Protobuf, Debug)] +#[mesh(package = "mana_driver")] +pub struct DoorbellSavedState { + /// The doorbell's id + #[mesh(1)] + pub doorbell_id: u64, + + /// The number of pages allocated for the doorbell + #[mesh(2)] + pub page_count: u32, +} + +/// Saved state of a work queue for restoration during servicing +#[derive(Debug, Protobuf, Clone)] +#[mesh(package = "mana_driver")] +pub struct WqSavedState { + /// The doorbell state of the queue, which is how the device is notified + #[mesh(1)] + pub doorbell: DoorbellSavedState, + + /// The address of the doorbell + #[mesh(2)] + pub doorbell_addr: u32, + + /// The id of the queue + #[mesh(3)] + pub id: u32, + + /// The head of the queue + #[mesh(4)] + pub head: u32, + + /// The tail of the queue + #[mesh(5)] + pub tail: u32, + + /// The bitmask for wrapping queue indices + #[mesh(6)] + pub mask: u32, +} + +/// Saved state for a memory region used by the driver +/// to be restored by a DMA client during servicing +#[derive(Debug, Protobuf, Clone)] +#[mesh(package = "mana_driver")] +pub struct SavedMemoryState { + /// The base page frame number of the memory region + #[mesh(1)] + pub base_pfn: u64, + + /// How long the memory region is + #[mesh(2)] + pub len: usize, +} diff --git a/vm/devices/net/mana_save_restore/Cargo.toml b/vm/devices/net/mana_save_restore/Cargo.toml deleted file mode 100644 index cc9d0d0e00..0000000000 --- a/vm/devices/net/mana_save_restore/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. - -[package] -name = "mana_save_restore" -edition.workspace = true -rust-version.workspace = true - -[dependencies] -mesh.workspace = true - -[lints] -workspace = true diff --git a/vm/devices/net/mana_save_restore/src/lib.rs b/vm/devices/net/mana_save_restore/src/lib.rs deleted file mode 100644 index 8064eb799b..0000000000 --- a/vm/devices/net/mana_save_restore/src/lib.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright (c) Microsoft Corporation. -// Licensed under the MIT License. - -//! 
Mana save/restore module - -/// Module containing structures for saving and restoring MANA device state -pub mod save_restore { - use mesh::payload::Protobuf; - - /// The saved state of a completion queue or event queue for restoration - /// during servicing - #[derive(Clone, Protobuf, Debug)] - #[mesh(package = "mana_driver")] - pub struct CqEqSavedState { - /// The doorbell state of the queue, which is how the device is notified - #[mesh(1)] - pub doorbell: DoorbellSavedState, - - /// The address of the doorbell register - #[mesh(2)] - pub doorbell_addr: u32, - - /// The id of the queue - #[mesh(3)] - pub id: u32, - - /// The index of the next entry in the queue - #[mesh(4)] - pub next: u32, - - /// The total size of the queue - #[mesh(5)] - pub size: u32, - - /// The bit shift value for the queue - #[mesh(6)] - pub shift: u32, - } - - /// Saved state of a doorbell for restoration during servicing - #[derive(Clone, Protobuf, Debug)] - #[mesh(package = "mana_driver")] - pub struct DoorbellSavedState { - /// The doorbell's id - #[mesh(1)] - pub doorbell_id: u64, - - /// The number of pages allocated for the doorbell - #[mesh(2)] - pub page_count: u32, - } - - /// Saved state of a work queue for restoration during servicing - #[derive(Debug, Protobuf, Clone)] - #[mesh(package = "mana_driver")] - pub struct WqSavedState { - /// The doorbell state of the queue, which is how the device is notified - #[mesh(1)] - pub doorbell: DoorbellSavedState, - - /// The address of the doorbell - #[mesh(2)] - pub doorbell_addr: u32, - - /// The id of the queue - #[mesh(3)] - pub id: u32, - - /// The head of the queue - #[mesh(4)] - pub head: u32, - - /// The tail of the queue - #[mesh(5)] - pub tail: u32, - - /// The bitmask for wrapping queue indices - #[mesh(6)] - pub mask: u32, - } - - /// Saved state for a memory region used by the driver - /// to be restored by a DMA client during servicing - #[derive(Debug, Protobuf, Clone)] - #[mesh(package = "mana_driver")] - pub struct SavedMemoryState { - /// The base page frame number of the memory region - #[mesh(1)] - pub base_pfn: u64, - - /// How long the memory region is - #[mesh(2)] - pub len: usize, - } -} From eea14e1f8be85920e05e8429bf4951f69222f8aa Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Fri, 2 May 2025 16:20:38 -0700 Subject: [PATCH 05/25] some of feedback --- vm/devices/net/mana_driver/src/gdma_driver.rs | 37 +++++++----- vm/devices/net/mana_driver/src/queues.rs | 56 ++++++++----------- 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 20e64d6e53..dae2846f26 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -532,19 +532,28 @@ impl GdmaDriver { #[allow(dead_code)] pub async fn save(mut self) -> anyhow::Result { + if self.hwc_failure { + anyhow::bail!("can't save after hwc failure"); + } + self.saving = true; let doorbell = self.bar0.save(Some(self.db_id as u64)); - - let mut interrupt_config = Vec::new(); - for (index, interrupt) in self.interrupts.iter().enumerate() { - if interrupt.is_some() { - interrupt_config.push(InterruptSavedState { - msix_index: index as u32, - cpu: index as u32, - }); - } - } + let interrupt_config = self + .interrupts + .iter() + .enumerate() + .filter_map(|(index, interrupt)| { + if interrupt.is_some() { + Some(InterruptSavedState { + msix_index: index as u32, + cpu: index as u32, + }) + } else { + None + } + }) + .collect(); 
Ok(GdmaDriverSavedState { mem: SavedMemoryState { @@ -632,18 +641,18 @@ impl GdmaDriver { doorbell_shift, }); - let eq = Eq::restore( + let eq = Eq::restore_eq( dma_buffer.subblock(0, PAGE_SIZE), saved_state.eq, DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?, - )?; + ); let db_id = saved_state.db_id; - let cq = Cq::restore( + let cq = Cq::restore_cq( dma_buffer.subblock(CQ_PAGE * PAGE_SIZE, PAGE_SIZE), saved_state.cq, DoorbellPage::new(bar0.clone(), saved_state.db_id as u32)?, - )?; + ); let rq = Wq::restore_rq( dma_buffer.subblock(RQ_PAGE * PAGE_SIZE, PAGE_SIZE), diff --git a/vm/devices/net/mana_driver/src/queues.rs b/vm/devices/net/mana_driver/src/queues.rs index 272ba16915..97695b7813 100644 --- a/vm/devices/net/mana_driver/src/queues.rs +++ b/vm/devices/net/mana_driver/src/queues.rs @@ -128,22 +128,8 @@ impl CqEq { } /// Restores an existing completion queue. - pub fn restore( - mem: MemoryBlock, - state: CqEqSavedState, - doorbell: DoorbellPage, - ) -> anyhow::Result { - Ok(Self { - doorbell, - doorbell_addr: state.doorbell_addr, - queue_type: GdmaQueueType::GDMA_CQ, - mem, - id: state.id, - next: state.next, - size: state.size, - shift: state.shift, - _phantom: PhantomData, - }) + pub fn restore_cq(mem: MemoryBlock, state: CqEqSavedState, doorbell: DoorbellPage) -> Self { + Self::restore(GdmaQueueType::GDMA_CQ, mem, doorbell, state) } } @@ -154,22 +140,8 @@ impl CqEq { } /// Restores an existing event queue. - pub fn restore( - mem: MemoryBlock, - state: CqEqSavedState, - doorbell: DoorbellPage, - ) -> anyhow::Result { - Ok(Self { - doorbell, - doorbell_addr: state.doorbell_addr, - queue_type: GdmaQueueType::GDMA_EQ, - mem, - id: state.id, - next: state.next, - size: state.size, - shift: state.shift, - _phantom: PhantomData, - }) + pub fn restore_eq(mem: MemoryBlock, state: CqEqSavedState, doorbell: DoorbellPage) -> Self { + Self::restore(GdmaQueueType::GDMA_EQ, mem, doorbell, state) } } @@ -212,6 +184,26 @@ impl CqEq { } } + /// Restore a queue from saved state. + pub fn restore( + queue_type: GdmaQueueType, + mem: MemoryBlock, + doorbell: DoorbellPage, + state: CqEqSavedState, + ) -> Self { + Self { + doorbell, + doorbell_addr: state.doorbell_addr, + queue_type, + mem, + id: state.id, + next: state.next, + size: state.size, + shift: state.shift, + _phantom: PhantomData, + } + } + /// Updates the queue ID. 
pub(crate) fn set_id(&mut self, id: u32) { self.id = id; From d6fc1308e425d9195609060435378902d8a8f978 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Fri, 2 May 2025 16:28:09 -0700 Subject: [PATCH 06/25] remove arm state, re-arm on restore --- vm/devices/net/mana_driver/src/gdma_driver.rs | 22 ++++------------- .../net/mana_driver/src/save_restore.rs | 24 +++++-------------- 2 files changed, 11 insertions(+), 35 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index dae2846f26..9bd4320243 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -567,9 +567,6 @@ impl GdmaDriver { db_id: doorbell.doorbell_id, gpa_mkey: self.gpa_mkey, pdid: self._pdid, - cq_armed: self.cq_armed, - eq_armed: self.eq_armed, - hwc_subscribed: self.hwc_subscribed, eq_id_msix: self.eq_id_msix.clone(), hwc_activity_id: self.hwc_activity_id, num_msix: self.num_msix, @@ -683,8 +680,8 @@ impl GdmaDriver { rq, sq, test_events: 0, - eq_armed: saved_state.eq_armed, - cq_armed: saved_state.cq_armed, + eq_armed: true, + cq_armed: true, gpa_mkey: saved_state.gpa_mkey, _pdid: saved_state.pdid, eq_id_msix: saved_state.eq_id_msix, @@ -692,7 +689,7 @@ impl GdmaDriver { min_queue_avail: saved_state.min_queue_avail, hwc_activity_id: saved_state.hwc_activity_id, link_toggle: saved_state.link_toggle, - hwc_subscribed: saved_state.hwc_subscribed, + hwc_subscribed: false, hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS, hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS, hwc_failure: false, @@ -700,17 +697,8 @@ impl GdmaDriver { db_id: db_id as u32, }; - if saved_state.hwc_subscribed { - this.hwc_subscribe(); - } - - if saved_state.eq_armed { - this.eq.arm(); - } - - if saved_state.cq_armed { - this.cq.arm(); - } + this.eq.arm(); + this.cq.arm(); Ok(this) } diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index 2246706fc8..4423034913 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -42,40 +42,28 @@ pub struct GdmaDriverSavedState { #[mesh(8)] pub pdid: u32, - /// Whether the driver is subscribed to hwc - #[mesh(9)] - pub hwc_subscribed: bool, - - /// Whether the eq is armed or not - #[mesh(10)] - pub eq_armed: bool, - - /// Whether the cq is armed or not - #[mesh(11)] - pub cq_armed: bool, - /// Event queue id to msix mapping - #[mesh(12)] + #[mesh(9)] pub eq_id_msix: HashMap, /// The id of the hwc activity - #[mesh(13)] + #[mesh(10)] pub hwc_activity_id: u32, /// How many msix vectors are available - #[mesh(14)] + #[mesh(11)] pub num_msix: u32, /// Minimum number of queues available - #[mesh(15)] + #[mesh(12)] pub min_queue_avail: u32, /// Saved interrupts for restoration - #[mesh(16)] + #[mesh(13)] pub interrupt_config: Vec, /// Link status by vport index - #[mesh(17)] + #[mesh(14)] pub link_toggle: Vec<(u32, bool)>, } From fe7e8bd610168a19466ec867efc8442a70ab2992 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Wed, 7 May 2025 09:53:36 -0700 Subject: [PATCH 07/25] save on hwc failure, save hwc failure state --- vm/devices/net/mana_driver/src/gdma_driver.rs | 19 ++++++++----------- .../net/mana_driver/src/save_restore.rs | 3 +++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 9bd4320243..1786ac927a 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ 
b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -160,7 +160,7 @@ pub struct GdmaDriver { hwc_timeout_in_ms: u32, hwc_failure: bool, db_id: u32, - saving: bool, + state_saved: bool, } const EQ_PAGE: usize = 0; @@ -176,10 +176,10 @@ const RWQE_SIZE: u32 = 32; impl Drop for GdmaDriver { fn drop(&mut self) { - tracing::debug!(?self.saving, ?self.hwc_failure, "dropping gdma driver"); + tracing::debug!(?self.state_saved, ?self.hwc_failure, "dropping gdma driver"); // Don't destroy anything if we're saving its state for restoration. - if self.saving { + if self.state_saved { return; } @@ -504,7 +504,7 @@ impl GdmaDriver { hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS, hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS, hwc_failure: false, - saving: false, + state_saved: false, db_id, }; @@ -532,11 +532,7 @@ impl GdmaDriver { #[allow(dead_code)] pub async fn save(mut self) -> anyhow::Result { - if self.hwc_failure { - anyhow::bail!("can't save after hwc failure"); - } - - self.saving = true; + self.state_saved = true; let doorbell = self.bar0.save(Some(self.db_id as u64)); let interrupt_config = self @@ -572,6 +568,7 @@ impl GdmaDriver { num_msix: self.num_msix, min_queue_avail: self.min_queue_avail, link_toggle: self.link_toggle.clone(), + hwc_failure: self.hwc_failure, interrupt_config, }) } @@ -692,8 +689,8 @@ impl GdmaDriver { hwc_subscribed: false, hwc_warning_time_in_ms: HWC_WARNING_TIME_IN_MS, hwc_timeout_in_ms: HWC_TIMEOUT_DEFAULT_IN_MS, - hwc_failure: false, - saving: false, + hwc_failure: saved_state.hwc_failure, + state_saved: false, db_id: db_id as u32, }; diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index 4423034913..83123248af 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -65,6 +65,9 @@ pub struct GdmaDriverSavedState { /// Link status by vport index #[mesh(14)] pub link_toggle: Vec<(u32, bool)>, + + #[mesh(15)] + pub hwc_failure: bool, } /// Saved state of an interrupt for restoration during servicing From a48267e8298455593e02fc8a7d5cd12e8d2eeb40 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Wed, 7 May 2025 10:15:25 -0700 Subject: [PATCH 08/25] move some duplicated code to init function --- vm/devices/net/mana_driver/src/gdma_driver.rs | 71 +++++-------------- .../net/mana_driver/src/save_restore.rs | 1 + 2 files changed, 18 insertions(+), 54 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 1786ac927a..51afc6add8 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -257,54 +257,11 @@ impl GdmaDriver { num_vps: u32, dma_buffer: Option, ) -> anyhow::Result { - let bar0_mapping = device.map_bar(0)?; - let bar0_len = bar0_mapping.len(); - if bar0_len < size_of::() { - anyhow::bail!("bar0 ({} bytes) too small for reg map", bar0_mapping.len()); - } + let (bar0_mapping, map) = Self::init(&mut device)?; + // Only allocate the HWC interrupt now. Rest will be allocated later. let num_msix = 1; let mut interrupt0 = device.map_interrupt(0, 0)?; - let mut map = RegMap::new_zeroed(); - for i in 0..size_of_val(&map) / 4 { - let v = bar0_mapping.read_u32(i * 4); - // Unmapped device memory will return -1 on reads, so check the first 32 - // bits for this condition to get a clear error message early. 
- if i == 0 && v == !0 { - anyhow::bail!("bar0 read returned -1, device is not present"); - } - map.as_mut_bytes()[i * 4..(i + 1) * 4].copy_from_slice(&v.to_ne_bytes()); - } - - tracing::debug!(?map, "register map"); - - // Log on unknown major version numbers. This is not necessarily an - // error, so continue. - if map.major_version_number != 0 && map.major_version_number != 1 { - tracing::warn!( - major = map.major_version_number, - minor = map.minor_version_number, - micro = map.micro_version_number, - "unrecognized major version" - ); - } - - if map.vf_gdma_sriov_shared_sz != 32 { - anyhow::bail!( - "unexpected shared memory size: {}", - map.vf_gdma_sriov_shared_sz - ); - } - - if (bar0_len as u64).saturating_sub(map.vf_gdma_sriov_shared_reg_start) - < map.vf_gdma_sriov_shared_sz as u64 - { - anyhow::bail!( - "bar0 ({} bytes) too small for shared memory at {}", - bar0_mapping.len(), - map.vf_gdma_sriov_shared_reg_start - ); - } let dma_buffer = if let Some(dma_buffer) = dma_buffer { dma_buffer @@ -573,14 +530,7 @@ impl GdmaDriver { }) } - #[allow(dead_code)] - pub async fn restore( - saved_state: GdmaDriverSavedState, - mut device: T, - dma_buffer: MemoryBlock, - ) -> anyhow::Result { - tracing::info!("restoring gdma driver"); - + pub fn init(device: &mut T) -> anyhow::Result<(::Registers, RegMap)> { let bar0_mapping = device.map_bar(0)?; let bar0_len = bar0_mapping.len(); if bar0_len < size_of::() { @@ -598,7 +548,7 @@ impl GdmaDriver { map.as_mut_bytes()[i * 4..(i + 1) * 4].copy_from_slice(&v.to_ne_bytes()); } - tracing::debug!(?map, "register map on restore"); + tracing::debug!(?map, "register map"); // Log on unknown major version numbers. This is not necessarily an // error, so continue. @@ -628,7 +578,20 @@ impl GdmaDriver { ); } + Ok((bar0_mapping, map)) + } + + #[allow(dead_code)] + pub async fn restore( + saved_state: GdmaDriverSavedState, + mut device: T, + dma_buffer: MemoryBlock, + ) -> anyhow::Result { + tracing::info!("restoring gdma driver"); + + let (bar0_mapping, map) = Self::init(&mut device)?; let doorbell_shift = map.vf_db_page_sz.trailing_zeros(); + let bar0 = Arc::new(Bar0 { mem: bar0_mapping, map, diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index 83123248af..bf275db64e 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -66,6 +66,7 @@ pub struct GdmaDriverSavedState { #[mesh(14)] pub link_toggle: Vec<(u32, bool)>, + /// The hwc failure state #[mesh(15)] pub hwc_failure: bool, } From 87ec2a3b820bf0651df692b6c8840b513e6f31da Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Mon, 8 Sep 2025 22:14:54 +0000 Subject: [PATCH 09/25] retarget always --- vm/devices/net/mana_driver/src/gdma_driver.rs | 32 +++---------------- .../net/mana_driver/src/save_restore.rs | 22 ++----------- 2 files changed, 6 insertions(+), 48 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index b74901836a..605fadef48 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -11,7 +11,6 @@ use crate::resources::Resource; use crate::resources::ResourceArena; use crate::save_restore::DoorbellSavedState; use crate::save_restore::GdmaDriverSavedState; -use crate::save_restore::InterruptSavedState; use crate::save_restore::SavedMemoryState; use anyhow::Context; use futures::FutureExt; @@ -506,21 +505,6 @@ impl GdmaDriver { self.state_saved = true; 
let doorbell = self.bar0.save(Some(self.db_id as u64)); - let interrupt_config = self - .interrupts - .iter() - .enumerate() - .filter_map(|(index, interrupt)| { - if interrupt.is_some() { - Some(InterruptSavedState { - msix_index: index as u32, - cpu: index as u32, - }) - } else { - None - } - }) - .collect(); Ok(GdmaDriverSavedState { mem: SavedMemoryState { @@ -540,7 +524,6 @@ impl GdmaDriver { min_queue_avail: self.min_queue_avail, link_toggle: self.link_toggle.clone(), hwc_failure: self.hwc_failure, - interrupt_config, }) } @@ -638,11 +621,7 @@ impl GdmaDriver { )?; let mut interrupts = vec![None; saved_state.num_msix as usize]; - for int_state in &saved_state.interrupt_config { - let interrupt = device.map_interrupt(int_state.msix_index, int_state.cpu)?; - - interrupts[int_state.msix_index as usize] = Some(interrupt); - } + interrupts[0] = Some(device.map_interrupt(0, 0)?); let mut this = Self { device: Some(device), @@ -1272,12 +1251,9 @@ impl GdmaDriver { fn get_msix_for_cpu(&mut self, cpu: u32) -> anyhow::Result { let msix = cpu % self.num_msix; - // Allocate MSI-X, if it hasn't been allocated so far. - if self.interrupts[msix as usize].is_none() { - let device = self.device.as_mut().expect("device should be present"); - let interrupt = device.map_interrupt(msix, cpu)?; - self.interrupts[msix as usize] = Some(interrupt); - } + let device = self.device.as_mut().expect("device should be present"); + let interrupt = device.map_interrupt(msix, cpu)?; + self.interrupts[msix as usize] = Some(interrupt); Ok(msix) } diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index bf275db64e..e5417aab76 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -57,33 +57,15 @@ pub struct GdmaDriverSavedState { /// Minimum number of queues available #[mesh(12)] pub min_queue_avail: u32, - - /// Saved interrupts for restoration - #[mesh(13)] - pub interrupt_config: Vec, - /// Link status by vport index - #[mesh(14)] + #[mesh(13)] pub link_toggle: Vec<(u32, bool)>, /// The hwc failure state - #[mesh(15)] + #[mesh(14)] pub hwc_failure: bool, } -/// Saved state of an interrupt for restoration during servicing -#[derive(Protobuf, Clone, Debug)] -#[mesh(package = "mana_driver")] -pub struct InterruptSavedState { - /// The index in the msix table for this interrupt - #[mesh(1)] - pub msix_index: u32, - - /// Which CPU this interrupt is assigned to - #[mesh(2)] - pub cpu: u32, -} - /// The saved state of a completion queue or event queue for restoration /// during servicing #[derive(Clone, Protobuf, Debug)] From 980f5b0beda641d0fd535988c6ed22ac7911ed7e Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Wed, 10 Sep 2025 19:49:50 +0000 Subject: [PATCH 10/25] unmap interrupts on drop --- vm/devices/net/mana_driver/src/gdma_driver.rs | 22 +++++++++++++++ vm/devices/user_driver/src/lib.rs | 8 ++++++ vm/devices/user_driver/src/vfio.rs | 21 +++++++++++++++ vm/devices/user_driver/vfio_sys/src/lib.rs | 27 +++++++++++++++++++ 4 files changed, 78 insertions(+) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 605fadef48..906a19ed09 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -210,6 +210,11 @@ impl Drop for GdmaDriver { // Don't destroy anything if we're saving its state for restoration. if self.state_saved { + // Attempt to unmap all MSI-X vectors we may have mapped. 
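+            // The restored driver re-establishes its own interrupt mappings,
+            // so these vectors do not need to stay mapped across the save.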
+ if let Err(e) = self.unmap_all_msix() { + tracing::warn!(error = %e, "failed to unmap all msix vectors when dropping GdmaDriver"); + } + return; } @@ -260,6 +265,23 @@ struct EqeWaitResult { } impl GdmaDriver { + /// Unmap (disable) all mapped MSI-X vectors for this driver instance. + pub fn unmap_all_msix(&mut self) -> anyhow::Result<()> { + let device = match self.device.as_mut() { + Some(d) => d, + None => return Ok(()), + }; + for (idx, slot) in self.interrupts.iter_mut().enumerate() { + if slot.is_some() { + if let Err(e) = device.unmap_interrupt(idx as u32) { + // Continue trying others. + tracing::debug!(msix = idx, error = %e, "unmap_interrupt failed"); + } + *slot = None; + } + } + Ok(()) + } pub fn doorbell(&self) -> Arc { self.bar0.clone() as _ } diff --git a/vm/devices/user_driver/src/lib.rs b/vm/devices/user_driver/src/lib.rs index 2a451bc055..a0470d666e 100644 --- a/vm/devices/user_driver/src/lib.rs +++ b/vm/devices/user_driver/src/lib.rs @@ -44,6 +44,14 @@ pub trait DeviceBacking: 'static + Send + Inspect { /// This can be called multiple times for the same interrupt without disconnecting /// previous mappings. The last `cpu` value will be used as the target CPU. fn map_interrupt(&mut self, msix: u32, cpu: u32) -> anyhow::Result; + + /// Unmaps (disables) the specified MSI-X vector previously mapped via `map_interrupt`. + /// + /// Default implementation is a no-op for backends that do not support + /// dynamic interrupt unmapping. + fn unmap_interrupt(&mut self, _msix: u32) -> anyhow::Result<()> { + Ok(()) + } } /// Access to device registers. diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index 0bc6a7670b..d51883885e 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -307,6 +307,27 @@ impl DeviceBacking for VfioDevice { Ok(interrupt.insert(new_interrupt).interrupt.clone()) } + + fn unmap_interrupt(&mut self, msix: u32) -> anyhow::Result<()> { + if msix >= self.msix_info.count { + anyhow::bail!("invalid msix index"); + } + if self + .interrupts + .get(msix as usize) + .and_then(|i| i.as_ref()) + .is_none() + { + // Already unmapped / never mapped. + return Ok(()); + } + self.device.unmap_msix(msix, 1)?; + // Drop local state so future map_interrupt recreates. + if let Some(slot) = self.interrupts.get_mut(msix as usize) { + *slot = None; + } + Ok(()) + } } struct InterruptTask { diff --git a/vm/devices/user_driver/vfio_sys/src/lib.rs b/vm/devices/user_driver/vfio_sys/src/lib.rs index 77dbc7f2f1..9eb91ca328 100644 --- a/vm/devices/user_driver/vfio_sys/src/lib.rs +++ b/vm/devices/user_driver/vfio_sys/src/lib.rs @@ -400,6 +400,33 @@ impl Device { } Ok(()) } + + /// Disable (unmap) a contiguous range of previously mapped MSI-X vectors. + /// + /// This issues VFIO_DEVICE_SET_IRQS with ACTION_TRIGGER + DATA_NONE and a + /// non-zero count, which per VFIO semantics removes the eventfd bindings + /// for the specified range starting at `start`. + pub fn unmap_msix(&self, start: u32, count: u32) -> anyhow::Result<()> { + if count == 0 { + return Ok(()); + } + + let header = vfio_irq_set { + argsz: size_of::() as u32, + flags: VFIO_IRQ_SET_ACTION_TRIGGER | VFIO_IRQ_SET_DATA_NONE, + index: VFIO_PCI_MSIX_IRQ_INDEX, + start, + count, + data: Default::default(), + }; + + // SAFETY: The file descriptor is valid; header constructed per VFIO spec. 
+ unsafe { + ioctl::vfio_device_set_irqs(self.file.as_raw_fd(), &header) + .context("failed to unmap msix vectors")?; + } + Ok(()) + } } impl AsRef for Device { From 054c767bfb7155b18d8a97a0581e00f128fce14c Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 25 Sep 2025 17:11:02 +0000 Subject: [PATCH 11/25] PR feedback --- vm/devices/net/mana_driver/src/gdma_driver.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 906a19ed09..7257918e43 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -206,11 +206,11 @@ impl GdmaDriver { impl Drop for GdmaDriver { fn drop(&mut self) { - tracing::debug!(?self.state_saved, ?self.hwc_failure, "dropping gdma driver"); + tracing::info!(?self.state_saved, ?self.hwc_failure, "dropping gdma driver"); // Don't destroy anything if we're saving its state for restoration. if self.state_saved { - // Attempt to unmap all MSI-X vectors we may have mapped. + // Unmap interrupts to prevent the device from sending interrupts during save/restore if let Err(e) = self.unmap_all_msix() { tracing::warn!(error = %e, "failed to unmap all msix vectors when dropping GdmaDriver"); } From 1baec11dcf37165e3c7eb373cd90e06e8d54380c Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 25 Sep 2025 18:02:25 +0000 Subject: [PATCH 12/25] unmap all interrupts at once --- vm/devices/net/mana_driver/src/gdma_driver.rs | 20 +++++---------- vm/devices/user_driver/src/lib.rs | 4 +-- vm/devices/user_driver/src/vfio.rs | 25 ++++++++----------- 3 files changed, 19 insertions(+), 30 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 7257918e43..83ce9b26d3 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -211,8 +211,8 @@ impl Drop for GdmaDriver { // Don't destroy anything if we're saving its state for restoration. if self.state_saved { // Unmap interrupts to prevent the device from sending interrupts during save/restore - if let Err(e) = self.unmap_all_msix() { - tracing::warn!(error = %e, "failed to unmap all msix vectors when dropping GdmaDriver"); + if let Err(e) = self.unmap_all_interrupts() { + tracing::warn!(error = %e, "failed to unmap interrupts when dropping GdmaDriver"); } return; @@ -265,23 +265,15 @@ struct EqeWaitResult { } impl GdmaDriver { - /// Unmap (disable) all mapped MSI-X vectors for this driver instance. - pub fn unmap_all_msix(&mut self) -> anyhow::Result<()> { + pub fn unmap_all_interrupts(&mut self) -> anyhow::Result<()> { let device = match self.device.as_mut() { Some(d) => d, None => return Ok(()), }; - for (idx, slot) in self.interrupts.iter_mut().enumerate() { - if slot.is_some() { - if let Err(e) = device.unmap_interrupt(idx as u32) { - // Continue trying others. - tracing::debug!(msix = idx, error = %e, "unmap_interrupt failed"); - } - *slot = None; - } - } - Ok(()) + + device.unmap_all_interrupts() } + pub fn doorbell(&self) -> Arc { self.bar0.clone() as _ } diff --git a/vm/devices/user_driver/src/lib.rs b/vm/devices/user_driver/src/lib.rs index a0470d666e..2d36f502fe 100644 --- a/vm/devices/user_driver/src/lib.rs +++ b/vm/devices/user_driver/src/lib.rs @@ -45,11 +45,11 @@ pub trait DeviceBacking: 'static + Send + Inspect { /// previous mappings. The last `cpu` value will be used as the target CPU. 
fn map_interrupt(&mut self, msix: u32, cpu: u32) -> anyhow::Result; - /// Unmaps (disables) the specified MSI-X vector previously mapped via `map_interrupt`. + /// Unmaps and disables all previously mapped interrupts. /// /// Default implementation is a no-op for backends that do not support /// dynamic interrupt unmapping. - fn unmap_interrupt(&mut self, _msix: u32) -> anyhow::Result<()> { + fn unmap_all_interrupts(&mut self) -> anyhow::Result<()> { Ok(()) } } diff --git a/vm/devices/user_driver/src/vfio.rs b/vm/devices/user_driver/src/vfio.rs index d51883885e..82a0b44213 100644 --- a/vm/devices/user_driver/src/vfio.rs +++ b/vm/devices/user_driver/src/vfio.rs @@ -308,24 +308,21 @@ impl DeviceBacking for VfioDevice { Ok(interrupt.insert(new_interrupt).interrupt.clone()) } - fn unmap_interrupt(&mut self, msix: u32) -> anyhow::Result<()> { - if msix >= self.msix_info.count { - anyhow::bail!("invalid msix index"); - } - if self - .interrupts - .get(msix as usize) - .and_then(|i| i.as_ref()) - .is_none() - { - // Already unmapped / never mapped. + fn unmap_all_interrupts(&mut self) -> anyhow::Result<()> { + if self.interrupts.is_empty() { return Ok(()); } - self.device.unmap_msix(msix, 1)?; - // Drop local state so future map_interrupt recreates. - if let Some(slot) = self.interrupts.get_mut(msix as usize) { + + let count = self.interrupts.len() as u32; + self.device + .unmap_msix(0, count) + .context("failed to unmap all msix vectors")?; + + // Clear local bookkeeping so re-mapping works correctly later. + for slot in &mut self.interrupts { *slot = None; } + Ok(()) } } From 038c2da70947577e6eed014013009470449b06be Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 25 Sep 2025 20:20:48 +0000 Subject: [PATCH 13/25] remove eq_id_msix saving and reconstruct, other minor feedback --- vm/devices/net/mana_driver/src/gdma_driver.rs | 13 ++++++------- vm/devices/net/mana_driver/src/save_restore.rs | 17 ++++++----------- vm/devices/user_driver/src/lib.rs | 3 +-- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 83ce9b26d3..8bec5fc119 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -290,14 +290,12 @@ impl GdmaDriver { let num_msix = 1; let mut interrupt0 = device.map_interrupt(0, 0)?; - let dma_buffer = if let Some(dma_buffer) = dma_buffer { - dma_buffer - } else { + let dma_buffer = dma_buffer.map(Ok).unwrap_or_else(|| { let dma_client = device.dma_client(); dma_client .allocate_dma_buffer(NUM_PAGES * PAGE_SIZE) - .context("failed to allocate DMA buffer")? 
- }; + .context("failed to allocate DMA buffer") + })?; let pages = dma_buffer.pfns(); @@ -532,7 +530,6 @@ impl GdmaDriver { db_id: doorbell.doorbell_id, gpa_mkey: self.gpa_mkey, pdid: self._pdid, - eq_id_msix: self.eq_id_msix.clone(), hwc_activity_id: self.hwc_activity_id, num_msix: self.num_msix, min_queue_avail: self.min_queue_avail, @@ -636,6 +633,8 @@ impl GdmaDriver { let mut interrupts = vec![None; saved_state.num_msix as usize]; interrupts[0] = Some(device.map_interrupt(0, 0)?); + let mut eq_id_msix = HashMap::new(); + eq_id_msix.insert(eq.id(), 0); let mut this = Self { device: Some(device), @@ -646,12 +645,12 @@ impl GdmaDriver { cq, rq, sq, + eq_id_msix, test_events: 0, eq_armed: true, cq_armed: true, gpa_mkey: saved_state.gpa_mkey, _pdid: saved_state.pdid, - eq_id_msix: saved_state.eq_id_msix, num_msix: saved_state.num_msix, min_queue_avail: saved_state.min_queue_avail, hwc_activity_id: saved_state.hwc_activity_id, diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index e5417aab76..1b27b3988b 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -4,7 +4,6 @@ //! Types to save and restore the state of a MANA device. use mesh::payload::Protobuf; -use std::collections::HashMap; /// Top level saved state for the GDMA driver's saved state #[derive(Protobuf, Clone, Debug)] @@ -42,27 +41,23 @@ pub struct GdmaDriverSavedState { #[mesh(8)] pub pdid: u32, - /// Event queue id to msix mapping + /// The id of the HWC activity #[mesh(9)] - pub eq_id_msix: HashMap, - - /// The id of the hwc activity - #[mesh(10)] pub hwc_activity_id: u32, /// How many msix vectors are available - #[mesh(11)] + #[mesh(10)] pub num_msix: u32, /// Minimum number of queues available - #[mesh(12)] + #[mesh(11)] pub min_queue_avail: u32, /// Link status by vport index - #[mesh(13)] + #[mesh(12)] pub link_toggle: Vec<(u32, bool)>, - /// The hwc failure state - #[mesh(14)] + /// The HWC failure state + #[mesh(13)] pub hwc_failure: bool, } diff --git a/vm/devices/user_driver/src/lib.rs b/vm/devices/user_driver/src/lib.rs index 2d36f502fe..396380edb1 100644 --- a/vm/devices/user_driver/src/lib.rs +++ b/vm/devices/user_driver/src/lib.rs @@ -47,8 +47,7 @@ pub trait DeviceBacking: 'static + Send + Inspect { /// Unmaps and disables all previously mapped interrupts. /// - /// Default implementation is a no-op for backends that do not support - /// dynamic interrupt unmapping. + /// Default implementation is a no-op for backends that do not support interrupt unmapping. 
fn unmap_all_interrupts(&mut self) -> anyhow::Result<()> { Ok(()) } From e6fcd9966e45e6d21c8d57a96a44b08504a83e1d Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 11 Sep 2025 21:12:15 +0000 Subject: [PATCH 14/25] enable keepalive --- openhcl/openhcl_dma_manager/src/lib.rs | 2 +- openhcl/underhill_core/src/dispatch/mod.rs | 3 ++- openhcl/underhill_core/src/servicing.rs | 7 ++++++ openhcl/underhill_core/src/worker.rs | 2 +- .../net/mana_driver/src/save_restore.rs | 22 +++++++++++++++++++ 5 files changed, 33 insertions(+), 3 deletions(-) diff --git a/openhcl/openhcl_dma_manager/src/lib.rs b/openhcl/openhcl_dma_manager/src/lib.rs index d078db4d20..52a29263eb 100644 --- a/openhcl/openhcl_dma_manager/src/lib.rs +++ b/openhcl/openhcl_dma_manager/src/lib.rs @@ -387,7 +387,7 @@ impl OpenhclDmaManager { if let Some(private_pool) = &self.private_pool { private_pool - .validate_restore(false) + .validate_restore(true) .context("failed to validate restore for private pool")? } diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index 95f469b56e..2af7bf0cb6 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -580,7 +580,7 @@ impl LoadedVm { anyhow::bail!("cannot service underhill while paused"); } - let mut state = self.save(Some(deadline), nvme_keepalive).await?; + let mut state = self.save(Some(deadline), true).await?; state.init_state.correlation_id = Some(correlation_id); // Unload any network devices. @@ -798,6 +798,7 @@ impl LoadedVm { nvme_state, dma_manager_state, vmbus_client, + mana_state: None, }, units, }; diff --git a/openhcl/underhill_core/src/servicing.rs b/openhcl/underhill_core/src/servicing.rs index 0606f51804..ca73a33ae5 100644 --- a/openhcl/underhill_core/src/servicing.rs +++ b/openhcl/underhill_core/src/servicing.rs @@ -10,6 +10,7 @@ use anyhow::Context as _; use vmcore::save_restore::SavedStateBlob; mod state { + use mana_driver::save_restore::ManaSavedState; use mesh::payload::Protobuf; use openhcl_dma_manager::save_restore::OpenhclDmaManagerState; use state_unit::SavedStateUnit; @@ -84,6 +85,8 @@ mod state { pub dma_manager_state: Option, #[mesh(10002)] pub vmbus_client: Option, + #[mesh(10003)] + pub mana_state: Option>, } #[derive(Protobuf)] @@ -183,6 +186,7 @@ impl From for FirmwareType { #[expect(clippy::option_option)] pub mod transposed { use super::*; + use mana_driver::save_restore::ManaSavedState; use openhcl_dma_manager::save_restore::OpenhclDmaManagerState; use vmcore::save_restore::SaveRestore; @@ -193,6 +197,7 @@ pub mod transposed { pub firmware_type: Option, pub vm_stop_reference_time: Option, pub emuplat: OptionEmuplatSavedState, + pub mana_state: Option>, pub flush_logs_result: Option>, pub vmgs: Option<( vmgs::save_restore::state::SavedVmgsState, @@ -230,6 +235,7 @@ pub mod transposed { vmgs, overlay_shutdown_device, nvme_state, + mana_state, dma_manager_state, vmbus_client, } = state; @@ -246,6 +252,7 @@ pub mod transposed { vmgs, overlay_shutdown_device: Some(overlay_shutdown_device), nvme_state: Some(nvme_state), + mana_state, dma_manager_state: Some(dma_manager_state), vmbus_client: Some(vmbus_client), } diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 56c323cd1f..adad29d1ae 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -771,7 +771,7 @@ impl UhVmNetworkSettings { } else { AllocationVisibility::Private }, - persistent_allocations: false, + 
persistent_allocations: true, })?; let (vf_manager, endpoints, save_state) = HclNetworkVFManager::new( diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index 1b27b3988b..7a2cd0441b 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -5,6 +5,28 @@ use mesh::payload::Protobuf; +/// Mana saved state +#[derive(Debug, Protobuf, Clone)] +#[mesh(package = "mana_driver")] +pub struct ManaSavedState { + /// The saved state of the MANA device driver + #[mesh(1)] + pub mana_device: ManaDeviceSavedState, + + /// Id of the device + #[mesh(2)] + pub pci_id: String, +} + +/// Mana device saved state +#[derive(Debug, Protobuf, Clone)] +#[mesh(package = "mana_driver")] +pub struct ManaDeviceSavedState { + /// Saved state for restoration of the GDMA driver + #[mesh(1)] + pub gdma: GdmaDriverSavedState, +} + /// Top level saved state for the GDMA driver's saved state #[derive(Protobuf, Clone, Debug)] #[mesh(package = "mana_driver")] From d799d6b4e4fb031ca8aa2085756e6541aae6f5ec Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Mon, 22 Sep 2025 20:03:33 +0000 Subject: [PATCH 15/25] triple fault to fix, only calling save and not restoring --- openhcl/underhill_core/src/dispatch/mod.rs | 14 ++++- openhcl/underhill_core/src/emuplat/netvsp.rs | 31 +++++++++ openhcl/underhill_core/src/worker.rs | 63 +++++++++++++++++++ vm/devices/net/mana_driver/src/gdma_driver.rs | 6 +- vm/devices/net/mana_driver/src/mana.rs | 11 ++++ .../tests/tests/x86_64/openhcl_uefi.rs | 21 +++++++ 6 files changed, 144 insertions(+), 2 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index 2af7bf0cb6..a61a11eeaa 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -34,6 +34,7 @@ use hyperv_ic_resources::shutdown::ShutdownRpc; use hyperv_ic_resources::shutdown::ShutdownType; use igvm_defs::MemoryMapEntryType; use inspect::Inspect; +use mana_driver::save_restore::ManaSavedState; use mesh::CancelContext; use mesh::MeshPayload; use mesh::error::RemoteError; @@ -127,6 +128,9 @@ pub trait LoadedVmNetworkSettings: Inspect { &self, mut params: PacketCaptureParams, ) -> anyhow::Result>; + + /// Save the network state for restoration after servicing. + async fn save(&mut self) -> Vec; } /// A VM that has been loaded and can be run. 
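The new `LoadedVmNetworkSettings::save` method returns one `ManaSavedState` per VF manager, keyed by PCI id so the restore path can match entries back to NIC configs. A minimal sketch of that lookup, assuming only the types shown in this series (the helper name is illustrative, not part of the patch):

    use mana_driver::save_restore::ManaSavedState;

    // Sketch: pick the ManaSavedState (if any) that belongs to a given NIC,
    // keyed by PCI id the same way the restore path in this series does.
    fn find_saved_mana<'a>(
        saved: Option<&'a [ManaSavedState]>,
        pci_id: &str,
    ) -> Option<&'a ManaSavedState> {
        saved.and_then(|states| states.iter().find(|s| s.pci_id == pci_id))
    }

A missing entry simply means the device was not kept alive and is created from scratch on restore.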
@@ -580,6 +584,8 @@ impl LoadedVm { anyhow::bail!("cannot service underhill while paused"); } + tracing::info!("state units stopped"); + let mut state = self.save(Some(deadline), true).await?; state.init_state.correlation_id = Some(correlation_id); @@ -759,6 +765,12 @@ impl LoadedVm { None }; + let mana_state = if let Some(network_settings) = &mut self.network_settings { + Some(network_settings.save().await) + } else { + None + }; + let units = self.save_units().await.context("state unit save failed")?; let vmgs = if let Some((vmgs_thin_client, vmgs_disk_metadata, _)) = self.vmgs.as_ref() { Some(( @@ -798,7 +810,7 @@ impl LoadedVm { nvme_state, dma_manager_state, vmbus_client, - mana_state: None, + mana_state, }, units, }; diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index 3d015e5b8f..dcb8e7af8f 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -14,6 +14,7 @@ use guid::Guid; use inspect::Inspect; use mana_driver::mana::ManaDevice; use mana_driver::mana::VportState; +use mana_driver::save_restore::ManaSavedState; use mesh::rpc::FailableRpc; use mesh::rpc::Rpc; use mesh::rpc::RpcSend; @@ -58,6 +59,7 @@ enum HclNetworkVfManagerMessage { HideVtl0VF(Rpc), Inspect(inspect::Deferred), PacketCapture(FailableRpc, PacketCaptureParams>), + SaveState(Rpc<(), ManaSavedState>), } async fn create_mana_device( @@ -643,6 +645,25 @@ impl HclNetworkVFManagerWorker { }) .await; } + NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::SaveState(rpc)) => { + drop(self.messages.take().unwrap()); + + rpc.handle(|_| async move { + let mana_device = self + .mana_device + .take() + .expect("should have a device present to save state"); + + let saved_state = mana_device.save().await.unwrap(); + + ManaSavedState { + pci_id: self.vtl2_pci_id.clone(), + mana_device: saved_state, + } + }) + .await; + return; + } NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::ShutdownBegin( remove_vtl0_vf, )) => { @@ -969,6 +990,16 @@ impl HclNetworkVFManager { )) } + pub async fn save(&self) -> ManaSavedState { + let save_state = self + .shared_state + .worker_channel + .call(HclNetworkVfManagerMessage::SaveState, ()) + .await; + + save_state.unwrap() + } + pub async fn packet_capture( &self, params: PacketCaptureParams, diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index adad29d1ae..f0d7d7179c 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -90,6 +90,7 @@ use input_core::InputData; use input_core::MultiplexedInputHandle; use inspect::Inspect; use loader_defs::shim::MemoryVtlType; +use mana_driver::save_restore::ManaSavedState; use memory_range::MemoryRange; use mesh::CancelContext; use mesh::MeshPayload; @@ -1000,6 +1001,68 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { } Ok(params) } + + async fn save(&mut self) -> Vec { + let vf_managers: Vec<(Guid, Arc)> = self.vf_managers.drain().collect(); + + // Collect the instance_id of every vf_manager being shutdown + let instance_ids: Vec = vf_managers + .iter() + .map(|(instance_id, _)| *instance_id) + .collect(); + + // Only remove the vmbus channels and NICs from the VF Managers + let mut nic_channels = Vec::new(); + let mut i = 0; + while i < self.nics.len() { + if instance_ids.contains(&self.nics[i].0) { + let val = self.nics.remove(i); + nic_channels.push(val); + } else { + i += 1; + } + } + + for instance_id in instance_ids { + if 
!nic_channels.iter().any(|(id, _)| *id == instance_id) { + tracing::error!( + "No vmbus channel found that matches VF Manager instance_id: {instance_id}" + ); + } + } + + let mut endpoints: Vec<_> = + join_all(nic_channels.drain(..).map(async |(instance_id, channel)| { + async { + let nic = channel.remove().await.revoke().await; + nic.shutdown() + } + .instrument(tracing::info_span!("nic_shutdown", %instance_id)) + .await + })) + .await; + + let run_endpoints = async { + loop { + let _ = endpoints + .iter_mut() + .map(|endpoint| endpoint.wait_for_endpoint_action()) + .collect::>() + .race() + .await; + } + }; + + let save_vf_managers = join_all(vf_managers.into_iter().map(|(_, vf_manager)| { + let manager = vf_manager.clone(); + async move { manager.save().await } + })); + + let state = (run_endpoints, save_vf_managers).race().await; + tracing::info!("save returned: {state:?}"); + + state + } } /// The final vtl0 memory layout computed from different inputs. diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 8bec5fc119..941fe15465 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -513,11 +513,15 @@ impl GdmaDriver { } #[allow(dead_code)] - pub async fn save(mut self) -> anyhow::Result { + pub async fn save(&mut self) -> anyhow::Result { + tracing::info!("saving gdma driver state"); + self.state_saved = true; let doorbell = self.bar0.save(Some(self.db_id as u64)); + self.unmap_all_msix()?; + Ok(GdmaDriverSavedState { mem: SavedMemoryState { base_pfn: self.dma_buffer.pfns()[0], diff --git a/vm/devices/net/mana_driver/src/mana.rs b/vm/devices/net/mana_driver/src/mana.rs index 23a1a3f453..245fda41c1 100644 --- a/vm/devices/net/mana_driver/src/mana.rs +++ b/vm/devices/net/mana_driver/src/mana.rs @@ -5,6 +5,7 @@ pub use crate::bnic_driver::RxConfig; pub use crate::resources::ResourceArena; +pub use crate::save_restore::ManaDeviceSavedState; use crate::bnic_driver::BnicDriver; use crate::bnic_driver::WqConfig; @@ -144,6 +145,16 @@ impl ManaDevice { Ok(device) } + /// Saves the device's state for servicing + pub async fn save(&self) -> anyhow::Result { + let mut gdma = self.inner.gdma.lock().await; + let gdma_saved_state = gdma.save().await?; + let saved_state = ManaDeviceSavedState { + gdma: gdma_saved_state, + }; + Ok(saved_state) + } + /// Returns the number of vports the device supports. pub fn num_vports(&self) -> u32 { self.inner.dev_config.max_num_vports.into() diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs index 8a8e595221..5c5f2d177a 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs @@ -116,6 +116,27 @@ async fn nvme_keepalive( .await } +#[openvmm_test(openhcl_uefi_x64[nvme](vhd(ubuntu_2204_server_x64))[LATEST_STANDARD_X64])] +async fn mana_keepalive( + config: PetriVmBuilder, + (igvm_file,): (ResolvedArtifact,), +) -> Result<(), anyhow::Error> { + let (mut vm, agent) = config + .with_vmbus_redirect(true) + .modify_backend(|b| b.with_nic()) + .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_SIDECAR=off") + .run() + .await?; + + vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default()) + .await?; + + agent.power_off().await?; + vm.wait_for_clean_teardown().await?; + + Ok(()) +} + /// Boot the UEFI firmware, with a VTL2 range automatically configured by /// hvlite. 
#[openvmm_test_no_agent(openhcl_uefi_x64(none))] From 0b4c25bc35eb28bcf1048e3862ee7955b5da52b0 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Tue, 23 Sep 2025 20:30:40 +0000 Subject: [PATCH 16/25] calling save but still destroying everything, triple faults sometimes but might be storage? --- openhcl/underhill_core/src/emuplat/netvsp.rs | 52 +++++++++++++++---- openhcl/underhill_core/src/worker.rs | 20 +++++-- vm/devices/net/mana_driver/src/gdma_driver.rs | 1 - vm/devices/net/mana_driver/src/mana.rs | 20 +++++-- .../net/mana_driver/src/save_restore.rs | 14 ++--- .../tests/x86_64/openhcl_linux_direct.rs | 2 +- 6 files changed, 82 insertions(+), 27 deletions(-) diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index dcb8e7af8f..81fb88b028 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -646,22 +646,47 @@ impl HclNetworkVFManagerWorker { .await; } NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::SaveState(rpc)) => { + tracing::info!("saving state and shutting down VTL2 device"); + + assert!(self.is_shutdown_active); drop(self.messages.take().unwrap()); + rpc.handle(async |_| { + futures::future::join_all(self.endpoint_controls.iter_mut().map( + async |control| match control.disconnect().await { + Ok(Some(mut endpoint)) => { + tracing::info!("Network endpoint disconnected"); + endpoint.stop().await; + } + Ok(None) => (), + Err(err) => { + tracing::error!( + err = err.as_ref() as &dyn std::error::Error, + "Failed to disconnect endpoint" + ); + } + }, + )) + .await; - rpc.handle(|_| async move { - let mana_device = self - .mana_device - .take() - .expect("should have a device present to save state"); + let device = self.mana_device.take().expect("device should be present when saving"); - let saved_state = mana_device.save().await.unwrap(); + let saved_state = if let Ok((saved_state, device)) = device.save().await { + std::mem::forget(device); + ManaSavedState { + mana_device: saved_state, + pci_id: self.vtl2_pci_id.clone(), + } + } else { + tracing::warn!("Failed to save MANA device state"); + ManaSavedState::default() + }; - ManaSavedState { - pci_id: self.vtl2_pci_id.clone(), - mana_device: saved_state, - } + tracing::info!(?saved_state, "MANA device state saved"); + + saved_state }) .await; + // Exit worker thread. 
return; } NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::ShutdownBegin( @@ -673,6 +698,7 @@ impl HclNetworkVFManagerWorker { self.is_shutdown_active = true; } NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::ShutdownComplete(rpc)) => { + tracing::info!("shutting down VTL2 device"); assert!(self.is_shutdown_active); drop(self.messages.take().unwrap()); rpc.handle(async |keep_vf_alive| { @@ -1097,6 +1123,12 @@ impl HclNetworkVFManagerShutdownInProgress { } self.complete = true; } + + pub async fn save(mut self) -> ManaSavedState { + let result = self.inner.save().await; + self.complete = true; + result + } } struct HclNetworkVFManagerInstance { diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index f0d7d7179c..ca82af5938 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -1003,7 +1003,22 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { } async fn save(&mut self) -> Vec { - let vf_managers: Vec<(Guid, Arc)> = self.vf_managers.drain().collect(); + let mut vf_managers: Vec<(Guid, Arc)> = + self.vf_managers.drain().collect(); + + // Notify VF managers of shutdown so that the subsequent teardown of + // the NICs does not modify VF state. + let vf_managers = vf_managers + .drain(..) + .map(move |(instance_id, manager)| { + ( + instance_id, + Arc::into_inner(manager) + .unwrap() + .shutdown_begin(false), + ) + }) + .collect::>(); // Collect the instance_id of every vf_manager being shutdown let instance_ids: Vec = vf_managers @@ -1054,8 +1069,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { }; let save_vf_managers = join_all(vf_managers.into_iter().map(|(_, vf_manager)| { - let manager = vf_manager.clone(); - async move { manager.save().await } + vf_manager.save() })); let state = (run_endpoints, save_vf_managers).race().await; diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 941fe15465..0b7dc0390c 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -208,7 +208,6 @@ impl Drop for GdmaDriver { fn drop(&mut self) { tracing::info!(?self.state_saved, ?self.hwc_failure, "dropping gdma driver"); - // Don't destroy anything if we're saving its state for restoration. if self.state_saved { // Unmap interrupts to prevent the device from sending interrupts during save/restore if let Err(e) = self.unmap_all_interrupts() { diff --git a/vm/devices/net/mana_driver/src/mana.rs b/vm/devices/net/mana_driver/src/mana.rs index 245fda41c1..65390e5db6 100644 --- a/vm/devices/net/mana_driver/src/mana.rs +++ b/vm/devices/net/mana_driver/src/mana.rs @@ -146,13 +146,23 @@ impl ManaDevice { } /// Saves the device's state for servicing - pub async fn save(&self) -> anyhow::Result { - let mut gdma = self.inner.gdma.lock().await; - let gdma_saved_state = gdma.save().await?; + pub async fn save(self) -> anyhow::Result<(ManaDeviceSavedState, T)> { + self.inspect_task.cancel().await; + if let Some(hwc_task) = self.hwc_task { + hwc_task.cancel().await; + } + let inner = Arc::into_inner(self.inner).unwrap(); + let mut driver = inner.gdma.into_inner(); + + let _result = driver.deregister_device(inner.dev_id).await; + let saved_state = ManaDeviceSavedState { - gdma: gdma_saved_state, + gdma: driver.save().await?, }; - Ok(saved_state) + + let device = driver.into_device(); + + Ok((saved_state, device)) } /// Returns the number of vports the device supports. 
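The consuming `ManaDevice::save` hands back both the saved state and the underlying backing device so the caller can decide its fate; for keepalive the caller deliberately leaks the backing device so its teardown never runs. A hedged sketch of that calling pattern, mirroring the worker code in this commit (the wrapper function is an illustration only; at this point in the series `save` still returns a `Result`):

    use mana_driver::mana::ManaDevice;
    use mana_driver::save_restore::ManaSavedState;
    use user_driver::DeviceBacking;

    // Sketch: saving a MANA device for keepalive. `save` consumes the device
    // and returns the backing `T` alongside the saved state; forgetting `T`
    // keeps the hardware untouched across servicing.
    async fn save_for_keepalive<T: DeviceBacking>(
        device: ManaDevice<T>,
        pci_id: String,
    ) -> anyhow::Result<ManaSavedState> {
        let (mana_device, backing) = device.save().await?;
        // Leak the backing device so its Drop path never resets the function.
        std::mem::forget(backing);
        Ok(ManaSavedState { mana_device, pci_id })
    }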
diff --git a/vm/devices/net/mana_driver/src/save_restore.rs b/vm/devices/net/mana_driver/src/save_restore.rs index 7a2cd0441b..fb615f8bc1 100644 --- a/vm/devices/net/mana_driver/src/save_restore.rs +++ b/vm/devices/net/mana_driver/src/save_restore.rs @@ -6,7 +6,7 @@ use mesh::payload::Protobuf; /// Mana saved state -#[derive(Debug, Protobuf, Clone)] +#[derive(Debug, Protobuf, Clone, Default)] #[mesh(package = "mana_driver")] pub struct ManaSavedState { /// The saved state of the MANA device driver @@ -19,7 +19,7 @@ pub struct ManaSavedState { } /// Mana device saved state -#[derive(Debug, Protobuf, Clone)] +#[derive(Debug, Protobuf, Clone, Default)] #[mesh(package = "mana_driver")] pub struct ManaDeviceSavedState { /// Saved state for restoration of the GDMA driver @@ -28,7 +28,7 @@ pub struct ManaDeviceSavedState { } /// Top level saved state for the GDMA driver's saved state -#[derive(Protobuf, Clone, Debug)] +#[derive(Protobuf, Clone, Debug, Default)] #[mesh(package = "mana_driver")] pub struct GdmaDriverSavedState { /// Memory to be restored by a DMA client @@ -85,7 +85,7 @@ pub struct GdmaDriverSavedState { /// The saved state of a completion queue or event queue for restoration /// during servicing -#[derive(Clone, Protobuf, Debug)] +#[derive(Clone, Protobuf, Debug, Default)] #[mesh(package = "mana_driver")] pub struct CqEqSavedState { /// The doorbell state of the queue, which is how the device is notified @@ -114,7 +114,7 @@ pub struct CqEqSavedState { } /// Saved state of a doorbell for restoration during servicing -#[derive(Clone, Protobuf, Debug)] +#[derive(Clone, Protobuf, Debug, Default)] #[mesh(package = "mana_driver")] pub struct DoorbellSavedState { /// The doorbell's id @@ -127,7 +127,7 @@ pub struct DoorbellSavedState { } /// Saved state of a work queue for restoration during servicing -#[derive(Debug, Protobuf, Clone)] +#[derive(Debug, Protobuf, Clone, Default)] #[mesh(package = "mana_driver")] pub struct WqSavedState { /// The doorbell state of the queue, which is how the device is notified @@ -157,7 +157,7 @@ pub struct WqSavedState { /// Saved state for a memory region used by the driver /// to be restored by a DMA client during servicing -#[derive(Debug, Protobuf, Clone)] +#[derive(Debug, Protobuf, Clone, Default)] #[mesh(package = "mana_driver")] pub struct SavedMemoryState { /// The base page frame number of the memory region diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index 08c81a884c..d71338cd13 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -108,7 +108,7 @@ async fn mana_nic_servicing( let (mut vm, agent) = config .with_vmbus_redirect(true) .modify_backend(|b| b.with_nic()) - .with_openhcl_command_line("OPENHCL_ENABLE_SHARED_VISIBILITY_POOL=1") + .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512") .run() .await?; From 4dde9aace46b19aa9a832535b8f64cfc675d5fda Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Wed, 24 Sep 2025 20:45:21 +0000 Subject: [PATCH 17/25] passing test, enabled by default currently --- openhcl/openhcl_dma_manager/src/lib.rs | 2 +- openhcl/underhill_core/src/dispatch/mod.rs | 25 ++++++++------ openhcl/underhill_core/src/emuplat/netvsp.rs | 33 +++++++++++++++++-- openhcl/underhill_core/src/worker.rs | 11 +++++++ vm/devices/net/mana_driver/src/gdma_driver.rs | 3 +- vm/devices/net/mana_driver/src/mana.rs | 31 +++++++++++++---- 
vm/devices/net/net_mana/src/lib.rs | 4 +-- 7 files changed, 85 insertions(+), 24 deletions(-) diff --git a/openhcl/openhcl_dma_manager/src/lib.rs b/openhcl/openhcl_dma_manager/src/lib.rs index 52a29263eb..d078db4d20 100644 --- a/openhcl/openhcl_dma_manager/src/lib.rs +++ b/openhcl/openhcl_dma_manager/src/lib.rs @@ -387,7 +387,7 @@ impl OpenhclDmaManager { if let Some(private_pool) = &self.private_pool { private_pool - .validate_restore(true) + .validate_restore(false) .context("failed to validate restore for private pool")? } diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index a61a11eeaa..b8c837a26b 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -115,6 +115,7 @@ pub trait LoadedVmNetworkSettings: Inspect { vmbus_server: &Option, dma_client_spawner: DmaClientSpawner, is_isolated: bool, + mana_state: Option<&ManaSavedState>, ) -> anyhow::Result; /// Callback when network is removed externally. @@ -754,6 +755,19 @@ impl LoadedVm { let emuplat = (self.emuplat_servicing.save()).context("emuplat save failed")?; + // Only save dma manager state if we are expected to keep VF devices + // alive across save. Otherwise, don't persist the state at all, as + // there should be no live DMA across save. + // + // This has to happen before saving the network state, otherwise its allocations + // are marked as Free and are unable to be restored. + let dma_manager_state = if vf_keepalive_flag { + use vmcore::save_restore::SaveRestore; + Some(self.dma_manager.save().context("dma_manager save failed")?) + } else { + None + }; + // Only save NVMe state when there are NVMe controllers and keep alive // was enabled. let nvme_state = if let Some(n) = &self.nvme_manager { @@ -781,16 +795,6 @@ impl LoadedVm { None }; - // Only save dma manager state if we are expected to keep VF devices - // alive across save. Otherwise, don't persist the state at all, as - // there should be no live DMA across save. - let dma_manager_state = if vf_keepalive_flag { - use vmcore::save_restore::SaveRestore; - Some(self.dma_manager.save().context("dma_manager save failed")?) 
- } else { - None - }; - let vmbus_client = if let Some(vmbus_client) = &mut self.vmbus_client { vmbus_client.stop().await; Some(vmbus_client.save().await) @@ -877,6 +881,7 @@ impl LoadedVm { &self.vmbus_server, self.dma_manager.client_spawner(), self.isolation.is_isolated(), + None, // No existing mana state ) .await?; diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index 81fb88b028..dd93a8b0d0 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -68,7 +68,20 @@ async fn create_mana_device( vp_count: u32, max_sub_channels: u16, dma_client: Arc, + mana_state: Option<&ManaSavedState>, ) -> anyhow::Result> { + if let Some(mana_state) = mana_state { + tracing::info!("restoring MANA device from saved state"); + return try_create_mana_device( + driver_source, + pci_id, + vp_count, + max_sub_channels, + dma_client, + Some(mana_state), + ).await + } + // Disable FLR on vfio attach/detach; this allows faster system // startup/shutdown with the caveat that the device needs to be properly // sent through the shutdown path during servicing operations, as that is @@ -92,6 +105,7 @@ async fn create_mana_device( vp_count, max_sub_channels, dma_client.clone(), + None, ) .await { @@ -121,16 +135,26 @@ async fn try_create_mana_device( vp_count: u32, max_sub_channels: u16, dma_client: Arc, + mana_state: Option<&ManaSavedState>, ) -> anyhow::Result> { - let device = VfioDevice::new(driver_source, pci_id, dma_client) - .await - .context("failed to open device")?; + // Restore the device if we have saved state from servicing, otherwise create a new one. + let device = if mana_state.is_some() { + tracing::info!("Restoring VFIO device from saved state"); + VfioDevice::restore(driver_source, pci_id, true, dma_client) + .await + .context("failed to restore device")? + } else { + VfioDevice::new(driver_source, pci_id, dma_client) + .await + .context("failed to open device")? 
+ }; ManaDevice::new( &driver_source.simple(), device, vp_count, max_sub_channels + 1, + mana_state.map(|state| &state.mana_device), ) .instrument(tracing::info_span!("new_mana_device")) .await @@ -730,6 +754,7 @@ impl HclNetworkVFManagerWorker { self.vp_count, self.max_sub_channels, self.dma_client.clone(), + None, // No saved state on new device arrival ) .await { @@ -906,6 +931,7 @@ impl HclNetworkVFManager { netvsp_state: &Option>, dma_mode: GuestDmaMode, dma_client: Arc, + mana_state: Option<&ManaSavedState>, ) -> anyhow::Result<( Self, Vec, @@ -917,6 +943,7 @@ impl HclNetworkVFManager { vp_count, max_sub_channels, dma_client.clone(), + mana_state, ) .await?; let (mut endpoints, endpoint_controls): (Vec<_>, Vec<_>) = (0..device.num_vports()) diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index ca82af5938..68905ea0eb 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -757,6 +757,7 @@ impl UhVmNetworkSettings { vmbus_server: &Option, dma_client_spawner: DmaClientSpawner, is_isolated: bool, + saved_mana_state: Option<&ManaSavedState>, ) -> anyhow::Result { let instance_id = nic_config.instance_id; let nic_max_sub_channels = nic_config @@ -787,6 +788,7 @@ impl UhVmNetworkSettings { servicing_netvsp_state, self.dma_mode, dma_client, + saved_mana_state, ) .await?; @@ -913,6 +915,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { vmbus_server: &Option, dma_client_spawner: DmaClientSpawner, is_isolated: bool, + mana_state: Option<&ManaSavedState>, ) -> anyhow::Result { if self.vf_managers.contains_key(&instance_id) { return Err(NetworkSettingsError::VFManagerExists(instance_id).into()); @@ -946,6 +949,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { vmbus_server, dma_client_spawner, is_isolated, + mana_state, ) .await?; @@ -3133,6 +3137,12 @@ async fn new_underhill_vm( if !controllers.mana.is_empty() { let _span = tracing::info_span!("network_settings", CVM_ALLOWED).entered(); for nic_config in controllers.mana.into_iter() { + let nic_servicing_state = if let Some(ref state) = servicing_state.mana_state { + state.iter().find(|s| s.pci_id == nic_config.pci_id) + } else { + None + }; + let save_state = uh_network_settings .add_network( nic_config.instance_id, @@ -3146,6 +3156,7 @@ async fn new_underhill_vm( &vmbus_server, dma_manager.client_spawner(), isolation.is_isolated(), + nic_servicing_state, ) .await?; diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 0b7dc0390c..47c97aec65 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -208,6 +208,7 @@ impl Drop for GdmaDriver { fn drop(&mut self) { tracing::info!(?self.state_saved, ?self.hwc_failure, "dropping gdma driver"); + // Don't destroy anything if we're saving its state for restoration. 
if self.state_saved { // Unmap interrupts to prevent the device from sending interrupts during save/restore if let Err(e) = self.unmap_all_interrupts() { @@ -519,8 +520,6 @@ impl GdmaDriver { let doorbell = self.bar0.save(Some(self.db_id as u64)); - self.unmap_all_msix()?; - Ok(GdmaDriverSavedState { mem: SavedMemoryState { base_pfn: self.dma_buffer.pfns()[0], diff --git a/vm/devices/net/mana_driver/src/mana.rs b/vm/devices/net/mana_driver/src/mana.rs index 65390e5db6..ed0c2f7a57 100644 --- a/vm/devices/net/mana_driver/src/mana.rs +++ b/vm/devices/net/mana_driver/src/mana.rs @@ -78,10 +78,23 @@ impl ManaDevice { device: T, num_vps: u32, max_queues_per_vport: u16, + mana_state: Option<&ManaDeviceSavedState>, ) -> anyhow::Result { - let mut gdma = GdmaDriver::new(driver, device, num_vps, None) - .instrument(tracing::info_span!("new_gdma_driver")) - .await?; + let mut gdma = if let Some(mana_state) = mana_state { + let memory = device.dma_client().attach_pending_buffers()?; + let gdma_memory = memory + .iter() + .find(|m| m.pfns()[0] == mana_state.gdma.mem.base_pfn) + .expect("gdma restored memory not found") + .clone(); + + GdmaDriver::restore(mana_state.gdma.clone(), device, gdma_memory).await? + } else { + GdmaDriver::new(driver, device, num_vps, None) + .instrument(tracing::info_span!("new_gdma_driver")) + .await? + }; + gdma.test_eq().await?; gdma.verify_vf_driver_version().await?; @@ -94,7 +107,15 @@ impl ManaDevice { .find(|dev_id| dev_id.ty == GdmaDevType::GDMA_DEVICE_MANA) .context("no mana device found")?; - let dev_data = gdma.register_device(dev_id).await?; + let dev_data = if let Some(mana_state) = mana_state { + GdmaRegisterDeviceResp { + pdid: mana_state.gdma.pdid, + gpa_mkey: mana_state.gdma.gpa_mkey, + db_id: mana_state.gdma.db_id as u32, + } + } else { + gdma.register_device(dev_id).await? 
+ }; let mut bnic = BnicDriver::new(&mut gdma, dev_id); let dev_config = bnic.query_dev_config().await?; @@ -154,8 +175,6 @@ impl ManaDevice { let inner = Arc::into_inner(self.inner).unwrap(); let mut driver = inner.gdma.into_inner(); - let _result = driver.deregister_device(inner.dev_id).await; - let saved_state = ManaDeviceSavedState { gdma: driver.save().await?, }; diff --git a/vm/devices/net/net_mana/src/lib.rs b/vm/devices/net/net_mana/src/lib.rs index 51af084038..79d200e1fb 100644 --- a/vm/devices/net/net_mana/src/lib.rs +++ b/vm/devices/net/net_mana/src/lib.rs @@ -1607,7 +1607,7 @@ mod tests { reserved: 0, max_num_eqs: 64, }; - let thing = ManaDevice::new(&driver, device, 1, 1).await.unwrap(); + let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap(); let vport = thing.new_vport(0, None, &dev_config).await.unwrap(); let mut endpoint = ManaEndpoint::new(driver.clone(), vport, dma_mode).await; let mut queues = Vec::new(); @@ -1710,7 +1710,7 @@ mod tests { reserved: 0, max_num_eqs: 64, }; - let thing = ManaDevice::new(&driver, device, 1, 1).await.unwrap(); + let thing = ManaDevice::new(&driver, device, 1, 1, None).await.unwrap(); let _ = thing.new_vport(0, None, &dev_config).await.unwrap(); } } From 3177ddc3883f735c1af643c7ce7ac14ad372467b Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 25 Sep 2025 22:30:41 +0000 Subject: [PATCH 18/25] default off --- openhcl/underhill_core/src/dispatch/mod.rs | 17 ++++++++++------- openhcl/underhill_core/src/emuplat/netvsp.rs | 4 ---- openhcl/underhill_core/src/lib.rs | 1 + openhcl/underhill_core/src/options.rs | 5 +++++ openhcl/underhill_core/src/worker.rs | 3 +++ petri/src/vm/mod.rs | 2 ++ petri/src/worker.rs | 1 + vm/devices/get/get_protocol/src/lib.rs | 7 ++++++- vm/devices/get/get_resources/src/lib.rs | 4 +++- .../get/guest_emulation_device/src/lib.rs | 3 ++- vm/devices/net/mana_driver/src/gdma_driver.rs | 2 -- 11 files changed, 33 insertions(+), 16 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index b8c837a26b..0178a4e100 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -193,6 +193,7 @@ pub(crate) struct LoadedVm { pub _periodic_telemetry_task: Task<()>, pub nvme_keep_alive: bool, + pub mana_keep_alive: bool, pub test_configuration: Option, pub dma_manager: OpenhclDmaManager, } @@ -304,7 +305,7 @@ impl LoadedVm { WorkerRpc::Restart(rpc) => { let state = async { let running = self.stop().await; - match self.save(None, false).await { + match self.save(None, false, false).await { Ok(servicing_state) => Some((rpc, servicing_state)), Err(err) => { if running { @@ -369,7 +370,7 @@ impl LoadedVm { UhVmRpc::Save(rpc) => { rpc.handle_failable(async |()| { let running = self.stop().await; - let r = self.save(None, false).await; + let r = self.save(None, false, false).await; if running { self.start(None).await; } @@ -571,6 +572,7 @@ impl LoadedVm { // NOTE: This is set via the corresponding env arg, as this feature is // experimental. let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive(); + let mana_keepalive = self.mana_keep_alive && capabilities_flags.enable_mana_keepalive(); // Do everything before the log flush under a span. 
let r = async { @@ -587,7 +589,7 @@ impl LoadedVm { tracing::info!("state units stopped"); - let mut state = self.save(Some(deadline), true).await?; + let mut state = self.save(Some(deadline), nvme_keepalive, mana_keepalive).await?; state.init_state.correlation_id = Some(correlation_id); // Unload any network devices. @@ -749,7 +751,8 @@ impl LoadedVm { async fn save( &mut self, _deadline: Option, - vf_keepalive_flag: bool, + nvme_keepalive_flag: bool, + mana_keepalive_flag: bool, ) -> anyhow::Result { assert!(!self.state_units.is_running()); @@ -761,7 +764,7 @@ impl LoadedVm { // // This has to happen before saving the network state, otherwise its allocations // are marked as Free and are unable to be restored. - let dma_manager_state = if vf_keepalive_flag { + let dma_manager_state = if nvme_keepalive_flag || mana_keepalive_flag { use vmcore::save_restore::SaveRestore; Some(self.dma_manager.save().context("dma_manager save failed")?) } else { @@ -771,7 +774,7 @@ impl LoadedVm { // Only save NVMe state when there are NVMe controllers and keep alive // was enabled. let nvme_state = if let Some(n) = &self.nvme_manager { - n.save(vf_keepalive_flag) + n.save(nvme_keepalive_flag) .instrument(tracing::info_span!("nvme_manager_save", CVM_ALLOWED)) .await .map(|s| NvmeSavedState { nvme_state: s }) @@ -779,7 +782,7 @@ impl LoadedVm { None }; - let mana_state = if let Some(network_settings) = &mut self.network_settings { + let mana_state = if let Some(network_settings) = &mut self.network_settings && mana_keepalive_flag { Some(network_settings.save().await) } else { None diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index dd93a8b0d0..a7a6e9912f 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -670,8 +670,6 @@ impl HclNetworkVFManagerWorker { .await; } NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::SaveState(rpc)) => { - tracing::info!("saving state and shutting down VTL2 device"); - assert!(self.is_shutdown_active); drop(self.messages.take().unwrap()); rpc.handle(async |_| { @@ -705,8 +703,6 @@ impl HclNetworkVFManagerWorker { ManaSavedState::default() }; - tracing::info!(?saved_state, "MANA device state saved"); - saved_state }) .await; diff --git a/openhcl/underhill_core/src/lib.rs b/openhcl/underhill_core/src/lib.rs index 280748b40c..dd3bc59871 100644 --- a/openhcl/underhill_core/src/lib.rs +++ b/openhcl/underhill_core/src/lib.rs @@ -325,6 +325,7 @@ async fn launch_workers( gdbstub: opt.gdbstub, hide_isolation: opt.hide_isolation, nvme_keep_alive: opt.nvme_keep_alive, + mana_keep_alive: opt.mana_keep_alive, nvme_always_flr: opt.nvme_always_flr, test_configuration: opt.test_configuration, disable_uefi_frontpage: opt.disable_uefi_frontpage, diff --git a/openhcl/underhill_core/src/options.rs b/openhcl/underhill_core/src/options.rs index fa80416b05..e6476b1339 100644 --- a/openhcl/underhill_core/src/options.rs +++ b/openhcl/underhill_core/src/options.rs @@ -165,6 +165,9 @@ pub struct Options { /// (OPENHCL_NVME_KEEP_ALIVE=1) Enable nvme keep alive when servicing. pub nvme_keep_alive: bool, + /// (OPENHCL_MANA_KEEP_ALIVE=1) Enable MANA keep alive when servicing. + pub mana_keep_alive: bool, + /// (OPENHCL_NVME_ALWAYS_FLR=1) /// Always use the FLR (Function Level Reset) path for NVMe devices, /// even if we would otherwise attempt to use VFIO's NoReset support. 
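Both knobs have to agree before MANA state is saved: `OPENHCL_MANA_KEEP_ALIVE=1` turns the feature on in OpenHCL, and the host signals support through the servicing capabilities flags. A condensed sketch of that gate, assuming the `SaveGuestVtl2StateFlags` accessor generated for the new bitfield (paths and the helper name are assumptions for illustration):

    use get_protocol::SaveGuestVtl2StateFlags;

    // Sketch of the keepalive gate: the env-derived opt-in and the
    // host-advertised capability bit must both be set; either side can veto.
    fn mana_keepalive_requested(env_opt_in: bool, caps: &SaveGuestVtl2StateFlags) -> bool {
        env_opt_in && caps.enable_mana_keepalive()
    }

If the gate is false, the save path falls back to the existing full teardown of VTL2 network devices.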
@@ -298,6 +301,7 @@ impl Options { let gdbstub = parse_legacy_env_bool("OPENHCL_GDBSTUB"); let gdbstub_port = parse_legacy_env_number("OPENHCL_GDBSTUB_PORT")?.map(|x| x as u32); let nvme_keep_alive = parse_env_bool("OPENHCL_NVME_KEEP_ALIVE"); + let mana_keep_alive = parse_env_bool("OPENHCL_MANA_KEEP_ALIVE"); let nvme_always_flr = parse_env_bool("OPENHCL_NVME_ALWAYS_FLR"); let test_configuration = parse_env_string("OPENHCL_TEST_CONFIG").and_then(|x| { x.to_string_lossy() @@ -382,6 +386,7 @@ impl Options { halt_on_guest_halt, no_sidecar_hotplug, nvme_keep_alive, + mana_keep_alive, nvme_always_flr, test_configuration, disable_uefi_frontpage, diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 68905ea0eb..8e88c53e76 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -287,6 +287,8 @@ pub struct UnderhillEnvCfg { pub hide_isolation: bool, /// Enable nvme keep alive. pub nvme_keep_alive: bool, + /// Enable mana keep alive. + pub mana_keep_alive: bool, /// Don't skip FLR for NVMe devices. pub nvme_always_flr: bool, /// test configuration @@ -3366,6 +3368,7 @@ async fn new_underhill_vm( _periodic_telemetry_task: periodic_telemetry_task, nvme_keep_alive: env_cfg.nvme_keep_alive, + mana_keep_alive: env_cfg.mana_keep_alive, test_configuration: env_cfg.test_configuration, dma_manager, }; diff --git a/petri/src/vm/mod.rs b/petri/src/vm/mod.rs index 25ff95163a..66ff403ea7 100644 --- a/petri/src/vm/mod.rs +++ b/petri/src/vm/mod.rs @@ -1508,6 +1508,8 @@ pub enum IsolationType { pub struct OpenHclServicingFlags { /// Preserve DMA memory for NVMe devices if supported. pub enable_nvme_keepalive: bool, + /// Preserve DMA memory for MANA devices if supported. + pub enable_mana_keepalive: bool, /// Skip any logic that the vmm may have to ignore servicing updates if the supplied igvm file version is not different than the one currently running. pub override_version_checks: bool, /// Hint to the OpenHCL runtime how much time to wait when stopping / saving the OpenHCL. diff --git a/petri/src/worker.rs b/petri/src/worker.rs index a3a479f630..620e3aadf8 100644 --- a/petri/src/worker.rs +++ b/petri/src/worker.rs @@ -69,6 +69,7 @@ impl Worker { send, GuestServicingFlags { nvme_keepalive: flags.enable_nvme_keepalive, + mana_keepalive: flags.enable_mana_keepalive }, file, ) diff --git a/vm/devices/get/get_protocol/src/lib.rs b/vm/devices/get/get_protocol/src/lib.rs index e5c54b9262..6c899b414a 100644 --- a/vm/devices/get/get_protocol/src/lib.rs +++ b/vm/devices/get/get_protocol/src/lib.rs @@ -1186,8 +1186,13 @@ pub struct SaveGuestVtl2StateFlags { /// Explicitly allow nvme_keepalive feature when servicing. #[bits(1)] pub enable_nvme_keepalive: bool, + + /// Explicitly allow mana_keepalive feature when servicing. + #[bits(1)] + pub enable_mana_keepalive: bool, + /// Reserved, must be zero. - #[bits(63)] + #[bits(62)] _rsvd1: u64, } diff --git a/vm/devices/get/get_resources/src/lib.rs b/vm/devices/get/get_resources/src/lib.rs index 7c69c98184..ac3ca7976b 100644 --- a/vm/devices/get/get_resources/src/lib.rs +++ b/vm/devices/get/get_resources/src/lib.rs @@ -157,8 +157,10 @@ pub mod ged { /// Define servicing behavior. #[derive(MeshPayload, Default)] pub struct GuestServicingFlags { - /// Retain memory for DMA-attached devices. + /// Retain memory for nvme devices. pub nvme_keepalive: bool, + /// Retain memory for MANA devices. 
+ pub mana_keepalive: bool, } /// Actions a client can request that the Guest Emulation diff --git a/vm/devices/get/guest_emulation_device/src/lib.rs b/vm/devices/get/guest_emulation_device/src/lib.rs index a9805fb927..3198d0f765 100644 --- a/vm/devices/get/guest_emulation_device/src/lib.rs +++ b/vm/devices/get/guest_emulation_device/src/lib.rs @@ -577,7 +577,8 @@ impl GedChannel { ), correlation_id: Guid::ZERO, capabilities_flags: SaveGuestVtl2StateFlags::new() - .with_enable_nvme_keepalive(rpc.input().nvme_keepalive), + .with_enable_nvme_keepalive(rpc.input().nvme_keepalive) + .with_enable_mana_keepalive(rpc.input().mana_keepalive), timeout_hint_secs: 60, }; diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 47c97aec65..18e8139dfb 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -512,7 +512,6 @@ impl GdmaDriver { Ok(this) } - #[allow(dead_code)] pub async fn save(&mut self) -> anyhow::Result { tracing::info!("saving gdma driver state"); @@ -591,7 +590,6 @@ impl GdmaDriver { Ok((bar0_mapping, map)) } - #[allow(dead_code)] pub async fn restore( saved_state: GdmaDriverSavedState, mut device: T, From c403fc5d0c475ba5fd4a3303749382b154265a37 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 25 Sep 2025 23:07:55 +0000 Subject: [PATCH 19/25] run format, make RPC return an option in case device disappears, remove unnecessary results --- openhcl/underhill_core/src/dispatch/mod.rs | 4 +- openhcl/underhill_core/src/emuplat/netvsp.rs | 44 ++++++++++++------- openhcl/underhill_core/src/worker.rs | 16 +++---- openvmm/openvmm_entry/src/lib.rs | 5 ++- petri/src/worker.rs | 2 +- vm/devices/net/mana_driver/src/gdma_driver.rs | 8 ++-- vm/devices/net/mana_driver/src/mana.rs | 6 +-- vm/devices/net/mana_driver/src/tests.rs | 2 +- 8 files changed, 53 insertions(+), 34 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index 0178a4e100..0776a008a1 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -782,7 +782,9 @@ impl LoadedVm { None }; - let mana_state = if let Some(network_settings) = &mut self.network_settings && mana_keepalive_flag { + let mana_state = if let Some(network_settings) = &mut self.network_settings + && mana_keepalive_flag + { Some(network_settings.save().await) } else { None diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index a7a6e9912f..5acfd4ce93 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -59,7 +59,7 @@ enum HclNetworkVfManagerMessage { HideVtl0VF(Rpc), Inspect(inspect::Deferred), PacketCapture(FailableRpc, PacketCaptureParams>), - SaveState(Rpc<(), ManaSavedState>), + SaveState(Rpc<(), Option>), } async fn create_mana_device( @@ -79,7 +79,8 @@ async fn create_mana_device( max_sub_channels, dma_client, Some(mana_state), - ).await + ) + .await; } // Disable FLR on vfio attach/detach; this allows faster system @@ -690,20 +691,20 @@ impl HclNetworkVFManagerWorker { )) .await; - let device = self.mana_device.take().expect("device should be present when saving"); - - let saved_state = if let Ok((saved_state, device)) = device.save().await { + if let Some(device) = self.mana_device.take() { + let (saved_state, device) = device.save().await; std::mem::forget(device); - ManaSavedState { + + Some(ManaSavedState { mana_device: 
saved_state, pci_id: self.vtl2_pci_id.clone(), - } + }) } else { - tracing::warn!("Failed to save MANA device state"); - ManaSavedState::default() - }; - - saved_state + tracing::warn!( + "no MANA device present when saving state, returning None" + ); + None + } }) .await; // Exit worker thread. @@ -1039,14 +1040,27 @@ impl HclNetworkVFManager { )) } - pub async fn save(&self) -> ManaSavedState { + pub async fn save(&self) -> Option { let save_state = self .shared_state .worker_channel .call(HclNetworkVfManagerMessage::SaveState, ()) .await; - save_state.unwrap() + match save_state { + Ok(None) => { + tracing::warn!("No MANA device present when saving state, returning None"); + None + } + Ok(Some(state)) => Some(state), + Err(err) => { + tracing::error!( + err = &err as &dyn std::error::Error, + "RPC failure when saving VF Manager state" + ); + None + } + } } pub async fn packet_capture( @@ -1147,7 +1161,7 @@ impl HclNetworkVFManagerShutdownInProgress { self.complete = true; } - pub async fn save(mut self) -> ManaSavedState { + pub async fn save(mut self) -> Option { let result = self.inner.save().await; self.complete = true; result diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 8e88c53e76..3d96728cd1 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -1019,9 +1019,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { .map(move |(instance_id, manager)| { ( instance_id, - Arc::into_inner(manager) - .unwrap() - .shutdown_begin(false), + Arc::into_inner(manager).unwrap().shutdown_begin(false), ) }) .collect::>(); @@ -1074,14 +1072,16 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { } }; - let save_vf_managers = join_all(vf_managers.into_iter().map(|(_, vf_manager)| { - vf_manager.save() - })); + let save_vf_managers = join_all( + vf_managers + .into_iter() + .map(|(_, vf_manager)| vf_manager.save()), + ); let state = (run_endpoints, save_vf_managers).race().await; - tracing::info!("save returned: {state:?}"); - state + // Discard any vf_managers that failed to return valid save state. + state.into_iter().flatten().collect() } } diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index 86d3169702..d6cdfe2088 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -2769,7 +2769,10 @@ async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> an hvlite_helpers::underhill::service_underhill( &vm_rpc, ged_rpc.as_ref().context("no GED")?, - GuestServicingFlags::default(), + GuestServicingFlags { + mana_keepalive: true, + ..GuestServicingFlags::default() + }, file.into(), ) .await?; diff --git a/petri/src/worker.rs b/petri/src/worker.rs index 620e3aadf8..531055f762 100644 --- a/petri/src/worker.rs +++ b/petri/src/worker.rs @@ -69,7 +69,7 @@ impl Worker { send, GuestServicingFlags { nvme_keepalive: flags.enable_nvme_keepalive, - mana_keepalive: flags.enable_mana_keepalive + mana_keepalive: flags.enable_mana_keepalive, }, file, ) diff --git a/vm/devices/net/mana_driver/src/gdma_driver.rs b/vm/devices/net/mana_driver/src/gdma_driver.rs index 18e8139dfb..166b3a0d5e 100644 --- a/vm/devices/net/mana_driver/src/gdma_driver.rs +++ b/vm/devices/net/mana_driver/src/gdma_driver.rs @@ -208,7 +208,7 @@ impl Drop for GdmaDriver { fn drop(&mut self) { tracing::info!(?self.state_saved, ?self.hwc_failure, "dropping gdma driver"); - // Don't destroy anything if we're saving its state for restoration. 
+ // Don't destroy anything if we're saving its state for restoration. if self.state_saved { // Unmap interrupts to prevent the device from sending interrupts during save/restore if let Err(e) = self.unmap_all_interrupts() { @@ -512,14 +512,14 @@ impl GdmaDriver { Ok(this) } - pub async fn save(&mut self) -> anyhow::Result { + pub async fn save(&mut self) -> GdmaDriverSavedState { tracing::info!("saving gdma driver state"); self.state_saved = true; let doorbell = self.bar0.save(Some(self.db_id as u64)); - Ok(GdmaDriverSavedState { + GdmaDriverSavedState { mem: SavedMemoryState { base_pfn: self.dma_buffer.pfns()[0], len: self.dma_buffer.len(), @@ -536,7 +536,7 @@ impl GdmaDriver { min_queue_avail: self.min_queue_avail, link_toggle: self.link_toggle.clone(), hwc_failure: self.hwc_failure, - }) + } } pub fn init(device: &mut T) -> anyhow::Result<(::Registers, RegMap)> { diff --git a/vm/devices/net/mana_driver/src/mana.rs b/vm/devices/net/mana_driver/src/mana.rs index ed0c2f7a57..b6272a278e 100644 --- a/vm/devices/net/mana_driver/src/mana.rs +++ b/vm/devices/net/mana_driver/src/mana.rs @@ -167,7 +167,7 @@ impl ManaDevice { } /// Saves the device's state for servicing - pub async fn save(self) -> anyhow::Result<(ManaDeviceSavedState, T)> { + pub async fn save(self) -> (ManaDeviceSavedState, T) { self.inspect_task.cancel().await; if let Some(hwc_task) = self.hwc_task { hwc_task.cancel().await; @@ -176,12 +176,12 @@ impl ManaDevice { let mut driver = inner.gdma.into_inner(); let saved_state = ManaDeviceSavedState { - gdma: driver.save().await?, + gdma: driver.save().await, }; let device = driver.into_device(); - Ok((saved_state, device)) + (saved_state, device) } /// Returns the number of vports the device supports. diff --git a/vm/devices/net/mana_driver/src/tests.rs b/vm/devices/net/mana_driver/src/tests.rs index 019cd9ffa0..63419968f8 100644 --- a/vm/devices/net/mana_driver/src/tests.rs +++ b/vm/devices/net/mana_driver/src/tests.rs @@ -193,7 +193,7 @@ async fn test_gdma_save_restore(driver: DefaultDriver) { gdma.test_eq().await.unwrap(); gdma.verify_vf_driver_version().await.unwrap(); - gdma.save().await.unwrap() + gdma.save().await }; let mut new_gdma = GdmaDriver::restore(saved_state, cloned_device, gdma_buffer) From 8e30a76435fc86f09dde1248fe93d0db01d1be09 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 25 Sep 2025 23:16:06 +0000 Subject: [PATCH 20/25] some logging --- openhcl/underhill_core/src/emuplat/netvsp.rs | 2 ++ vm/devices/net/mana_driver/src/mana.rs | 4 +++- .../vmm_tests/tests/tests/x86_64/openhcl_uefi.rs | 12 +++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs index 5acfd4ce93..b8433322dd 100644 --- a/openhcl/underhill_core/src/emuplat/netvsp.rs +++ b/openhcl/underhill_core/src/emuplat/netvsp.rs @@ -142,10 +142,12 @@ async fn try_create_mana_device( let device = if mana_state.is_some() { tracing::info!("Restoring VFIO device from saved state"); VfioDevice::restore(driver_source, pci_id, true, dma_client) + .instrument(tracing::info_span!("restore_mana_vfio_device")) .await .context("failed to restore device")? } else { VfioDevice::new(driver_source, pci_id, dma_client) + .instrument(tracing::info_span!("new_mana_vfio_device")) .await .context("failed to open device")? 
}; diff --git a/vm/devices/net/mana_driver/src/mana.rs b/vm/devices/net/mana_driver/src/mana.rs index b6272a278e..4593bef25d 100644 --- a/vm/devices/net/mana_driver/src/mana.rs +++ b/vm/devices/net/mana_driver/src/mana.rs @@ -88,7 +88,9 @@ impl ManaDevice { .expect("gdma restored memory not found") .clone(); - GdmaDriver::restore(mana_state.gdma.clone(), device, gdma_memory).await? + GdmaDriver::restore(mana_state.gdma.clone(), device, gdma_memory) + .instrument(tracing::info_span!("restore_gdma_driver")) + .await? } else { GdmaDriver::new(driver, device, num_vps, None) .instrument(tracing::info_span!("new_gdma_driver")) diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs index 5c5f2d177a..89b02f11f3 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_uefi.rs @@ -124,12 +124,18 @@ async fn mana_keepalive( let (mut vm, agent) = config .with_vmbus_redirect(true) .modify_backend(|b| b.with_nic()) - .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_SIDECAR=off") + .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_SIDECAR=off OPENHCL_MANA_KEEP_ALIVE=1") .run() .await?; - vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default()) - .await?; + vm.restart_openhcl( + igvm_file, + OpenHclServicingFlags { + enable_mana_keepalive: true, + ..OpenHclServicingFlags::default() + }, + ) + .await?; agent.power_off().await?; vm.wait_for_clean_teardown().await?; From b3a95cc93f6b374697a1103b87c1a8a503ccff18 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Mon, 6 Oct 2025 19:54:24 +0000 Subject: [PATCH 21/25] cleanup from self-review --- openhcl/underhill_core/src/dispatch/mod.rs | 4 ++-- openhcl/underhill_core/src/worker.rs | 9 ++++++++- .../vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index 0776a008a1..67b092dc12 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -115,6 +115,7 @@ pub trait LoadedVmNetworkSettings: Inspect { vmbus_server: &Option, dma_client_spawner: DmaClientSpawner, is_isolated: bool, + save_restore_supported: bool, mana_state: Option<&ManaSavedState>, ) -> anyhow::Result; @@ -587,8 +588,6 @@ impl LoadedVm { anyhow::bail!("cannot service underhill while paused"); } - tracing::info!("state units stopped"); - let mut state = self.save(Some(deadline), nvme_keepalive, mana_keepalive).await?; state.init_state.correlation_id = Some(correlation_id); @@ -886,6 +885,7 @@ impl LoadedVm { &self.vmbus_server, self.dma_manager.client_spawner(), self.isolation.is_isolated(), + self.mana_keep_alive, None, // No existing mana state ) .await?; diff --git a/openhcl/underhill_core/src/worker.rs b/openhcl/underhill_core/src/worker.rs index 169ec3b983..9eefd6d629 100644 --- a/openhcl/underhill_core/src/worker.rs +++ b/openhcl/underhill_core/src/worker.rs @@ -759,6 +759,7 @@ impl UhVmNetworkSettings { vmbus_server: &Option, dma_client_spawner: DmaClientSpawner, is_isolated: bool, + save_restore_supported: bool, saved_mana_state: Option<&ManaSavedState>, ) -> anyhow::Result { let instance_id = nic_config.instance_id; @@ -775,7 +776,7 @@ impl UhVmNetworkSettings { } else { AllocationVisibility::Private }, - persistent_allocations: true, + persistent_allocations: save_restore_supported, })?; let (vf_manager, 
endpoints, save_state) = HclNetworkVFManager::new( @@ -917,6 +918,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { vmbus_server: &Option, dma_client_spawner: DmaClientSpawner, is_isolated: bool, + save_restore_supported: bool, mana_state: Option<&ManaSavedState>, ) -> anyhow::Result { if self.vf_managers.contains_key(&instance_id) { @@ -951,6 +953,7 @@ impl LoadedVmNetworkSettings for UhVmNetworkSettings { vmbus_server, dma_client_spawner, is_isolated, + save_restore_supported, mana_state, ) .await?; @@ -3150,6 +3153,9 @@ async fn new_underhill_vm( None }; + let private_pool_available = !runtime_params.private_pool_ranges().is_empty(); + let save_restore_supported = env_cfg.mana_keep_alive && private_pool_available; + let save_state = uh_network_settings .add_network( nic_config.instance_id, @@ -3163,6 +3169,7 @@ async fn new_underhill_vm( &vmbus_server, dma_manager.client_spawner(), isolation.is_isolated(), + save_restore_supported, nic_servicing_state, ) .await?; diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs index 37c5e1ce28..cb9e275c66 100644 --- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs +++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs @@ -87,7 +87,7 @@ async fn mana_nic_servicing( let (mut vm, agent) = config .with_vmbus_redirect(true) .modify_backend(|b| b.with_nic()) - .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512") + .with_openhcl_command_line("OPENHCL_ENABLE_SHARED_VISIBILITY_POOL=1") .run() .await?; From 60008adac8aa544e5e6ea5327ae8652e903fbb07 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Tue, 7 Oct 2025 11:51:02 -0700 Subject: [PATCH 22/25] don't have mana keepalive on by default in openvmm RPC --- openvmm/openvmm_entry/src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index 1f9179e19c..27339f935f 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -2801,10 +2801,7 @@ async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> an hvlite_helpers::underhill::save_underhill( &vm_rpc, ged_rpc.as_ref().context("no GED")?, - GuestServicingFlags { - mana_keepalive: true, - ..GuestServicingFlags::default() - }, + GuestServicingFlags::default(), file.into(), ) .await?; From 6b4351ee8eeb4d380040a3cbc72fd7b482e79843 Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Tue, 7 Oct 2025 14:40:07 -0700 Subject: [PATCH 23/25] fix some ordering issues, move some tests around --- openhcl/underhill_core/src/dispatch/mod.rs | 3 +- openvmm/openvmm_entry/src/lib.rs | 13 ++- .../tests/multiarch/openhcl_servicing.rs | 79 +++++++++++++++++++ .../tests/x86_64/openhcl_linux_direct.rs | 28 ------- 4 files changed, 93 insertions(+), 30 deletions(-) diff --git a/openhcl/underhill_core/src/dispatch/mod.rs b/openhcl/underhill_core/src/dispatch/mod.rs index 67b092dc12..4830a729b4 100644 --- a/openhcl/underhill_core/src/dispatch/mod.rs +++ b/openhcl/underhill_core/src/dispatch/mod.rs @@ -781,6 +781,8 @@ impl LoadedVm { None }; + let units = self.save_units().await.context("state unit save failed")?; + let mana_state = if let Some(network_settings) = &mut self.network_settings && mana_keepalive_flag { @@ -789,7 +791,6 @@ impl LoadedVm { None }; - let units = self.save_units().await.context("state unit save failed")?; let vmgs = if let Some((vmgs_thin_client, vmgs_disk_metadata, _)) = self.vmgs.as_ref() 
{ Some(( vmgs_thin_client.save().await.context("vmgs save failed")?, diff --git a/openvmm/openvmm_entry/src/lib.rs b/openvmm/openvmm_entry/src/lib.rs index 27339f935f..e14eaefe04 100644 --- a/openvmm/openvmm_entry/src/lib.rs +++ b/openvmm/openvmm_entry/src/lib.rs @@ -1931,6 +1931,12 @@ enum InteractiveCommand { /// configured path. #[clap(long, conflicts_with("user_mode_only"))] igvm: Option, + #[clap(long)] + /// Enable NVMe keepalive + nvme_keepalive: bool, + /// Enable MANA keepalive + #[clap(long)] + mana_keepalive: bool, }, /// Read guest memory @@ -2784,6 +2790,8 @@ async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> an InteractiveCommand::ServiceVtl2 { user_mode_only, igvm, + mana_keepalive, + nvme_keepalive, } => { let paravisor_diag = paravisor_diag.clone(); let vm_rpc = vm_rpc.clone(); @@ -2801,7 +2809,10 @@ async fn run_control(driver: &DefaultDriver, mesh: &VmmMesh, opt: Options) -> an hvlite_helpers::underhill::save_underhill( &vm_rpc, ged_rpc.as_ref().context("no GED")?, - GuestServicingFlags::default(), + GuestServicingFlags { + nvme_keepalive, + mana_keepalive, + }, file.into(), ) .await?; diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs index ccfd08747f..5b9054399d 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs @@ -419,3 +419,82 @@ async fn create_keepalive_test_config( .run() .await } + +/// Today this only tests that the nic can get an IP address via consomme's DHCP +/// implementation. +/// +/// FUTURE: Test traffic on the nic. +async fn validate_mana_nic(agent: &PipetteClient) -> Result<(), anyhow::Error> { + let sh = agent.unix_shell(); + cmd!(sh, "ifconfig eth0 up").run().await?; + cmd!(sh, "udhcpc eth0").run().await?; + let output = cmd!(sh, "ifconfig eth0").read().await?; + // Validate that we see a mana nic with the expected MAC address and IPs. + assert!(output.contains("HWaddr 00:15:5D:12:12:12")); + assert!(output.contains("inet addr:10.0.0.2")); + assert!(output.contains("inet6 addr: fe80::215:5dff:fe12:1212/64")); + + Ok(()) +} + +/// Test an OpenHCL Linux direct VM with a MANA nic assigned to VTL2 (backed by +/// the MANA emulator), and vmbus relay. Perform servicing and validate that the +/// nic is still functional. +#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])] +async fn mana_nic_servicing( + config: PetriVmBuilder, + (igvm_file,): (ResolvedArtifact,), +) -> Result<(), anyhow::Error> { + let (mut vm, agent) = config + .with_vmbus_redirect(true) + .modify_backend(|b| b.with_nic()) + .run() + .await?; + + validate_mana_nic(&agent).await?; + + vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default()) + .await?; + + validate_mana_nic(&agent).await?; + + agent.power_off().await?; + vm.wait_for_clean_teardown().await?; + + Ok(()) +} +/// Test an OpenHCL Linux direct VM with a MANA nic assigned to VTL2 (backed by +/// the MANA emulator), and vmbus relay. Perform servicing and validate that the +/// nic is still functional. 
+#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
+async fn mana_nic_servicing_keepalive(
+    config: PetriVmBuilder,
+    (igvm_file,): (ResolvedArtifact,),
+) -> Result<(), anyhow::Error> {
+    let (mut vm, agent) = config
+        .with_vmbus_redirect(true)
+        .modify_backend(|b| b.with_nic())
+        .with_openhcl_command_line(
+            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_SIDECAR=off OPENHCL_MANA_KEEP_ALIVE=1",
+        ) // disable sidecar until #1345 is fixed
+        .run()
+        .await?;
+
+    validate_mana_nic(&agent).await?;
+
+    vm.restart_openhcl(
+        igvm_file,
+        OpenHclServicingFlags {
+            enable_mana_keepalive: true,
+            ..Default::default()
+        },
+    )
+    .await?;
+
+    validate_mana_nic(&agent).await?;
+
+    agent.power_off().await?;
+    vm.wait_for_clean_teardown().await?;
+
+    Ok(())
+}
diff --git a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs
index cb9e275c66..40515e8cae 100644
--- a/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs
+++ b/vmm_tests/vmm_tests/tests/tests/x86_64/openhcl_linux_direct.rs
@@ -76,34 +76,6 @@ async fn mana_nic_shared_pool(
     Ok(())
 }
 
-/// Test an OpenHCL Linux direct VM with a MANA nic assigned to VTL2 (backed by
-/// the MANA emulator), and vmbus relay. Perform servicing and validate that the
-/// nic is still functional.
-#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
-async fn mana_nic_servicing(
-    config: PetriVmBuilder,
-    (igvm_file,): (ResolvedArtifact,),
-) -> Result<(), anyhow::Error> {
-    let (mut vm, agent) = config
-        .with_vmbus_redirect(true)
-        .modify_backend(|b| b.with_nic())
-        .with_openhcl_command_line("OPENHCL_ENABLE_SHARED_VISIBILITY_POOL=1")
-        .run()
-        .await?;
-
-    validate_mana_nic(&agent).await?;
-
-    vm.restart_openhcl(igvm_file, OpenHclServicingFlags::default())
-        .await?;
-
-    validate_mana_nic(&agent).await?;
-
-    agent.power_off().await?;
-    vm.wait_for_clean_teardown().await?;
-
-    Ok(())
-}
-
 /// Test an OpenHCL Linux direct VM with many NVMe devices assigned to VTL2 and vmbus relay.
 #[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
 async fn many_nvme_devices_servicing(
From 46ac820e61b9ad1a15576f1a94e5a1e4b44730be Mon Sep 17 00:00:00 2001
From: Justus Camp
Date: Fri, 10 Oct 2025 11:27:51 -0700
Subject: [PATCH 24/25] add a comment, put some duplicated code in a helper method

---
 openhcl/underhill_core/src/emuplat/netvsp.rs | 56 +++++++++-----------
 1 file changed, 24 insertions(+), 32 deletions(-)

diff --git a/openhcl/underhill_core/src/emuplat/netvsp.rs b/openhcl/underhill_core/src/emuplat/netvsp.rs
index b67fa60dde..3eb30019be 100644
--- a/openhcl/underhill_core/src/emuplat/netvsp.rs
+++ b/openhcl/underhill_core/src/emuplat/netvsp.rs
@@ -422,22 +422,7 @@ impl HclNetworkVFManagerWorker {
     }
 
     pub async fn shutdown_vtl2_device(&mut self, keep_vf_alive: bool) {
-        futures::future::join_all(self.endpoint_controls.iter_mut().map(async |control| {
-            match control.disconnect().await {
-                Ok(Some(mut endpoint)) => {
-                    tracing::info!("Network endpoint disconnected");
-                    endpoint.stop().await;
-                }
-                Ok(None) => (),
-                Err(err) => {
-                    tracing::error!(
-                        err = err.as_ref() as &dyn std::error::Error,
-                        "Failed to disconnect endpoint"
-                    );
-                }
-            }
-        }))
-        .await;
+        self.disconnect_all_endpoints().await;
         if let Some(device) = self.mana_device.take() {
             let (result, device) = device.shutdown().await;
             // Closing the VFIO device handle can take a long time.
Leak the handle by @@ -490,6 +475,25 @@ impl HclNetworkVFManagerWorker { } } + async fn disconnect_all_endpoints(&mut self) { + futures::future::join_all(self.endpoint_controls.iter_mut().map(async |control| { + match control.disconnect().await { + Ok(Some(mut endpoint)) => { + tracing::info!("Network endpoint disconnected"); + endpoint.stop().await; + } + Ok(None) => (), + Err(err) => { + tracing::error!( + err = err.as_ref() as &dyn std::error::Error, + "Failed to disconnect endpoint" + ); + } + } + })) + .await; + } + pub async fn run(&mut self) { #[derive(Debug)] enum NextWorkItem { @@ -676,25 +680,13 @@ impl HclNetworkVFManagerWorker { assert!(self.is_shutdown_active); drop(self.messages.take().unwrap()); rpc.handle(async |_| { - futures::future::join_all(self.endpoint_controls.iter_mut().map( - async |control| match control.disconnect().await { - Ok(Some(mut endpoint)) => { - tracing::info!("Network endpoint disconnected"); - endpoint.stop().await; - } - Ok(None) => (), - Err(err) => { - tracing::error!( - err = err.as_ref() as &dyn std::error::Error, - "Failed to disconnect endpoint" - ); - } - }, - )) - .await; + self.disconnect_all_endpoints().await; if let Some(device) = self.mana_device.take() { let (saved_state, device) = device.save().await; + + // Closing the VFIO device handle can take a long time. + // Leak the handle by stashing it away. std::mem::forget(device); if let Ok(saved_state) = saved_state { From b5a4c163aa276cfcbb90ae929b628f061033e6bb Mon Sep 17 00:00:00 2001 From: Justus Camp Date: Thu, 23 Oct 2025 14:42:13 -0700 Subject: [PATCH 25/25] add an upgrade test --- .../tests/multiarch/openhcl_servicing.rs | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs index 9d6e051ac3..72ae3fc25d 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs @@ -502,3 +502,57 @@ async fn mana_nic_servicing_keepalive( Ok(()) } + +// Test upgrading from 25_05 release to latest, then service again to go through keepalive path +#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64, RELEASE_25_05_LINUX_DIRECT_X64])] +async fn mana_nic_servicing_keepalive_upgrade( + config: PetriVmBuilder, + (to_igvm_file, from_igvm_file): ( + ResolvedArtifact, + ResolvedArtifact, + ), +) -> Result<(), anyhow::Error> { + // Start a VM using 25_05 release IGVM + let (mut vm, agent) = config + // TODO: remove .with_guest_state_lifetime(PetriGuestStateLifetime::Disk). The default (ephemeral) does not exist in the 2505 release. 
+        .with_guest_state_lifetime(PetriGuestStateLifetime::Disk)
+        .with_custom_openhcl(from_igvm_file)
+        .with_vmbus_redirect(true)
+        .modify_backend(|b| b.with_nic())
+        .with_openhcl_command_line(
+            "OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_SIDECAR=off OPENHCL_MANA_KEEP_ALIVE=1",
+        ) // disable sidecar until #1345 is fixed
+        .run()
+        .await?;
+
+    validate_mana_nic(&agent).await?;
+
+    // Service to latest IGVM and make sure MANA nic still works
+    vm.restart_openhcl(
+        to_igvm_file.clone(),
+        OpenHclServicingFlags {
+            enable_mana_keepalive: true,
+            ..Default::default()
+        },
+    )
+    .await?;
+
+    validate_mana_nic(&agent).await?;
+
+    // Service again to latest IGVM to test keepalive path
+    vm.restart_openhcl(
+        to_igvm_file,
+        OpenHclServicingFlags {
+            enable_mana_keepalive: true,
+            ..Default::default()
+        },
+    )
+    .await?;
+
+    validate_mana_nic(&agent).await?;
+
+    agent.power_off().await?;
+    vm.wait_for_clean_teardown().await?;
+
+    Ok(())
+}