Changes from 28 commits (31 commits total)
eb06840
cherry-pick gdma changes
justus-camp-microsoft Apr 23, 2025
e9e293f
some cleanup
justus-camp-microsoft Apr 24, 2025
1d5479d
unused import
justus-camp-microsoft Apr 24, 2025
93dfba9
get rid of crate
justus-camp-microsoft Apr 24, 2025
eea14e1
some of feedback
justus-camp-microsoft May 2, 2025
d6fc130
remove arm state, re-arm on restore
justus-camp-microsoft May 2, 2025
fe7e8bd
save on hwc failure, save hwc failure state
justus-camp-microsoft May 7, 2025
a48267e
move some duplicated code to init function
justus-camp-microsoft May 7, 2025
1950a50
Merge branch 'main' into gdma
Jun 25, 2025
06ac5b2
Merge branch 'main' into gdma
Sep 8, 2025
87ec2a3
retarget always
Sep 8, 2025
980f5b0
unmap interrupts on drop
Sep 10, 2025
054c767
PR feedback
justus-camp-microsoft Sep 25, 2025
1baec11
unmap all interrupts at once
justus-camp-microsoft Sep 25, 2025
038c2da
remove eq_id_msix saving and reconstruct, other minor feedback
justus-camp-microsoft Sep 25, 2025
e6fcd99
enable keepalive
justus-camp-microsoft Sep 11, 2025
d799d6b
triple fault to fix, only calling save and not restoring
justus-camp-microsoft Sep 22, 2025
0b4c25b
calling save but still destroying everything, triple faults sometimes…
justus-camp-microsoft Sep 23, 2025
4dde9aa
passing test, enabled by default currently
justus-camp-microsoft Sep 24, 2025
3177ddc
default off
justus-camp-microsoft Sep 25, 2025
c403fc5
run format, make RPC return an option in case device disappears, remo…
justus-camp-microsoft Sep 25, 2025
8e30a76
some logging
justus-camp-microsoft Sep 25, 2025
6646ca2
merge main
justus-camp-microsoft Oct 6, 2025
b3a95cc
cleanup from self-review
justus-camp-microsoft Oct 6, 2025
60008ad
don't have mana keepalive on by default in openvmm RPC
justus-camp-microsoft Oct 7, 2025
6b4351e
fix some ordering issues, move some tests around
justus-camp-microsoft Oct 7, 2025
c53567f
Merge branch 'main' into full_enablement
justus-camp-microsoft Oct 8, 2025
46ac820
add a comment, put some duplicated code in a helper method
justus-camp-microsoft Oct 10, 2025
cdad413
Merge remote-tracking branch 'upstream/main' into full_enablement
justus-camp-microsoft Oct 21, 2025
b5a4c16
add an upgrade test
justus-camp-microsoft Oct 23, 2025
c7925a8
Merge branch 'main' into full_enablement
justus-camp-microsoft Oct 23, 2025
54 changes: 39 additions & 15 deletions openhcl/underhill_core/src/dispatch/mod.rs
@@ -34,6 +34,7 @@ use hyperv_ic_resources::shutdown::ShutdownRpc;
use hyperv_ic_resources::shutdown::ShutdownType;
use igvm_defs::MemoryMapEntryType;
use inspect::Inspect;
use mana_driver::save_restore::ManaSavedState;
use mesh::CancelContext;
use mesh::MeshPayload;
use mesh::error::RemoteError;
@@ -114,6 +115,8 @@ pub trait LoadedVmNetworkSettings: Inspect {
vmbus_server: &Option<VmbusServerHandle>,
dma_client_spawner: DmaClientSpawner,
is_isolated: bool,
save_restore_supported: bool,
mana_state: Option<&ManaSavedState>,
) -> anyhow::Result<RuntimeSavedState>;

/// Callback when network is removed externally.
@@ -127,6 +130,9 @@
&self,
mut params: PacketCaptureParams<Socket>,
) -> anyhow::Result<PacketCaptureParams<Socket>>;

/// Save the network state for restoration after servicing.
async fn save(&mut self) -> Vec<ManaSavedState>;
}

/// A VM that has been loaded and can be run.
@@ -188,6 +194,7 @@ pub(crate) struct LoadedVm {
pub _periodic_telemetry_task: Task<()>,

pub nvme_keep_alive: bool,
pub mana_keep_alive: bool,
pub test_configuration: Option<TestScenarioConfig>,
pub dma_manager: OpenhclDmaManager,
}
@@ -299,7 +306,7 @@ impl LoadedVm {
WorkerRpc::Restart(rpc) => {
let state = async {
let running = self.stop().await;
match self.save(None, false).await {
match self.save(None, false, false).await {
Ok(servicing_state) => Some((rpc, servicing_state)),
Err(err) => {
if running {
@@ -364,7 +371,7 @@ impl LoadedVm {
UhVmRpc::Save(rpc) => {
rpc.handle_failable(async |()| {
let running = self.stop().await;
let r = self.save(None, false).await;
let r = self.save(None, false, false).await;
if running {
self.start(None).await;
}
@@ -566,6 +573,7 @@ impl LoadedVm {
// NOTE: This is set via the corresponding env arg, as this feature is
// experimental.
let nvme_keepalive = self.nvme_keep_alive && capabilities_flags.enable_nvme_keepalive();
let mana_keepalive = self.mana_keep_alive && capabilities_flags.enable_mana_keepalive();

// Do everything before the log flush under a span.
let r = async {
Expand All @@ -580,7 +588,7 @@ impl LoadedVm {
anyhow::bail!("cannot service underhill while paused");
}

let mut state = self.save(Some(deadline), nvme_keepalive).await?;
let mut state = self.save(Some(deadline), nvme_keepalive, mana_keepalive).await?;
state.init_state.correlation_id = Some(correlation_id);

// Unload any network devices.
@@ -742,16 +750,30 @@ impl LoadedVm {
async fn save(
&mut self,
_deadline: Option<std::time::Instant>,
vf_keepalive_flag: bool,
nvme_keepalive_flag: bool,
mana_keepalive_flag: bool,
) -> anyhow::Result<ServicingState> {
assert!(!self.state_units.is_running());

let emuplat = (self.emuplat_servicing.save()).context("emuplat save failed")?;

// Only save dma manager state if we are expected to keep VF devices
// alive across save. Otherwise, don't persist the state at all, as
// there should be no live DMA across save.
//
// This has to happen before saving the network state, otherwise its allocations
// are marked as Free and cannot be restored.
let dma_manager_state = if nvme_keepalive_flag || mana_keepalive_flag {
use vmcore::save_restore::SaveRestore;
Some(self.dma_manager.save().context("dma_manager save failed")?)
} else {
None
};

// Only save NVMe state when there are NVMe controllers and keep alive
// was enabled.
let nvme_state = if let Some(n) = &self.nvme_manager {
n.save(vf_keepalive_flag)
n.save(nvme_keepalive_flag)
.instrument(tracing::info_span!("nvme_manager_save", CVM_ALLOWED))
.await
.map(|s| NvmeSavedState { nvme_state: s })
@@ -760,6 +782,15 @@
};

let units = self.save_units().await.context("state unit save failed")?;

let mana_state = if let Some(network_settings) = &mut self.network_settings
&& mana_keepalive_flag
{
Some(network_settings.save().await)
} else {
None
};

let vmgs = if let Some((vmgs_thin_client, vmgs_disk_metadata, _)) = self.vmgs.as_ref() {
Some((
vmgs_thin_client.save().await.context("vmgs save failed")?,
@@ -769,16 +800,6 @@
None
};

// Only save dma manager state if we are expected to keep VF devices
// alive across save. Otherwise, don't persist the state at all, as
// there should be no live DMA across save.
let dma_manager_state = if vf_keepalive_flag {
use vmcore::save_restore::SaveRestore;
Some(self.dma_manager.save().context("dma_manager save failed")?)
} else {
None
};

let vmbus_client = if let Some(vmbus_client) = &mut self.vmbus_client {
vmbus_client.stop().await;
Some(vmbus_client.save().await)
@@ -798,6 +819,7 @@
nvme_state,
dma_manager_state,
vmbus_client,
mana_state,
},
units,
};
@@ -864,6 +886,8 @@ impl LoadedVm {
&self.vmbus_server,
self.dma_manager.client_spawner(),
self.isolation.is_isolated(),
self.mana_keep_alive,
None, // No existing mana state
)
.await?;

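The ordering note in the save() change above is the crux of the dispatch-side diff: DMA manager state must be captured while the MANA device's allocations are still marked in-use, because the network save frees them. A minimal sketch of that constraint, with stand-in types rather than the real OpenHCL ones:

// Sketch of the save ordering: capture DMA-manager state before the
// network save, which marks the device's allocations as Free and makes
// them unrestorable. DmaState/ManaState are stand-ins, not real types.
#[derive(Debug)]
struct DmaState(&'static str);
#[derive(Debug)]
struct ManaState;

struct Vm {
    mana_keepalive: bool,
}

impl Vm {
    fn save_dma(&self) -> DmaState {
        DmaState("allocations captured while still in-use")
    }

    fn save_network(&mut self) -> Vec<ManaState> {
        // After this point the device's DMA allocations read as Free.
        vec![ManaState]
    }

    fn save(&mut self) -> (Option<DmaState>, Option<Vec<ManaState>>) {
        let dma = self.mana_keepalive.then(|| self.save_dma()); // first
        let mana = self.mana_keepalive.then(|| self.save_network()); // then
        (dma, mana)
    }
}

fn main() {
    let mut vm = Vm { mana_keepalive: true };
    let (dma, mana) = vm.save();
    println!("dma = {dma:?}, mana entries = {:?}", mana.map(|m| m.len()));
}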
139 changes: 120 additions & 19 deletions openhcl/underhill_core/src/emuplat/netvsp.rs
@@ -14,6 +14,7 @@ use guid::Guid;
use inspect::Inspect;
use mana_driver::mana::ManaDevice;
use mana_driver::mana::VportState;
use mana_driver::save_restore::ManaSavedState;
use mesh::rpc::FailableRpc;
use mesh::rpc::Rpc;
use mesh::rpc::RpcSend;
@@ -58,6 +59,7 @@ enum HclNetworkVfManagerMessage {
HideVtl0VF(Rpc<bool, ()>),
Inspect(inspect::Deferred),
PacketCapture(FailableRpc<PacketCaptureParams<Socket>, PacketCaptureParams<Socket>>),
SaveState(Rpc<(), Option<ManaSavedState>>),
}

async fn create_mana_device(
@@ -66,7 +68,21 @@ async fn create_mana_device(
vp_count: u32,
max_sub_channels: u16,
dma_client: Arc<dyn DmaClient>,
mana_state: Option<&ManaSavedState>,
) -> anyhow::Result<ManaDevice<VfioDevice>> {
if let Some(mana_state) = mana_state {
tracing::info!("restoring MANA device from saved state");
return try_create_mana_device(
driver_source,
pci_id,
vp_count,
max_sub_channels,
dma_client,
Some(mana_state),
)
.await;
}

// Disable FLR on vfio attach/detach; this allows faster system
// startup/shutdown with the caveat that the device needs to be properly
// sent through the shutdown path during servicing operations, as that is
@@ -90,6 +106,7 @@
vp_count,
max_sub_channels,
dma_client.clone(),
None,
)
.await
{
@@ -119,16 +136,28 @@ async fn try_create_mana_device(
vp_count: u32,
max_sub_channels: u16,
dma_client: Arc<dyn DmaClient>,
mana_state: Option<&ManaSavedState>,
) -> anyhow::Result<ManaDevice<VfioDevice>> {
let device = VfioDevice::new(driver_source, pci_id, dma_client)
.await
.context("failed to open device")?;
// Restore the device if we have saved state from servicing, otherwise create a new one.
let device = if mana_state.is_some() {
tracing::info!("Restoring VFIO device from saved state");
VfioDevice::restore(driver_source, pci_id, true, dma_client)
.instrument(tracing::info_span!("restore_mana_vfio_device"))
.await
.context("failed to restore device")?
} else {
VfioDevice::new(driver_source, pci_id, dma_client)
.instrument(tracing::info_span!("new_mana_vfio_device"))
.await
.context("failed to open device")?
};

ManaDevice::new(
&driver_source.simple(),
device,
vp_count,
max_sub_channels + 1,
mana_state.map(|state| &state.mana_device),
)
.instrument(tracing::info_span!("new_mana_device"))
.await
@@ -393,22 +422,7 @@ impl HclNetworkVFManagerWorker {
}

pub async fn shutdown_vtl2_device(&mut self, keep_vf_alive: bool) {
futures::future::join_all(self.endpoint_controls.iter_mut().map(async |control| {
match control.disconnect().await {
Ok(Some(mut endpoint)) => {
tracing::info!("Network endpoint disconnected");
endpoint.stop().await;
}
Ok(None) => (),
Err(err) => {
tracing::error!(
err = err.as_ref() as &dyn std::error::Error,
"Failed to disconnect endpoint"
);
}
}
}))
.await;
self.disconnect_all_endpoints().await;
if let Some(device) = self.mana_device.take() {
let (result, device) = device.shutdown().await;
// Closing the VFIO device handle can take a long time. Leak the handle by
@@ -461,6 +475,25 @@
}
}

async fn disconnect_all_endpoints(&mut self) {
futures::future::join_all(self.endpoint_controls.iter_mut().map(async |control| {
match control.disconnect().await {
Ok(Some(mut endpoint)) => {
tracing::info!("Network endpoint disconnected");
endpoint.stop().await;
}
Ok(None) => (),
Err(err) => {
tracing::error!(
err = err.as_ref() as &dyn std::error::Error,
"Failed to disconnect endpoint"
);
}
}
}))
.await;
}

pub async fn run(&mut self) {
#[derive(Debug)]
enum NextWorkItem {
@@ -643,6 +676,41 @@ impl HclNetworkVFManagerWorker {
})
.await;
}
NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::SaveState(rpc)) => {
assert!(self.is_shutdown_active);
drop(self.messages.take().unwrap());
rpc.handle(async |_| {
self.disconnect_all_endpoints().await;

if let Some(device) = self.mana_device.take() {
let (saved_state, device) = device.save().await;

// Closing the VFIO device handle can take a long time.
// Leak the handle by stashing it away.
std::mem::forget(device);
Review comment from Copilot AI (Oct 8, 2025):
Using std::mem::forget() prevents the device destructor from running, which could lead to resource leaks. Consider documenting why this is necessary or finding an alternative approach that properly manages the device lifetime.
Suggested change: delete the std::mem::forget(device); line.
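The leak is deliberate per the in-diff comment: closing the VFIO handle can take a long time, so the handle is stashed rather than dropped on the servicing path. One hypothetical alternative (not the PR's approach) is to move the handle onto a detached thread so the close still runs, just off the hot path; the tradeoff is that the thread may not finish before the process is torn down during servicing:

// Hypothetical alternative to std::mem::forget(device): close the handle
// on a detached thread. VfioHandle is a stand-in for the real device type.
struct VfioHandle;

impl Drop for VfioHandle {
    fn drop(&mut self) {
        // Closing the underlying file descriptor can block for a long time.
    }
}

fn close_in_background(device: VfioHandle) {
    std::thread::Builder::new()
        .name("vfio-close".into())
        .spawn(move || drop(device))
        .expect("failed to spawn vfio-close thread");
}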

if let Ok(saved_state) = saved_state {
Some(ManaSavedState {
mana_device: saved_state,
pci_id: self.vtl2_pci_id.clone(),
})
} else {
tracing::error!(
"Failed while saving MANA device state, returning None"
);
None
}
} else {
tracing::warn!(
"no MANA device present when saving state, returning None"
);
None
}
})
.await;
// Exit worker thread.
return;
}
NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::ShutdownBegin(
remove_vtl0_vf,
)) => {
@@ -652,6 +720,7 @@
self.is_shutdown_active = true;
}
NextWorkItem::ManagerMessage(HclNetworkVfManagerMessage::ShutdownComplete(rpc)) => {
tracing::info!("shutting down VTL2 device");
assert!(self.is_shutdown_active);
drop(self.messages.take().unwrap());
rpc.handle(async |keep_vf_alive| {
@@ -683,6 +752,7 @@
self.vp_count,
self.max_sub_channels,
self.dma_client.clone(),
None, // No saved state on new device arrival
)
.await
{
@@ -859,6 +929,7 @@ impl HclNetworkVFManager {
netvsp_state: &Option<Vec<SavedState>>,
dma_mode: GuestDmaMode,
dma_client: Arc<dyn DmaClient>,
mana_state: Option<&ManaSavedState>,
) -> anyhow::Result<(
Self,
Vec<HclNetworkVFManagerEndpointInfo>,
@@ -870,6 +941,7 @@
vp_count,
max_sub_channels,
dma_client.clone(),
mana_state,
)
.await?;
let (mut endpoints, endpoint_controls): (Vec<_>, Vec<_>) = (0..device.num_vports())
@@ -969,6 +1041,29 @@
))
}

pub async fn save(&self) -> Option<ManaSavedState> {
let save_state = self
.shared_state
.worker_channel
.call(HclNetworkVfManagerMessage::SaveState, ())
.await;

match save_state {
Ok(None) => {
tracing::warn!("No MANA device present when saving state, returning None");
None
}
Ok(Some(state)) => Some(state),
Err(err) => {
tracing::error!(
err = &err as &dyn std::error::Error,
"RPC failure when saving VF Manager state"
);
None
}
}
}

pub async fn packet_capture(
&self,
params: PacketCaptureParams<Socket>,
@@ -1066,6 +1161,12 @@ impl HclNetworkVFManagerShutdownInProgress {
}
self.complete = true;
}

pub async fn save(mut self) -> Option<ManaSavedState> {
let result = self.inner.save().await;
self.complete = true;
result
}
}

struct HclNetworkVFManagerInstance<F> {
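One design point worth noting in the netvsp diff: HclNetworkVFManagerShutdownInProgress::save takes self by value, so a guard is either completed normally or converted into saved state, never both. A minimal sketch of that consuming-guard shape (the real guard's Drop behavior is not shown in this diff, so the fallback below is an assumption):

// Consuming-guard sketch: save() takes `self` by value and marks the guard
// complete, so the assumed Drop fallback never fires after a save.
// State stands in for ManaSavedState.
#[derive(Debug)]
struct State;

struct ShutdownGuard {
    complete: bool,
}

impl ShutdownGuard {
    fn save(mut self) -> Option<State> {
        // Mirrors `self.complete = true` in the diff's save().
        self.complete = true;
        Some(State)
    }
}

impl Drop for ShutdownGuard {
    fn drop(&mut self) {
        if !self.complete {
            // Assumed fallback: report an abandoned shutdown.
            eprintln!("shutdown guard dropped without completing");
        }
    }
}

fn main() {
    let guard = ShutdownGuard { complete: false };
    let saved = guard.save();
    println!("saved: {saved:?}");
    // `guard` was moved into save(); it cannot be reused or double-saved.
}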