
Commit 3e0e0a4

vmm_tests: First vmm test for nvme keepalive using dropped CREATE_IO_COMPLETION_QUEUE commands (#1901)
A basic first nvme keepalive vmm test that drops CREATE_IO_COMPLETION_QUEUE admin queue commands during servicing in order to verify keepalive functionality. Tested locally: the test times out unless nvme_keepalive is enabled.
1 parent 4ff6df9 · commit 3e0e0a4
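The core of the change is a fault configuration gated by a mesh CellUpdater cell, so the fault is active only around the servicing operation. The idea is that with keepalive the NVMe driver in VTL2 never needs to re-issue CREATE_IO_COMPLETION_QUEUE after servicing, whereas without it controller re-initialization stalls on the dropped command and the test times out. Condensed from the new test in openhcl_servicing.rs below (same types and calls as the diff):

    use mesh::CellUpdater;
    use nvme_resources::fault::AdminQueueFaultConfig;
    use nvme_resources::fault::FaultConfiguration;
    use nvme_resources::fault::QueueFaultBehavior;

    // Cell that lets the test flip the fault on and off around restart_openhcl().
    let mut fault_start_updater = CellUpdater::new(false);

    // While the cell is true, drop CREATE_IO_COMPLETION_QUEUE entries submitted to
    // the fault controller's admin submission queue.
    let fault_configuration = FaultConfiguration {
        fault_active: fault_start_updater.cell(),
        admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
            nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
            QueueFaultBehavior::Drop,
        ),
    };

The test flips the cell to true just before calling restart_openhcl with enable_nvme_keepalive set, flips it back afterward, and pings the guest agent to confirm the VM is still responsive.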

4 files changed: +124 -2 lines

Cargo.lock
Lines changed: 1 addition & 0 deletions

@@ -9318,6 +9318,7 @@ dependencies = [
  "mesh",
  "mesh_rpc",
  "nvme_resources",
+ "nvme_spec",
  "pal",
  "pal_async",
  "petri",

vm/devices/storage/nvme_resources/src/fault.rs
Lines changed: 1 addition & 1 deletion

@@ -15,7 +15,7 @@ pub enum QueueFaultBehavior<T> {
     Update(T),
     /// Drop the queue entry
     Drop,
-    /// No Fault, proceed as normal
+    /// No Fault
     Default,
     /// Delay
     Delay(Duration),

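For context, QueueFaultBehavior describes what the fault layer should do with an intercepted queue entry. A hypothetical sketch (not part of this commit; everything except the enum and its variants is illustrative) of how a consumer might interpret the variants:

    use std::time::Duration;
    use nvme_resources::fault::QueueFaultBehavior;

    // Hypothetical helper: decide what to do with an intercepted queue entry.
    // Returns the entry to forward plus an optional delay, or None to drop it.
    fn apply_fault<T>(entry: T, behavior: QueueFaultBehavior<T>) -> Option<(T, Option<Duration>)> {
        match behavior {
            // Substitute the entry before forwarding it.
            QueueFaultBehavior::Update(replacement) => Some((replacement, None)),
            // Silently discard the entry (what the new test uses for CREATE_IO_COMPLETION_QUEUE).
            QueueFaultBehavior::Drop => None,
            // No fault: forward unchanged.
            QueueFaultBehavior::Default => Some((entry, None)),
            // Forward after the given delay.
            QueueFaultBehavior::Delay(d) => Some((entry, Some(d))),
            // Any other behaviors the enum may define: pass through unchanged.
            _ => Some((entry, None)),
        }
    }
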
vmm_tests/vmm_tests/Cargo.toml
Lines changed: 1 addition & 0 deletions

@@ -30,6 +30,7 @@ disk_backend_resources.workspace = true
 hyperv_ic_resources.workspace = true
 hvdef.workspace = true
 nvme_resources.workspace = true
+nvme_spec.workspace = true
 scsidisk_resources.workspace = true
 storvsp_resources.workspace = true
 vm_resource.workspace = true

vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs
Lines changed: 121 additions & 1 deletion

@@ -8,7 +8,15 @@
 
 use disk_backend_resources::LayeredDiskHandle;
 use disk_backend_resources::layer::RamDiskLayerHandle;
+use guid::Guid;
 use hvlite_defs::config::DeviceVtl;
+use hvlite_defs::config::VpciDeviceConfig;
+use mesh::CellUpdater;
+use nvme_resources::NamespaceDefinition;
+use nvme_resources::NvmeFaultControllerHandle;
+use nvme_resources::fault::AdminQueueFaultConfig;
+use nvme_resources::fault::FaultConfiguration;
+use nvme_resources::fault::QueueFaultBehavior;
 use petri::OpenHclServicingFlags;
 use petri::PetriVmBuilder;
 use petri::PetriVmmBackend;
@@ -172,7 +180,7 @@ async fn shutdown_ic(
                 c.vmbus_devices.push((
                     DeviceVtl::Vtl0,
                     ScsiControllerHandle {
-                        instance_id: guid::Guid::new_random(),
+                        instance_id: Guid::new_random(),
                         max_sub_channel_count: 1,
                         devices: vec![ScsiDeviceAndPath {
                             path: ScsiPath {
@@ -224,3 +232,115 @@ async fn shutdown_ic(
 
 // TODO: add tests with guest workloads while doing servicing.
 // TODO: add tests from previous release branch to current.
+
+/// Test servicing an OpenHCL VM from the current version to itself
+/// with NVMe keepalive support and a faulty controller that drops CREATE_IO_COMPLETION_QUEUE commands
+#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])]
+async fn keepalive_with_nvme_fault(
+    config: PetriVmBuilder<OpenVmmPetriBackend>,
+    (igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,),
+) -> Result<(), anyhow::Error> {
+    const NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3");
+    let vtl0_nvme_lun = 1;
+    let vtl2_nsid = 37; // Pick any namespace ID as long as it doesn't conflict with other namespaces in the controller
+    let scsi_instance = Guid::new_random();
+
+    if !host_supports_servicing() {
+        tracing::info!("skipping OpenHCL servicing test on unsupported host");
+        return Ok(());
+    }
+
+    let mut fault_start_updater = CellUpdater::new(false);
+
+    let fault_configuration = FaultConfiguration {
+        fault_active: fault_start_updater.cell(),
+        admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
+            nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
+            QueueFaultBehavior::Drop,
+        ),
+    };
+
+    let (mut vm, agent) = config
+        .with_vmbus_redirect(true)
+        .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_SIDECAR=off") // disable sidecar until #1345 is fixed
+        .modify_backend(move |b| {
+            b.with_custom_config(|c| {
+                // Add a fault controller to test the nvme controller functionality
+                c.vpci_devices.push(VpciDeviceConfig {
+                    vtl: DeviceVtl::Vtl2,
+                    instance_id: NVME_INSTANCE,
+                    resource: NvmeFaultControllerHandle {
+                        subsystem_id: Guid::new_random(),
+                        msix_count: 10,
+                        max_io_queues: 10,
+                        namespaces: vec![NamespaceDefinition {
+                            nsid: vtl2_nsid,
+                            read_only: false,
+                            disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle {
+                                len: Some(256 * 1024),
+                            })
+                            .into_resource(),
+                        }],
+                        fault_config: fault_configuration,
+                    }
+                    .into_resource(),
+                })
+            })
+            // Assign the fault controller to VTL2
+            .with_custom_vtl2_settings(|v| {
+                v.dynamic.as_mut().unwrap().storage_controllers.push(
+                    vtl2_settings_proto::StorageController {
+                        instance_id: scsi_instance.to_string(),
+                        protocol: vtl2_settings_proto::storage_controller::StorageProtocol::Scsi
+                            .into(),
+                        luns: vec![vtl2_settings_proto::Lun {
+                            location: vtl0_nvme_lun,
+                            device_id: Guid::new_random().to_string(),
+                            vendor_id: "OpenVMM".to_string(),
+                            product_id: "Disk".to_string(),
+                            product_revision_level: "1.0".to_string(),
+                            serial_number: "0".to_string(),
+                            model_number: "1".to_string(),
+                            physical_devices: Some(vtl2_settings_proto::PhysicalDevices {
+                                r#type: vtl2_settings_proto::physical_devices::BackingType::Single
+                                    .into(),
+                                device: Some(vtl2_settings_proto::PhysicalDevice {
+                                    device_type:
+                                        vtl2_settings_proto::physical_device::DeviceType::Nvme
+                                            .into(),
+                                    device_path: NVME_INSTANCE.to_string(),
+                                    sub_device_path: vtl2_nsid,
+                                }),
+                                devices: Vec::new(),
+                            }),
+                            ..Default::default()
+                        }],
+                        io_queue_depth: None,
+                    },
+                )
+            })
+        })
+        .run()
+        .await?;
+    agent.ping().await?;
+    let sh = agent.unix_shell();
+
+    // Make sure the disk showed up.
+    cmd!(sh, "ls /dev/sda").run().await?;
+
+    // CREATE_IO_COMPLETION_QUEUE is blocked. This will time out without keepalive enabled.
+    fault_start_updater.set(true).await;
+    vm.restart_openhcl(
+        igvm_file.clone(),
+        OpenHclServicingFlags {
+            enable_nvme_keepalive: true,
+            ..Default::default()
+        },
+    )
+    .await?;
+
+    fault_start_updater.set(false).await;
+    agent.ping().await?;
+
+    Ok(())
+}
