|
8 | 8 |
|
9 | 9 | use disk_backend_resources::LayeredDiskHandle;
|
10 | 10 | use disk_backend_resources::layer::RamDiskLayerHandle;
|
| 11 | +use guid::Guid; |
11 | 12 | use hvlite_defs::config::DeviceVtl;
|
| 13 | +use hvlite_defs::config::VpciDeviceConfig; |
| 14 | +use mesh::CellUpdater; |
| 15 | +use nvme_resources::NamespaceDefinition; |
| 16 | +use nvme_resources::NvmeFaultControllerHandle; |
| 17 | +use nvme_resources::fault::AdminQueueFaultConfig; |
| 18 | +use nvme_resources::fault::FaultConfiguration; |
| 19 | +use nvme_resources::fault::QueueFaultBehavior; |
12 | 20 | use petri::OpenHclServicingFlags;
|
13 | 21 | use petri::PetriVmBuilder;
|
14 | 22 | use petri::PetriVmmBackend;
|
@@ -172,7 +180,7 @@ async fn shutdown_ic(
|
172 | 180 | c.vmbus_devices.push((
|
173 | 181 | DeviceVtl::Vtl0,
|
174 | 182 | ScsiControllerHandle {
|
175 |
| - instance_id: guid::Guid::new_random(), |
| 183 | + instance_id: Guid::new_random(), |
176 | 184 | max_sub_channel_count: 1,
|
177 | 185 | devices: vec![ScsiDeviceAndPath {
|
178 | 186 | path: ScsiPath {
|
@@ -224,3 +232,115 @@ async fn shutdown_ic(
|
224 | 232 |
|
225 | 233 | // TODO: add tests with guest workloads while doing servicing.
|
226 | 234 | // TODO: add tests from previous release branch to current.
|
| 235 | + |
| 236 | +/// Test servicing an OpenHCL VM from the current version to itself |
| 237 | +/// with NVMe keepalive support and a faulty controller that drops CREATE_IO_COMPLETION_QUEUE commands |
|     | +/// |
|     | +/// The fault is only armed across the servicing operation itself: with NVMe |
|     | +/// keepalive enabled, the restarted OpenHCL should not need to re-create I/O |
|     | +/// queues, so dropping CREATE_IO_COMPLETION_QUEUE must not hang the restart. |
| 238 | +#[openvmm_test(openhcl_linux_direct_x64 [LATEST_LINUX_DIRECT_TEST_X64])] |
| 239 | +async fn keepalive_with_nvme_fault( |
| 240 | + config: PetriVmBuilder<OpenVmmPetriBackend>, |
| 241 | + (igvm_file,): (ResolvedArtifact<impl petri_artifacts_common::tags::IsOpenhclIgvm>,), |
| 242 | +) -> Result<(), anyhow::Error> { |
| 243 | + const NVME_INSTANCE: Guid = guid::guid!("dce4ebad-182f-46c0-8d30-8446c1c62ab3"); |
| 244 | + let vtl0_nvme_lun = 1; |
| 245 | + let vtl2_nsid = 37; // Pick any namespace ID as long as it doesn't conflict with other namespaces in the controller |
| 246 | + let scsi_instance = Guid::new_random(); |
| 247 | + |
|     | + // Servicing requires host support; bail out early (as success) otherwise. |
| 248 | + if !host_supports_servicing() { |
| 249 | + tracing::info!("skipping OpenHCL servicing test on unsupported host"); |
| 250 | + return Ok(()); |
| 251 | + } |
| 252 | + |
|     | + // Shared cell used to toggle the controller fault at runtime; it starts |
|     | + // disarmed (false) so VM boot proceeds normally. |
| 253 | + let mut fault_start_updater = CellUpdater::new(false); |
| 254 | + |
|     | + // When armed, silently drop CREATE_IO_COMPLETION_QUEUE admin submissions. |
| 255 | + let fault_configuration = FaultConfiguration { |
| 256 | + fault_active: fault_start_updater.cell(), |
| 257 | + admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault( |
| 258 | + nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0, |
| 259 | + QueueFaultBehavior::Drop, |
| 260 | + ), |
| 261 | + }; |
| 262 | + |
|     | + // Build the VM: a VTL2-assigned NVMe fault controller, surfaced to VTL0 |
|     | + // as a SCSI LUN through the VTL2 settings below. |
| 263 | + let (mut vm, agent) = config |
| 264 | + .with_vmbus_redirect(true) |
| 265 | + .with_openhcl_command_line("OPENHCL_ENABLE_VTL2_GPA_POOL=512 OPENHCL_SIDECAR=off") // disable sidecar until #1345 is fixed |
| 266 | + .modify_backend(move |b| { |
| 267 | + b.with_custom_config(|c| { |
| 268 | + // Add a fault controller to test the nvme controller functionality |
| 269 | + c.vpci_devices.push(VpciDeviceConfig { |
| 270 | + vtl: DeviceVtl::Vtl2, |
| 271 | + instance_id: NVME_INSTANCE, |
| 272 | + resource: NvmeFaultControllerHandle { |
| 273 | + subsystem_id: Guid::new_random(), |
| 274 | + msix_count: 10, |
| 275 | + max_io_queues: 10, |
| 276 | + namespaces: vec![NamespaceDefinition { |
| 277 | + nsid: vtl2_nsid, |
| 278 | + read_only: false, |
|     | + // Small RAM-backed disk is enough; the test only checks |
|     | + // the device enumerates and survives servicing. |
| 279 | + disk: LayeredDiskHandle::single_layer(RamDiskLayerHandle { |
| 280 | + len: Some(256 * 1024), |
| 281 | + }) |
| 282 | + .into_resource(), |
| 283 | + }], |
| 284 | + fault_config: fault_configuration, |
| 285 | + } |
| 286 | + .into_resource(), |
| 287 | + }) |
| 288 | + }) |
| 289 | + // Assign the fault controller to VTL2 |
| 290 | + .with_custom_vtl2_settings(|v| { |
| 291 | + v.dynamic.as_mut().unwrap().storage_controllers.push( |
| 292 | + vtl2_settings_proto::StorageController { |
| 293 | + instance_id: scsi_instance.to_string(), |
| 294 | + protocol: vtl2_settings_proto::storage_controller::StorageProtocol::Scsi |
| 295 | + .into(), |
| 296 | + luns: vec![vtl2_settings_proto::Lun { |
| 297 | + location: vtl0_nvme_lun, |
| 298 | + device_id: Guid::new_random().to_string(), |
| 299 | + vendor_id: "OpenVMM".to_string(), |
| 300 | + product_id: "Disk".to_string(), |
| 301 | + product_revision_level: "1.0".to_string(), |
| 302 | + serial_number: "0".to_string(), |
| 303 | + model_number: "1".to_string(), |
|     | + // Back the SCSI LUN by the NVMe namespace created above |
|     | + // (matched by controller instance id + nsid). |
| 304 | + physical_devices: Some(vtl2_settings_proto::PhysicalDevices { |
| 305 | + r#type: vtl2_settings_proto::physical_devices::BackingType::Single |
| 306 | + .into(), |
| 307 | + device: Some(vtl2_settings_proto::PhysicalDevice { |
| 308 | + device_type: |
| 309 | + vtl2_settings_proto::physical_device::DeviceType::Nvme |
| 310 | + .into(), |
| 311 | + device_path: NVME_INSTANCE.to_string(), |
| 312 | + sub_device_path: vtl2_nsid, |
| 313 | + }), |
| 314 | + devices: Vec::new(), |
| 315 | + }), |
| 316 | + ..Default::default() |
| 317 | + }], |
| 318 | + io_queue_depth: None, |
| 319 | + }, |
| 320 | + ) |
| 321 | + }) |
| 322 | + }) |
| 323 | + .run() |
| 324 | + .await?; |
| 325 | + agent.ping().await?; |
| 326 | + let sh = agent.unix_shell(); |
| 327 | + |
| 328 | + // Make sure the disk showed up. |
| 329 | + cmd!(sh, "ls /dev/sda").run().await?; |
| 330 | + |
| 331 | + // CREATE_IO_COMPLETION_QUEUE is blocked. This will time out without keepalive enabled. |
| 332 | + fault_start_updater.set(true).await; |
| 333 | + vm.restart_openhcl( |
| 334 | + igvm_file.clone(), |
| 335 | + OpenHclServicingFlags { |
| 336 | + enable_nvme_keepalive: true, |
| 337 | + ..Default::default() |
| 338 | + }, |
| 339 | + ) |
| 340 | + .await?; |
| 341 | + |
|     | + // Disarm the fault and confirm the guest is still responsive after servicing. |
| 342 | + fault_start_updater.set(false).await; |
| 343 | + agent.ping().await?; |
| 344 | + |
| 345 | + Ok(()) |
| 346 | +} |
0 commit comments