diff --git a/vm/devices/pci/pci_core/src/capabilities/mod.rs b/vm/devices/pci/pci_core/src/capabilities/mod.rs index 3c467f236f..581753eac1 100644 --- a/vm/devices/pci/pci_core/src/capabilities/mod.rs +++ b/vm/devices/pci/pci_core/src/capabilities/mod.rs @@ -3,6 +3,8 @@ //! PCI capabilities. +pub use self::pci_express::FlrHandler; +pub use self::pci_express::PciExpressCapability; pub use self::read_only::ReadOnlyCapability; use inspect::Inspect; diff --git a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs index 769c093eeb..4aac29ecfd 100644 --- a/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs +++ b/vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs @@ -364,6 +364,7 @@ async fn test_nvme_fault_injection(driver: DefaultDriver, fault_configuration: F msix_count: MSIX_COUNT, max_io_queues: IO_QUEUE_COUNT, subsystem_id: Guid::new_random(), + flr_support: false, }, fault_configuration, ); diff --git a/vm/devices/storage/nvme_resources/src/lib.rs b/vm/devices/storage/nvme_resources/src/lib.rs index 4b7d55616b..f7053c9b18 100644 --- a/vm/devices/storage/nvme_resources/src/lib.rs +++ b/vm/devices/storage/nvme_resources/src/lib.rs @@ -43,6 +43,8 @@ pub struct NvmeFaultControllerHandle { pub max_io_queues: u16, /// The initial set of namespaces. pub namespaces: Vec, + /// Whether to enable flr support. + pub flr_support: bool, /// Configuration for the fault pub fault_config: FaultConfiguration, } diff --git a/vm/devices/storage/nvme_test/src/pci.rs b/vm/devices/storage/nvme_test/src/pci.rs index c1d4815195..3a0d09b910 100644 --- a/vm/devices/storage/nvme_test/src/pci.rs +++ b/vm/devices/storage/nvme_test/src/pci.rs @@ -34,6 +34,8 @@ use nvme_resources::fault::FaultConfiguration; use nvme_resources::fault::PciFaultBehavior; use parking_lot::Mutex; use pci_core::capabilities::msix::MsixEmulator; +use pci_core::capabilities::pci_express::FlrHandler; +use pci_core::capabilities::pci_express::PciExpressCapability; use pci_core::cfg_space_emu::BarMemoryKind; use pci_core::cfg_space_emu::ConfigSpaceType0Emulator; use pci_core::cfg_space_emu::DeviceBars; @@ -49,6 +51,32 @@ use vmcore::save_restore::SaveRestore; use vmcore::save_restore::SavedStateNotSupported; use vmcore::vm_task::VmTaskDriverSource; +/// FLR handler that signals reset requests. +#[derive(Inspect)] +struct NvmeFlrHandler { + #[inspect(skip)] + reset_requested: Arc, +} + +impl NvmeFlrHandler { + fn new() -> (Self, Arc) { + let reset_requested = Arc::new(std::sync::atomic::AtomicBool::new(false)); + ( + Self { + reset_requested: reset_requested.clone(), + }, + reset_requested, + ) + } +} + +impl FlrHandler for NvmeFlrHandler { + fn initiate_flr(&self) { + self.reset_requested + .store(true, std::sync::atomic::Ordering::SeqCst); + } +} + /// An NVMe controller. #[derive(InspectMut)] pub struct NvmeFaultController { @@ -61,7 +89,9 @@ pub struct NvmeFaultController { #[inspect(flatten, mut)] workers: NvmeWorkers, #[inspect(skip)] - fault_configuration: FaultConfiguration, + flr_reset_requested: Option>, + #[inspect(skip)] + worker_context: NvmeWorkersContext, } #[derive(Inspect)] @@ -107,6 +137,8 @@ pub struct NvmeFaultControllerCaps { /// The subsystem ID, used as part of the subnqn field of the identify /// controller response. pub subsystem_id: Guid, + /// Whether to advertise Function Level Reset (FLR) support. + pub flr_support: bool, } impl NvmeFaultController { @@ -130,6 +162,20 @@ impl NvmeFaultController { BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())), ); + // Prepare capabilities list + let mut capabilities: Vec> = + vec![Box::new(msix_cap)]; + + // Optionally add PCI Express capability with FLR support + let flr_reset_requested = if caps.flr_support { + let (flr_handler, reset_requested) = NvmeFlrHandler::new(); + let pcie_cap = PciExpressCapability::new(Some(Arc::new(flr_handler))); + capabilities.push(Box::new(pcie_cap)); + Some(reset_requested) + } else { + None + }; + let cfg_space = ConfigSpaceType0Emulator::new( HardwareIds { vendor_id: VENDOR_ID, @@ -141,7 +187,7 @@ impl NvmeFaultController { type0_sub_vendor_id: 0, type0_sub_system_id: 0, }, - vec![Box::new(msix_cap)], + capabilities, bars, ); @@ -150,16 +196,18 @@ impl NvmeFaultController { .collect(); let qe_sizes = Arc::new(Default::default()); - let admin = NvmeWorkers::new(NvmeWorkersContext { - driver_source, + let worker_context = NvmeWorkersContext { + driver_source: driver_source.clone(), mem: guest_memory, interrupts, max_sqs: caps.max_io_queues, max_cqs: caps.max_io_queues, qe_sizes: Arc::clone(&qe_sizes), subsystem_id: caps.subsystem_id, - fault_configuration: fault_configuration.clone(), - }); + fault_configuration, + }; + + let admin = NvmeWorkers::new(worker_context.clone()); Self { cfg_space, @@ -167,7 +215,8 @@ impl NvmeFaultController { registers: RegState::new(), workers: admin, qe_sizes, - fault_configuration, + flr_reset_requested, + worker_context, } } @@ -347,6 +396,7 @@ impl NvmeFaultController { if cc.en() { // If any fault was configured for cc.en() process it here match self + .worker_context .fault_configuration .pci_fault .controller_management_fault_enable @@ -446,7 +496,8 @@ impl ChangeDeviceState for NvmeFaultController { registers, qe_sizes, workers, - fault_configuration: _, + flr_reset_requested: _, + worker_context: _, } = self; workers.reset().await; cfg_space.reset(); @@ -501,7 +552,23 @@ impl PciConfigSpace for NvmeFaultController { } fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult { - self.cfg_space.write_u32(offset, value) + let result = self.cfg_space.write_u32(offset, value); + + // Check for FLR requests + if let Some(flr_requested) = &self.flr_reset_requested { + // According to the spec, FLR bit should always read 0, reset it before responding. + if flr_requested.swap(false, std::sync::atomic::Ordering::SeqCst) { + // FLR entails a state agnostic hard-reset. Instead of just resetting the controller, + // create a completely new worker backend to ensure clean state. + self.cfg_space.reset(); + self.registers = RegState::new(); + *self.qe_sizes.lock() = Default::default(); + + self.workers = NvmeWorkers::new(self.worker_context.clone()); + } + } + + result } } diff --git a/vm/devices/storage/nvme_test/src/resolver.rs b/vm/devices/storage/nvme_test/src/resolver.rs index 50c518d879..27cfc352df 100644 --- a/vm/devices/storage/nvme_test/src/resolver.rs +++ b/vm/devices/storage/nvme_test/src/resolver.rs @@ -63,6 +63,7 @@ impl AsyncResolveResource msix_count: resource.msix_count, max_io_queues: resource.max_io_queues, subsystem_id: resource.subsystem_id, + flr_support: resource.flr_support, }, resource.fault_config, ); diff --git a/vm/devices/storage/nvme_test/src/tests.rs b/vm/devices/storage/nvme_test/src/tests.rs index f0a40aba7e..eec09d4183 100644 --- a/vm/devices/storage/nvme_test/src/tests.rs +++ b/vm/devices/storage/nvme_test/src/tests.rs @@ -2,5 +2,6 @@ // Licensed under the MIT License. mod controller_tests; +mod flr_tests; mod shadow_doorbell_tests; mod test_helpers; diff --git a/vm/devices/storage/nvme_test/src/tests/controller_tests.rs b/vm/devices/storage/nvme_test/src/tests/controller_tests.rs index da17946e64..35b565e2e0 100644 --- a/vm/devices/storage/nvme_test/src/tests/controller_tests.rs +++ b/vm/devices/storage/nvme_test/src/tests/controller_tests.rs @@ -9,6 +9,7 @@ use crate::PAGE_SIZE64; use crate::command_match::CommandMatchBuilder; use crate::prp::PrpRange; use crate::spec; +use crate::tests::test_helpers::find_pci_capability; use crate::tests::test_helpers::read_completion_from_queue; use crate::tests::test_helpers::test_memory; use crate::tests::test_helpers::write_command_to_queue; @@ -51,6 +52,7 @@ fn instantiate_controller( msix_count: 64, max_io_queues: 64, subsystem_id: Guid::new_random(), + flr_support: false, // TODO: Add tests with flr support. }, fault_configuration, ); @@ -125,38 +127,23 @@ pub async fn instantiate_and_build_admin_queue( nvmec.pci_cfg_write(0x20, BAR0_LEN as u32).unwrap(); // Find the MSI-X cap struct. - let mut cfg_dword = 0; - nvmec.pci_cfg_read(0x34, &mut cfg_dword).unwrap(); - cfg_dword &= 0xff; - loop { - // Read a cap struct header and pull out the fields. - let mut cap_header = 0; - nvmec - .pci_cfg_read(cfg_dword as u16, &mut cap_header) - .unwrap(); - if cap_header & 0xff == 0x11 { - // Read the table BIR and offset. - let mut table_loc = 0; - nvmec - .pci_cfg_read(cfg_dword as u16 + 4, &mut table_loc) - .unwrap(); - // Code in other places assumes that the MSI-X table is at the beginning - // of BAR 4. If this becomes a fluid concept, capture the values - // here and use them, rather than just asserting on them. - assert_eq!(table_loc & 0x7, 4); - assert_eq!(table_loc >> 3, 0); - - // Found MSI-X, enable it. - nvmec.pci_cfg_write(cfg_dword as u16, 0x80000000).unwrap(); - break; - } - // Isolate the ptr to the next cap struct. - cfg_dword = (cap_header >> 8) & 0xff; - if cfg_dword == 0 { - // Hit the end. - panic!(); - } - } + let cfg_dword = + find_pci_capability(&mut nvmec, 0x11).expect("MSI-X capability should be present"); + + // Read the table BIR and offset. + let mut table_loc = 0; + nvmec + .pci_cfg_read(cfg_dword as u16 + 4, &mut table_loc) + .unwrap(); + + // Code in other places assumes that the MSI-X table is at the beginning + // of BAR 4. If this becomes a fluid concept, capture the values + // here and use them, rather than just asserting on them. + assert_eq!(table_loc & 0x7, 4); + assert_eq!(table_loc >> 3, 0); + + // Found MSI-X, enable it. + nvmec.pci_cfg_write(cfg_dword as u16, 0x80000000).unwrap(); // Turn on MMIO access by writing to the Command register in config space. Enable // MMIO and DMA. diff --git a/vm/devices/storage/nvme_test/src/tests/flr_tests.rs b/vm/devices/storage/nvme_test/src/tests/flr_tests.rs new file mode 100644 index 0000000000..da2e24d9e3 --- /dev/null +++ b/vm/devices/storage/nvme_test/src/tests/flr_tests.rs @@ -0,0 +1,158 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! Tests for Function Level Reset (FLR) functionality. + +use super::test_helpers::TestNvmeMmioRegistration; +use crate::NvmeFaultController; +use crate::NvmeFaultControllerCaps; +use crate::tests::test_helpers::find_pci_capability; +use chipset_device::pci::PciConfigSpace; +use guestmem::GuestMemory; +use guid::Guid; +use mesh::CellUpdater; +use nvme_resources::fault::AdminQueueFaultConfig; +use nvme_resources::fault::FaultConfiguration; +use nvme_resources::fault::PciFaultConfig; +use pal_async::DefaultDriver; +use pal_async::async_test; +use pci_core::capabilities::pci_express::PCI_EXPRESS_DEVICE_CAPS_FLR_BIT_MASK; +use pci_core::msi::MsiInterruptSet; +use pci_core::spec::caps::CapabilityId; +use pci_core::spec::caps::pci_express::PciExpressCapabilityHeader; +use vmcore::vm_task::SingleDriverBackend; +use vmcore::vm_task::VmTaskDriverSource; +use zerocopy::IntoBytes; + +fn instantiate_controller_with_flr( + driver: DefaultDriver, + gm: &GuestMemory, + flr_support: bool, +) -> NvmeFaultController { + let vm_task_driver = VmTaskDriverSource::new(SingleDriverBackend::new(driver)); + let mut msi_interrupt_set = MsiInterruptSet::new(); + let mut mmio_reg = TestNvmeMmioRegistration {}; + + NvmeFaultController::new( + &vm_task_driver, + gm.clone(), + &mut msi_interrupt_set, + &mut mmio_reg, + NvmeFaultControllerCaps { + msix_count: 64, + max_io_queues: 64, + subsystem_id: Guid::new_random(), + flr_support, + }, + FaultConfiguration { + fault_active: CellUpdater::new(false).cell(), + admin_fault: AdminQueueFaultConfig::new(), + pci_fault: PciFaultConfig::new(), + }, + ) +} + +#[async_test] +async fn test_flr_capability_advertised(driver: DefaultDriver) { + let gm = test_memory(); + let mut controller = instantiate_controller_with_flr(driver, &gm, true); + + // Find the PCI Express capability + let cap_ptr = find_pci_capability(&mut controller, CapabilityId::PCI_EXPRESS.0) + .expect("PCI Express capability should be present when FLR is enabled"); + + // Read Device Capabilities register to check FLR support + let mut device_caps = 0u32; + controller + .pci_cfg_read( + cap_ptr + PciExpressCapabilityHeader::DEVICE_CAPS.0, + &mut device_caps, + ) + .unwrap(); + + // Check Function Level Reset bit (bit 28 in Device Capabilities) + let flr_supported = (device_caps & PCI_EXPRESS_DEVICE_CAPS_FLR_BIT_MASK) != 0; + assert!( + flr_supported, + "FLR should be advertised in Device Capabilities" + ); +} + +#[async_test] +async fn test_no_flr_capability_when_disabled(driver: DefaultDriver) { + let gm = test_memory(); + let mut controller = instantiate_controller_with_flr(driver, &gm, false); + + // Find the PCI Express capability - it should not be present + let pcie_cap_offset = find_pci_capability(&mut controller, CapabilityId::PCI_EXPRESS.0); + + assert!( + pcie_cap_offset.is_none(), + "PCI Express capability should not be present when FLR is disabled" + ); +} + +#[async_test] +async fn test_flr_trigger(driver: DefaultDriver) { + let gm = test_memory(); + let mut controller = instantiate_controller_with_flr(driver, &gm, true); + + // Set the ACQ base to 0x1000 and the ASQ base to 0x2000. + let mut qword = 0x1000; + controller.write_bar0(0x30, qword.as_bytes()).unwrap(); + qword = 0x2000; + controller.write_bar0(0x28, qword.as_bytes()).unwrap(); + + // Set the queues so that they have four entries apiece. + let mut dword = 0x30003; + controller.write_bar0(0x24, dword.as_bytes()).unwrap(); + + // Enable the controller. + controller.read_bar0(0x14, dword.as_mut_bytes()).unwrap(); + dword |= 1; + controller.write_bar0(0x14, dword.as_bytes()).unwrap(); + controller.read_bar0(0x14, dword.as_mut_bytes()).unwrap(); + assert!(dword & 1 != 0); + + // Read CSTS + controller.read_bar0(0x1c, dword.as_mut_bytes()).unwrap(); + assert!(dword & 2 == 0); + + // Find the PCI Express capability + let pcie_cap_offset = find_pci_capability(&mut controller, CapabilityId::PCI_EXPRESS.0); + + let pcie_cap_offset = pcie_cap_offset.expect("PCI Express capability should be present"); + + // Read Device Control/Status register to get initial state + let device_ctl_sts_offset = pcie_cap_offset + PciExpressCapabilityHeader::DEVICE_CTL_STS.0; + let mut initial_ctl_sts = 0u32; + controller + .pci_cfg_read(device_ctl_sts_offset, &mut initial_ctl_sts) + .unwrap(); + + // Trigger FLR by setting the Initiate Function Level Reset bit (bit 15 in Device Control) + let flr_bit = 1u32 << 15; + let new_ctl_sts = initial_ctl_sts | flr_bit; + controller + .pci_cfg_write(device_ctl_sts_offset, new_ctl_sts) + .unwrap(); + + // The FLR bit should be self-clearing, so read it back to verify + let mut post_flr_ctl_sts = 0u32; + controller + .pci_cfg_read(device_ctl_sts_offset, &mut post_flr_ctl_sts) + .unwrap(); + assert_eq!( + post_flr_ctl_sts & flr_bit, + 0, + "FLR bit should be self-clearing" + ); + + // Check that the controller is disabled after FLR + controller.read_bar0(0x14, dword.as_mut_bytes()).unwrap(); + assert!(dword == 0); +} + +fn test_memory() -> GuestMemory { + GuestMemory::allocate(0x10000) +} diff --git a/vm/devices/storage/nvme_test/src/tests/test_helpers.rs b/vm/devices/storage/nvme_test/src/tests/test_helpers.rs index 60a22f7eb6..e123a50d44 100644 --- a/vm/devices/storage/nvme_test/src/tests/test_helpers.rs +++ b/vm/devices/storage/nvme_test/src/tests/test_helpers.rs @@ -3,12 +3,14 @@ //! Mock types for unit-testing various NVMe behaviors. +use crate::NvmeFaultController; use crate::PAGE_SIZE; use crate::PAGE_SIZE64; use crate::prp::PrpRange; use crate::spec; use chipset_device::mmio::ControlMmioIntercept; use chipset_device::mmio::RegisterMmioIntercept; +use chipset_device::pci::PciConfigSpace; use guestmem::GuestMemory; use parking_lot::Mutex; use pci_core::msi::MsiControl; @@ -148,3 +150,32 @@ pub fn read_completion_from_queue( gm.read_plain::(gpa).unwrap() } + +// Returns the offset for the PCI capability or None if not found. Caps the max length of the list to 100 to avoid infinite loops. +pub fn find_pci_capability(controller: &mut NvmeFaultController, cap_id: u8) -> Option { + let mut cfg_dword = 0; + controller.pci_cfg_read(0x34, &mut cfg_dword).unwrap(); // Cap_ptr is always at 0x34 + cfg_dword &= 0xff; + let mut max_caps = 100; // Limit to avoid infinite loop + loop { + if max_caps == 0 { + return None; + } + // Read a cap struct header and pull out the fields. + let mut cap_header = 0; + controller + .pci_cfg_read(cfg_dword as u16, &mut cap_header) + .unwrap(); + if cap_header & 0xff == cap_id as u32 { + break; + } + // Isolate the ptr to the next cap struct. + cfg_dword = (cap_header >> 8) & 0xff; + if cfg_dword == 0 { + return None; + } + max_caps -= 1; + } + + Some(cfg_dword as u16) +} diff --git a/vm/devices/storage/nvme_test/src/workers/coordinator.rs b/vm/devices/storage/nvme_test/src/workers/coordinator.rs index 7016857eab..db1a26d303 100644 --- a/vm/devices/storage/nvme_test/src/workers/coordinator.rs +++ b/vm/devices/storage/nvme_test/src/workers/coordinator.rs @@ -31,9 +31,10 @@ use vmcore::interrupt::Interrupt; use vmcore::vm_task::VmTaskDriver; use vmcore::vm_task::VmTaskDriverSource; +#[derive(Clone)] /// An input context for the NvmeWorkers -pub struct NvmeWorkersContext<'a> { - pub driver_source: &'a VmTaskDriverSource, +pub struct NvmeWorkersContext { + pub driver_source: VmTaskDriverSource, pub mem: GuestMemory, pub interrupts: Vec, pub max_sqs: u16, @@ -65,7 +66,7 @@ impl InspectMut for NvmeWorkers { } impl NvmeWorkers { - pub fn new(context: NvmeWorkersContext<'_>) -> Self { + pub fn new(context: NvmeWorkersContext) -> Self { let NvmeWorkersContext { driver_source, mem, @@ -86,7 +87,7 @@ impl NvmeWorkers { let handler: AdminHandler = AdminHandler::new( driver.clone(), AdminConfig { - driver_source: driver_source.clone(), + driver_source, mem, interrupts, doorbells: doorbells.clone(), diff --git a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs index 4794eb1725..adfbf3153a 100644 --- a/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs +++ b/vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs @@ -423,6 +423,7 @@ async fn create_keepalive_test_config( .into_resource(), }], fault_config: fault_configuration, + flr_support: false, } .into_resource(), })