Changes from all commits (27 commits)
7b21eec
Ran xtask fmt fix
gurasinghMS Aug 22, 2025
b0b7109
Fixing build issues
gurasinghMS Aug 22, 2025
3c02e17
Initial plan
Copilot Jul 18, 2025
3c19bec
Add Function Level Reset (FLR) support to NVMe emulator
Copilot Jul 18, 2025
1cca506
Initial plan
Copilot Jul 18, 2025
14a5a7a
Initial plan
Copilot Jul 18, 2025
530058e
Initial plan
Copilot Jul 18, 2025
5132157
Moved the flr logic to the nvme fault controller instead of the actua…
gurasinghMS Aug 13, 2025
d13c160
Moving FLR logic to happen after a CFG write
gurasinghMS Aug 13, 2025
1ce07fd
Rebasing to the Fault builder model style of fault injection
gurasinghMS Aug 23, 2025
ba5fe14
Added the find_pci_capability method as a test helper instead of it b…
gurasinghMS Aug 14, 2025
02a2b7e
Fixing build issues
gurasinghMS Aug 14, 2025
e5bb73b
Small changes based on comments. Using builder notation instead of ma…
gurasinghMS Aug 19, 2025
4f57f03
Pausing work
gurasinghMS Aug 22, 2025
8ece4b3
Pausing work again. This will be completed after the Fault Configurat…
gurasinghMS Aug 22, 2025
b908990
Fixing build issues
gurasinghMS Aug 23, 2025
d82fd13
Responding to comments and build errors
gurasinghMS Aug 24, 2025
2ec8554
Finished the merge and related changes. Back up to speed with the mai…
gurasinghMS Aug 25, 2025
eff125f
nvme_test: Add FaultConfiguration to the fault controller handler (#1…
gurasinghMS Aug 26, 2025
40ddc20
nvme_test: cc.enable() delay capability during servicing for the Nvme…
gurasinghMS Aug 29, 2025
e7c5db7
nvme_test: add better command matching when injecting faults in the n…
gurasinghMS Sep 4, 2025
2a24d9d
Squashed commits
gurasinghMS Sep 9, 2025
8f273a1
Remove unnecessary changes for FLR support
gurasinghMS Sep 9, 2025
a6a27a8
Unused imports removed
gurasinghMS Sep 9, 2025
5492066
Merge branch 'main' into advertise_flr_support
gurasinghMS Sep 9, 2025
574b991
Fixing clippy issues
gurasinghMS Sep 10, 2025
56065d0
Merge branch 'main' into advertise_flr_support
gurasinghMS Sep 10, 2025
2 changes: 2 additions & 0 deletions vm/devices/pci/pci_core/src/capabilities/mod.rs
@@ -3,6 +3,8 @@

//! PCI capabilities.

pub use self::pci_express::FlrHandler;
pub use self::pci_express::PciExpressCapability;
pub use self::read_only::ReadOnlyCapability;

use inspect::Inspect;
1 change: 1 addition & 0 deletions vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
@@ -364,6 +364,7 @@ async fn test_nvme_fault_injection(driver: DefaultDriver, fault_configuration: F
msix_count: MSIX_COUNT,
max_io_queues: IO_QUEUE_COUNT,
subsystem_id: Guid::new_random(),
flr_support: false,
},
fault_configuration,
);
2 changes: 2 additions & 0 deletions vm/devices/storage/nvme_resources/src/lib.rs
@@ -43,6 +43,8 @@ pub struct NvmeFaultControllerHandle {
pub max_io_queues: u16,
/// The initial set of namespaces.
pub namespaces: Vec<NamespaceDefinition>,
/// Whether to enable FLR support.
pub flr_support: bool,
/// Configuration for the fault
pub fault_config: FaultConfiguration,
}
85 changes: 76 additions & 9 deletions vm/devices/storage/nvme_test/src/pci.rs
@@ -34,6 +34,8 @@ use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultBehavior;
use parking_lot::Mutex;
use pci_core::capabilities::msix::MsixEmulator;
use pci_core::capabilities::pci_express::FlrHandler;
use pci_core::capabilities::pci_express::PciExpressCapability;
use pci_core::cfg_space_emu::BarMemoryKind;
use pci_core::cfg_space_emu::ConfigSpaceType0Emulator;
use pci_core::cfg_space_emu::DeviceBars;
@@ -49,6 +51,32 @@ use vmcore::save_restore::SaveRestore;
use vmcore::save_restore::SavedStateNotSupported;
use vmcore::vm_task::VmTaskDriverSource;

/// FLR handler that signals reset requests.
#[derive(Inspect)]
struct NvmeFlrHandler {
    #[inspect(skip)]
    reset_requested: Arc<std::sync::atomic::AtomicBool>,
}

impl NvmeFlrHandler {
    fn new() -> (Self, Arc<std::sync::atomic::AtomicBool>) {
        let reset_requested = Arc::new(std::sync::atomic::AtomicBool::new(false));
        (
            Self {
                reset_requested: reset_requested.clone(),
            },
            reset_requested,
        )
    }
}

impl FlrHandler for NvmeFlrHandler {
    fn initiate_flr(&self) {
        self.reset_requested
            .store(true, std::sync::atomic::Ordering::SeqCst);
    }
}

/// An NVMe controller.
#[derive(InspectMut)]
pub struct NvmeFaultController {
@@ -61,7 +89,9 @@ pub struct NvmeFaultController {
#[inspect(flatten, mut)]
workers: NvmeWorkers,
#[inspect(skip)]
fault_configuration: FaultConfiguration,
flr_reset_requested: Option<Arc<std::sync::atomic::AtomicBool>>,
#[inspect(skip)]
worker_context: NvmeWorkersContext,
}

#[derive(Inspect)]
@@ -107,6 +137,8 @@ pub struct NvmeFaultControllerCaps {
    /// The subsystem ID, used as part of the subnqn field of the identify
    /// controller response.
    pub subsystem_id: Guid,
    /// Whether to advertise Function Level Reset (FLR) support.
    pub flr_support: bool,
}
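
Opting into the new capability from a caller just means setting this flag when the caps are built. A minimal sketch mirroring the field values used in controller_tests.rs, with FLR advertised; the import paths and the flr_enabled_caps helper are assumptions for illustration only.

use guid::Guid; // crate/item paths assumed for this sketch
use nvme_test::NvmeFaultControllerCaps;

// Hypothetical helper: same field values as the tests, but with FLR advertised.
fn flr_enabled_caps() -> NvmeFaultControllerCaps {
    NvmeFaultControllerCaps {
        msix_count: 64,
        max_io_queues: 64,
        subsystem_id: Guid::new_random(),
        flr_support: true,
    }
}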

impl NvmeFaultController {
@@ -130,6 +162,20 @@ impl NvmeFaultController {
BarMemoryKind::Intercept(register_mmio.new_io_region("msix", msix.bar_len())),
);

        // Prepare capabilities list
        let mut capabilities: Vec<Box<dyn pci_core::capabilities::PciCapability>> =
            vec![Box::new(msix_cap)];

        // Optionally add PCI Express capability with FLR support
        let flr_reset_requested = if caps.flr_support {
            let (flr_handler, reset_requested) = NvmeFlrHandler::new();
            let pcie_cap = PciExpressCapability::new(Some(Arc::new(flr_handler)));
            capabilities.push(Box::new(pcie_cap));
            Some(reset_requested)
        } else {
            None
        };

let cfg_space = ConfigSpaceType0Emulator::new(
HardwareIds {
vendor_id: VENDOR_ID,
@@ -141,7 +187,7 @@
type0_sub_vendor_id: 0,
type0_sub_system_id: 0,
},
vec![Box::new(msix_cap)],
capabilities,
bars,
);

@@ -150,24 +196,27 @@
.collect();

let qe_sizes = Arc::new(Default::default());
let admin = NvmeWorkers::new(NvmeWorkersContext {
driver_source,
let worker_context = NvmeWorkersContext {
driver_source: driver_source.clone(),
mem: guest_memory,
interrupts,
max_sqs: caps.max_io_queues,
max_cqs: caps.max_io_queues,
qe_sizes: Arc::clone(&qe_sizes),
subsystem_id: caps.subsystem_id,
fault_configuration: fault_configuration.clone(),
});
fault_configuration,
};

let admin = NvmeWorkers::new(worker_context.clone());

Self {
cfg_space,
msix,
registers: RegState::new(),
workers: admin,
qe_sizes,
fault_configuration,
flr_reset_requested,
worker_context,
}
}

@@ -347,6 +396,7 @@ impl NvmeFaultController {
if cc.en() {
// If any fault was configured for cc.en() process it here
match self
.worker_context
.fault_configuration
.pci_fault
.controller_management_fault_enable
@@ -446,7 +496,8 @@ impl ChangeDeviceState for NvmeFaultController {
registers,
qe_sizes,
workers,
fault_configuration: _,
flr_reset_requested: _,
worker_context: _,
} = self;
workers.reset().await;
cfg_space.reset();
@@ -501,7 +552,23 @@ impl PciConfigSpace for NvmeFaultController {
}

fn pci_cfg_write(&mut self, offset: u16, value: u32) -> IoResult {
self.cfg_space.write_u32(offset, value)
let result = self.cfg_space.write_u32(offset, value);

// Check for FLR requests
if let Some(flr_requested) = &self.flr_reset_requested {
// According to the spec, FLR bit should always read 0, reset it before responding.
if flr_requested.swap(false, std::sync::atomic::Ordering::SeqCst) {
// FLR entails a state agnostic hard-reset. Instead of just resetting the controller,
// create a completely new worker backend to ensure clean state.
self.cfg_space.reset();
self.registers = RegState::new();
*self.qe_sizes.lock() = Default::default();

self.workers = NvmeWorkers::new(self.worker_context.clone());

Review comment (Contributor): Instead of here, can we process the reset from the NvmeFlrHandler::initiate_flr function? It seems like the point of this FlrHandler stuff is to avoid the need to post-process the config space write directly. Instead, you just get a callback when you need to do something

Review comment (Contributor): I do not know whether that's actually possible, it just seems weird to circumvent that intent to post-process the config space write anyway

}
}

result
}
}
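
The review thread above asks whether the reset could be driven from FlrHandler::initiate_flr itself rather than by post-processing pci_cfg_write. A minimal sketch of that shape follows, using only the trait signature visible in this diff; the CallbackFlrHandler name and its callback field are hypothetical, and whether the controller's registers and workers can actually be rebuilt safely from inside such a callback is exactly the open question raised in the thread.

use inspect::Inspect;
use pci_core::capabilities::pci_express::FlrHandler;

// Hypothetical alternative, sketched for illustration only: the handler runs a
// callback at FLR time instead of setting a flag that pci_cfg_write polls later.
#[derive(Inspect)]
struct CallbackFlrHandler {
    #[inspect(skip)]
    on_flr: Box<dyn Fn() + Send + Sync>,
}

impl FlrHandler for CallbackFlrHandler {
    fn initiate_flr(&self) {
        // Run the reset action at the point the FLR bit is written, rather than
        // recording it for the config-space write path to notice afterwards.
        (self.on_flr)();
    }
}

Wiring this up would still require handing the handler a closure that can reach the controller's registers, queue-entry sizes, and worker context, which is the ownership problem the flag-plus-post-processing approach sidesteps.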

1 change: 1 addition & 0 deletions vm/devices/storage/nvme_test/src/resolver.rs
@@ -63,6 +63,7 @@ impl AsyncResolveResource<PciDeviceHandleKind, NvmeFaultControllerHandle>
msix_count: resource.msix_count,
max_io_queues: resource.max_io_queues,
subsystem_id: resource.subsystem_id,
flr_support: resource.flr_support,
},
resource.fault_config,
);
1 change: 1 addition & 0 deletions vm/devices/storage/nvme_test/src/tests.rs
@@ -2,5 +2,6 @@
// Licensed under the MIT License.

mod controller_tests;
mod flr_tests;
mod shadow_doorbell_tests;
mod test_helpers;
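
The body of the new flr_tests module is outside this diff. The sketch below shows the kind of config-space sequence such a test would drive, assuming a controller built with flr_support: true, the find_pci_capability helper from test_helpers, and the standard PCIe layout (capability ID 0x10, Device Control in the dword at capability offset 0x8, Initiate Function Level Reset = bit 15). The import paths, the helper's exact signature, and the issue_flr function are assumptions, not the contents of the real file.

use crate::pci::NvmeFaultController; // item path assumed
use crate::tests::test_helpers::find_pci_capability;
use chipset_device::pci::PciConfigSpace; // trait path assumed

/// Sketch: drive a Function Level Reset through config space.
fn issue_flr(nvmec: &mut NvmeFaultController) {
    // Locate the PCI Express capability advertised when `flr_support` is set.
    let pcie_cap = find_pci_capability(nvmec, 0x10).expect("PCIe capability should be present");

    // Read the dword holding Device Control (low 16 bits), set the
    // Initiate Function Level Reset bit, and write it back.
    let mut dev_ctl = 0;
    nvmec
        .pci_cfg_read(pcie_cap as u16 + 0x8, &mut dev_ctl)
        .unwrap();
    nvmec
        .pci_cfg_write(pcie_cap as u16 + 0x8, dev_ctl | (1 << 15))
        .unwrap();

    // pci_cfg_write observes the handler's reset flag and rebuilds the worker
    // backend, so controller state should now read back as reset.
}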
51 changes: 19 additions & 32 deletions vm/devices/storage/nvme_test/src/tests/controller_tests.rs
@@ -9,6 +9,7 @@ use crate::PAGE_SIZE64;
use crate::command_match::CommandMatchBuilder;
use crate::prp::PrpRange;
use crate::spec;
use crate::tests::test_helpers::find_pci_capability;
use crate::tests::test_helpers::read_completion_from_queue;
use crate::tests::test_helpers::test_memory;
use crate::tests::test_helpers::write_command_to_queue;
@@ -51,6 +52,7 @@ fn instantiate_controller(
msix_count: 64,
max_io_queues: 64,
subsystem_id: Guid::new_random(),
flr_support: false, // TODO: Add tests with flr support.
},
fault_configuration,
);
@@ -125,38 +127,23 @@ pub async fn instantiate_and_build_admin_queue(
nvmec.pci_cfg_write(0x20, BAR0_LEN as u32).unwrap();

// Find the MSI-X cap struct.
let mut cfg_dword = 0;
nvmec.pci_cfg_read(0x34, &mut cfg_dword).unwrap();
cfg_dword &= 0xff;
loop {
// Read a cap struct header and pull out the fields.
let mut cap_header = 0;
nvmec
.pci_cfg_read(cfg_dword as u16, &mut cap_header)
.unwrap();
if cap_header & 0xff == 0x11 {
// Read the table BIR and offset.
let mut table_loc = 0;
nvmec
.pci_cfg_read(cfg_dword as u16 + 4, &mut table_loc)
.unwrap();
// Code in other places assumes that the MSI-X table is at the beginning
// of BAR 4. If this becomes a fluid concept, capture the values
// here and use them, rather than just asserting on them.
assert_eq!(table_loc & 0x7, 4);
assert_eq!(table_loc >> 3, 0);

// Found MSI-X, enable it.
nvmec.pci_cfg_write(cfg_dword as u16, 0x80000000).unwrap();
break;
}
// Isolate the ptr to the next cap struct.
cfg_dword = (cap_header >> 8) & 0xff;
if cfg_dword == 0 {
// Hit the end.
panic!();
}
}
let cfg_dword =
find_pci_capability(&mut nvmec, 0x11).expect("MSI-X capability should be present");

// Read the table BIR and offset.
let mut table_loc = 0;
nvmec
.pci_cfg_read(cfg_dword as u16 + 4, &mut table_loc)
.unwrap();

// Code in other places assumes that the MSI-X table is at the beginning
// of BAR 4. If this becomes a fluid concept, capture the values
// here and use them, rather than just asserting on them.
assert_eq!(table_loc & 0x7, 4);
assert_eq!(table_loc >> 3, 0);

// Found MSI-X, enable it.
nvmec.pci_cfg_write(cfg_dword as u16, 0x80000000).unwrap();

// Turn on MMIO access by writing to the Command register in config space. Enable
// MMIO and DMA.
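
For reference, the find_pci_capability helper used above lives in test_helpers.rs and its body is not part of this diff; the sketch below reconstructs what it presumably does from the open-coded loop it replaces. The generic signature, return type, and trait import path are assumptions.

use chipset_device::pci::PciConfigSpace; // trait path assumed

/// Walk the PCI capability list and return the config-space offset of the
/// first capability whose ID matches `cap_id`, or None if the list ends.
pub fn find_pci_capability(dev: &mut impl PciConfigSpace, cap_id: u32) -> Option<u32> {
    // The capabilities pointer is the low byte of the dword at offset 0x34.
    let mut cap_ptr = 0;
    dev.pci_cfg_read(0x34, &mut cap_ptr).unwrap();
    cap_ptr &= 0xff;
    while cap_ptr != 0 {
        // Each capability header packs the ID in byte 0 and the pointer to the
        // next capability in byte 1.
        let mut cap_header = 0;
        dev.pci_cfg_read(cap_ptr as u16, &mut cap_header).unwrap();
        if cap_header & 0xff == cap_id {
            return Some(cap_ptr);
        }
        cap_ptr = (cap_header >> 8) & 0xff;
    }
    None
}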