Skip to content

Commit 87cfe46

Browse files
committed
nvme_test: cc.enable() delay capability during servicing for the NvmeFaultController (microsoft#1922)
This PR adds a fault-injection capability to the NVMe fault controller: a configurable fault (currently a delay) can be applied when the CC.EN (enable) bit is set.
1 parent 4a853b3 commit 87cfe46

File tree

6 files changed

+60
-1
lines changed

6 files changed

+60
-1
lines changed

vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ use mesh::CellUpdater;
1212
use nvme::NvmeControllerCaps;
1313
use nvme_resources::fault::AdminQueueFaultConfig;
1414
use nvme_resources::fault::FaultConfiguration;
15+
use nvme_resources::fault::PciFaultConfig;
1516
use nvme_resources::fault::QueueFaultBehavior;
1617
use nvme_spec::AdminOpcode;
1718
use nvme_spec::Cap;
@@ -50,6 +51,7 @@ async fn test_nvme_command_fault(driver: DefaultDriver) {
5051
AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
5152
QueueFaultBehavior::Update(output_cmd),
5253
),
54+
pci_fault: PciFaultConfig::new(),
5355
},
5456
)
5557
.await;

vm/devices/storage/nvme_resources/src/fault.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,22 @@ pub enum QueueFaultBehavior<T> {
2323
Panic(String),
2424
}
2525

26+
#[derive(Clone, MeshPayload)]
27+
/// Supported fault behaviours that can be injected into a PCI operation
28+
pub enum PciFaultBehavior {
29+
/// Delay the PCI operation by the given duration before it proceeds
30+
Delay(Duration),
31+
/// No fault: the PCI operation proceeds normally
32+
Default,
33+
}
34+
35+
#[derive(MeshPayload, Clone)]
36+
/// A buildable fault configuration for the controller management interface (cc.en(), csts.rdy(), ...)
37+
pub struct PciFaultConfig {
38+
/// Fault to apply when the cc.en() bit is set during controller enablement
39+
pub controller_management_fault_enable: PciFaultBehavior,
40+
}
41+
2642
#[derive(MeshPayload, Clone)]
2743
/// A buildable fault configuration
2844
pub struct AdminQueueFaultConfig {
@@ -37,6 +53,23 @@ pub struct FaultConfiguration {
3753
pub fault_active: Cell<bool>,
3854
/// Fault to apply to the admin queues
3955
pub admin_fault: AdminQueueFaultConfig,
56+
/// Fault to apply to management layer of the controller
57+
pub pci_fault: PciFaultConfig,
58+
}
59+
60+
impl PciFaultConfig {
61+
/// Create a new no-op fault configuration (the cc.en() fault is `PciFaultBehavior::Default`)
62+
pub fn new() -> Self {
63+
Self {
64+
controller_management_fault_enable: PciFaultBehavior::Default,
65+
}
66+
}
67+
68+
/// Set the cc.en() fault, replacing any previously configured one, and return the updated configuration
69+
pub fn with_cc_enable_fault(mut self, behaviour: PciFaultBehavior) -> Self {
70+
self.controller_management_fault_enable = behaviour;
71+
self
72+
}
4073
}
4174

4275
impl AdminQueueFaultConfig {

vm/devices/storage/nvme_test/src/pci.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ use guid::Guid;
3131
use inspect::Inspect;
3232
use inspect::InspectMut;
3333
use nvme_resources::fault::FaultConfiguration;
34+
use nvme_resources::fault::PciFaultBehavior;
3435
use parking_lot::Mutex;
3536
use pci_core::capabilities::msix::MsixEmulator;
3637
use pci_core::capabilities::pci_express::FlrHandler;
@@ -394,6 +395,18 @@ impl NvmeFaultController {
394395

395396
if cc.en() != self.registers.cc.en() {
396397
if cc.en() {
398+
// If a fault was configured for cc.en(), apply it here before the enable is processed. Note: a Delay fault blocks the calling thread for the full duration.
399+
match self
400+
.fault_configuration
401+
.pci_fault
402+
.controller_management_fault_enable
403+
{
404+
PciFaultBehavior::Delay(duration) => {
405+
std::thread::sleep(duration);
406+
}
407+
PciFaultBehavior::Default => {}
408+
}
409+
397410
// Some drivers will write zeros to IOSQES and IOCQES, assuming that the defaults will work.
398411
if cc.iocqes() == 0 {
399412
cc.set_iocqes(IOCQES);

vm/devices/storage/nvme_test/src/tests/controller_tests.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use guid::Guid;
1919
use mesh::CellUpdater;
2020
use nvme_resources::fault::AdminQueueFaultConfig;
2121
use nvme_resources::fault::FaultConfiguration;
22+
use nvme_resources::fault::PciFaultConfig;
2223
use nvme_resources::fault::QueueFaultBehavior;
2324
use nvme_spec::Command;
2425
use nvme_spec::Completion;
@@ -195,6 +196,7 @@ async fn test_basic_registers(driver: DefaultDriver) {
195196
let fault_configuration = FaultConfiguration {
196197
fault_active: CellUpdater::new(false).cell(),
197198
admin_fault: AdminQueueFaultConfig::new(),
199+
pci_fault: PciFaultConfig::new(),
198200
};
199201
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
200202
let mut dword = 0u32;
@@ -223,6 +225,7 @@ async fn test_invalid_configuration(driver: DefaultDriver) {
223225
let fault_configuration = FaultConfiguration {
224226
fault_active: CellUpdater::new(false).cell(),
225227
admin_fault: AdminQueueFaultConfig::new(),
228+
pci_fault: PciFaultConfig::new(),
226229
};
227230
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
228231
let mut dword = 0u32;
@@ -241,6 +244,7 @@ async fn test_enable_controller(driver: DefaultDriver) {
241244
let fault_configuration = FaultConfiguration {
242245
fault_active: CellUpdater::new(false).cell(),
243246
admin_fault: AdminQueueFaultConfig::new(),
247+
pci_fault: PciFaultConfig::new(),
244248
};
245249
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
246250

@@ -272,6 +276,7 @@ async fn test_multi_page_admin_queues(driver: DefaultDriver) {
272276
let fault_configuration = FaultConfiguration {
273277
fault_active: CellUpdater::new(false).cell(),
274278
admin_fault: AdminQueueFaultConfig::new(),
279+
pci_fault: PciFaultConfig::new(),
275280
};
276281
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
277282

@@ -346,6 +351,7 @@ async fn test_send_identify_no_fault(driver: DefaultDriver) {
346351
let fault_configuration = FaultConfiguration {
347352
fault_active: CellUpdater::new(false).cell(),
348353
admin_fault: AdminQueueFaultConfig::new(),
354+
pci_fault: PciFaultConfig::new(),
349355
};
350356
let cqe = send_identify(driver, fault_configuration).await;
351357

@@ -363,6 +369,7 @@ async fn test_send_identify_with_sq_fault(driver: DefaultDriver) {
363369
nvme_spec::AdminOpcode::IDENTIFY.0,
364370
QueueFaultBehavior::Update(faulty_identify),
365371
),
372+
pci_fault: PciFaultConfig::new(),
366373
};
367374
let cqe = send_identify(driver, fault_configuration).await;
368375

vm/devices/storage/nvme_test/src/tests/shadow_doorbell_tests.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use guestmem::GuestMemory;
1515
use mesh::CellUpdater;
1616
use nvme_resources::fault::AdminQueueFaultConfig;
1717
use nvme_resources::fault::FaultConfiguration;
18+
use nvme_resources::fault::PciFaultConfig;
1819
use pal_async::DefaultDriver;
1920
use pal_async::async_test;
2021
use pci_core::test_helpers::TestPciInterruptController;
@@ -42,6 +43,7 @@ async fn setup_shadow_doorbells(
4243
let fault_configuration = FaultConfiguration {
4344
fault_active: CellUpdater::new(false).cell(),
4445
admin_fault: AdminQueueFaultConfig::new(),
46+
pci_fault: PciFaultConfig::new(),
4547
}; // Build a controller with 64 entries in the admin queue (just so that the ASQ fits in one page).
4648
let mut nvmec = instantiate_and_build_admin_queue(
4749
cq_buf,

vmm_tests/vmm_tests/tests/tests/multiarch/openhcl_servicing.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use nvme_resources::NamespaceDefinition;
1616
use nvme_resources::NvmeFaultControllerHandle;
1717
use nvme_resources::fault::AdminQueueFaultConfig;
1818
use nvme_resources::fault::FaultConfiguration;
19+
use nvme_resources::fault::PciFaultConfig;
1920
use nvme_resources::fault::QueueFaultBehavior;
2021
use petri::OpenHclServicingFlags;
2122
use petri::PetriVmBuilder;
@@ -255,8 +256,9 @@ async fn keepalive_with_nvme_fault(
255256
fault_active: fault_start_updater.cell(),
256257
admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
257258
nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
258-
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. This should never happen.".to_string()),
259+
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. THERE IS A BUG SOMEWHERE.".to_string()),
259260
),
261+
pci_fault: PciFaultConfig::new(),
260262
};
261263

262264
let (mut vm, agent) = config

0 commit comments

Comments
 (0)