Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions vm/devices/storage/disk_nvme/nvme_driver/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use mesh::CellUpdater;
use nvme::NvmeControllerCaps;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use nvme_resources::fault::QueueFaultBehavior;
use nvme_spec::AdminOpcode;
use nvme_spec::Cap;
Expand Down Expand Up @@ -50,6 +51,7 @@ async fn test_nvme_command_fault(driver: DefaultDriver) {
AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
QueueFaultBehavior::Update(output_cmd),
),
pci_fault: PciFaultConfig::new(),
},
)
.await;
Expand Down
33 changes: 33 additions & 0 deletions vm/devices/storage/nvme_resources/src/fault.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,22 @@ pub enum QueueFaultBehavior<T> {
Panic(String),
}

#[derive(Clone, MeshPayload)]
/// Supported fault behaviour for PCI faults.
///
/// A value of this type selects what the fault-injecting controller does at a
/// PCI-level fault hook (for example, the `cc.en()` hook configured through
/// `PciFaultConfig`).
pub enum PciFaultBehavior {
/// Introduce a delay of the given [`Duration`] to the PCI operation before
/// it is allowed to proceed.
Delay(Duration),
/// Do nothing: the PCI operation proceeds with no injected fault.
Default,
}

#[derive(MeshPayload, Clone)]
/// A buildable fault configuration for the controller management interface
/// (cc.en(), csts.rdy(), ... ).
///
/// Construct a no-op configuration with `PciFaultConfig::new()` and then
/// customise it with the `with_*` builder methods.
pub struct PciFaultConfig {
/// Fault to apply to the cc.en() bit during controller enablement.
/// `PciFaultBehavior::Default` means no fault is injected.
pub controller_management_fault_enable: PciFaultBehavior,
}

#[derive(MeshPayload, Clone)]
/// A buildable fault configuration
pub struct AdminQueueFaultConfig {
Expand All @@ -37,6 +53,23 @@ pub struct FaultConfiguration {
pub fault_active: Cell<bool>,
/// Fault to apply to the admin queues
pub admin_fault: AdminQueueFaultConfig,
/// Fault to apply to management layer of the controller
pub pci_fault: PciFaultConfig,
}

impl PciFaultConfig {
    /// Create a new no-op fault configuration.
    ///
    /// Every fault slot is initialised to [`PciFaultBehavior::Default`], so a
    /// controller built with this configuration behaves as if no PCI fault
    /// had been requested.
    pub fn new() -> Self {
        Self {
            controller_management_fault_enable: PciFaultBehavior::Default,
        }
    }

    /// Set the fault that is applied to the cc.en() bit during controller
    /// enablement.
    ///
    /// This is a builder-style method: it consumes `self`, replaces any
    /// previously configured cc.en() fault with `behaviour`, and returns the
    /// updated configuration so that calls can be chained.
    pub fn with_cc_enable_fault(mut self, behaviour: PciFaultBehavior) -> Self {
        self.controller_management_fault_enable = behaviour;
        self
    }
}

impl Default for PciFaultConfig {
    /// Equivalent to [`PciFaultConfig::new`]: a configuration with no faults.
    ///
    /// Provided so that the type can be used with `..Default::default()` and
    /// generic code that requires `Default`, matching the public
    /// zero-argument `new()` constructor.
    fn default() -> Self {
        Self::new()
    }
}

impl AdminQueueFaultConfig {
Expand Down
19 changes: 18 additions & 1 deletion vm/devices/storage/nvme_test/src/pci.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ use guid::Guid;
use inspect::Inspect;
use inspect::InspectMut;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultBehavior;
use parking_lot::Mutex;
use pci_core::capabilities::msix::MsixEmulator;
use pci_core::cfg_space_emu::BarMemoryKind;
Expand Down Expand Up @@ -59,6 +60,8 @@ pub struct NvmeFaultController {
qe_sizes: Arc<Mutex<IoQueueEntrySizes>>,
#[inspect(flatten, mut)]
workers: NvmeWorkers,
#[inspect(skip)]
fault_configuration: FaultConfiguration,
}

#[derive(Inspect)]
Expand Down Expand Up @@ -155,7 +158,7 @@ impl NvmeFaultController {
max_cqs: caps.max_io_queues,
qe_sizes: Arc::clone(&qe_sizes),
subsystem_id: caps.subsystem_id,
fault_configuration,
fault_configuration: fault_configuration.clone(),
});

Self {
Expand All @@ -164,6 +167,7 @@ impl NvmeFaultController {
registers: RegState::new(),
workers: admin,
qe_sizes,
fault_configuration,
}
}

Expand Down Expand Up @@ -341,6 +345,18 @@ impl NvmeFaultController {

if cc.en() != self.registers.cc.en() {
if cc.en() {
// If any fault was configured for cc.en() process it here
match self
.fault_configuration
.pci_fault
.controller_management_fault_enable
{
PciFaultBehavior::Delay(duration) => {
std::thread::sleep(duration);
}
PciFaultBehavior::Default => {}
}

// Some drivers will write zeros to IOSQES and IOCQES, assuming that the defaults will work.
if cc.iocqes() == 0 {
cc.set_iocqes(IOCQES);
Expand Down Expand Up @@ -430,6 +446,7 @@ impl ChangeDeviceState for NvmeFaultController {
registers,
qe_sizes,
workers,
fault_configuration: _,
} = self;
workers.reset().await;
cfg_space.reset();
Expand Down
7 changes: 7 additions & 0 deletions vm/devices/storage/nvme_test/src/tests/controller_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use guid::Guid;
use mesh::CellUpdater;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use nvme_resources::fault::QueueFaultBehavior;
use nvme_spec::Command;
use nvme_spec::Completion;
Expand Down Expand Up @@ -208,6 +209,7 @@ async fn test_basic_registers(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
let mut dword = 0u32;
Expand Down Expand Up @@ -236,6 +238,7 @@ async fn test_invalid_configuration(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);
let mut dword = 0u32;
Expand All @@ -254,6 +257,7 @@ async fn test_enable_controller(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);

Expand Down Expand Up @@ -285,6 +289,7 @@ async fn test_multi_page_admin_queues(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let mut nvmec = instantiate_controller(driver, &gm, None, fault_configuration);

Expand Down Expand Up @@ -359,6 +364,7 @@ async fn test_send_identify_no_fault(driver: DefaultDriver) {
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
};
let cqe = send_identify(driver, fault_configuration).await;

Expand All @@ -376,6 +382,7 @@ async fn test_send_identify_with_sq_fault(driver: DefaultDriver) {
nvme_spec::AdminOpcode::IDENTIFY.0,
QueueFaultBehavior::Update(faulty_identify),
),
pci_fault: PciFaultConfig::new(),
};
let cqe = send_identify(driver, fault_configuration).await;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use guestmem::GuestMemory;
use mesh::CellUpdater;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use pal_async::DefaultDriver;
use pal_async::async_test;
use pci_core::test_helpers::TestPciInterruptController;
Expand Down Expand Up @@ -42,6 +43,7 @@ async fn setup_shadow_doorbells(
let fault_configuration = FaultConfiguration {
fault_active: CellUpdater::new(false).cell(),
admin_fault: AdminQueueFaultConfig::new(),
pci_fault: PciFaultConfig::new(),
}; // Build a controller with 64 entries in the admin queue (just so that the ASQ fits in one page).
let mut nvmec = instantiate_and_build_admin_queue(
cq_buf,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use nvme_resources::NamespaceDefinition;
use nvme_resources::NvmeFaultControllerHandle;
use nvme_resources::fault::AdminQueueFaultConfig;
use nvme_resources::fault::FaultConfiguration;
use nvme_resources::fault::PciFaultConfig;
use nvme_resources::fault::QueueFaultBehavior;
use petri::OpenHclServicingFlags;
use petri::PetriVmBuilder;
Expand Down Expand Up @@ -256,8 +257,9 @@ async fn keepalive_with_nvme_fault(
fault_active: fault_start_updater.cell(),
admin_fault: AdminQueueFaultConfig::new().with_submission_queue_fault(
nvme_spec::AdminOpcode::CREATE_IO_COMPLETION_QUEUE.0,
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. This should never happen.".to_string()),
QueueFaultBehavior::Panic("Received a CREATE_IO_COMPLETION_QUEUE command during servicing with keepalive enabled. THERE IS A BUG SOMEWHERE.".to_string()),
),
pci_fault: PciFaultConfig::new(),
};

let (mut vm, agent) = config
Expand Down
Loading