diff --git a/Cargo.lock b/Cargo.lock index 339ab721674..899cdb112b9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1053,6 +1053,7 @@ dependencies = [ "vm-allocator", "vm-device", "vm-memory", + "vmm-sys-util", ] [[package]] diff --git a/src/pci/Cargo.toml b/src/pci/Cargo.toml index a7ef102acfb..17dc30fcd6d 100644 --- a/src/pci/Cargo.toml +++ b/src/pci/Cargo.toml @@ -27,3 +27,4 @@ vm-memory = { version = "0.16.1", features = [ [dev-dependencies] serde_test = "1.0.177" +vmm-sys-util = "0.14.0" diff --git a/src/pci/src/bus.rs b/src/pci/src/bus.rs index eaa2923e8db..01c9b1f1933 100644 --- a/src/pci/src/bus.rs +++ b/src/pci/src/bus.rs @@ -5,7 +5,6 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::any::Any; use std::collections::HashMap; use std::ops::DerefMut; use std::sync::{Arc, Barrier, Mutex}; @@ -87,14 +86,6 @@ impl PciDevice for PciRoot { fn read_config_register(&mut self, reg_idx: usize) -> u32 { self.config.read_reg(reg_idx) } - - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - fn id(&self) -> Option { - None - } } pub struct PciBus { @@ -191,7 +182,7 @@ impl PciConfigIo { return None; } - let (bus, device, _function, register) = + let (bus, device, function, register) = parse_io_config_address(self.config_address & !0x8000_0000); // Only support one bus. @@ -199,6 +190,11 @@ impl PciConfigIo { return None; } + // Don't support multi-function devices. + if function > 0 { + return None; + } + let pci_bus = self.pci_bus.as_ref().lock().unwrap(); if let Some(d) = pci_bus.devices.get(&(device as u32)) { let mut device = d.lock().unwrap(); @@ -249,6 +245,16 @@ impl PciConfigIo { impl BusDevice for PciConfigIo { fn read(&mut self, _base: u64, offset: u64, data: &mut [u8]) { + // Only allow reads to the register boundary. + let start = offset as usize % 4; + let end = start + data.len(); + if end > 4 { + for d in data.iter_mut() { + *d = 0xff; + } + return; + } + // `offset` is relative to 0xcf8 let value = match offset { 0..=3 => self.config_address, @@ -256,17 +262,8 @@ impl BusDevice for PciConfigIo { _ => 0xffff_ffff, }; - // Only allow reads to the register boundary. - let start = offset as usize % 4; - let end = start + data.len(); - if end <= 4 { - for i in start..end { - data[i - start] = (value >> (i * 8)) as u8; - } - } else { - for d in data { - *d = 0xff; - } + for i in start..end { + data[i - start] = (value >> (i * 8)) as u8; } } @@ -294,13 +291,18 @@ impl PciConfigMmio { } fn config_space_read(&self, config_address: u32) -> u32 { - let (bus, device, _function, register) = parse_mmio_config_address(config_address); + let (bus, device, function, register) = parse_mmio_config_address(config_address); // Only support one bus. if bus != 0 { return 0xffff_ffff; } + // Don't support multi-function devices. + if function > 0 { + return 0xffff_ffff; + } + self.pci_bus .lock() .unwrap() @@ -316,13 +318,18 @@ impl PciConfigMmio { return; } - let (bus, device, _function, register) = parse_mmio_config_address(config_address); + let (bus, device, function, register) = parse_mmio_config_address(config_address); // Only support one bus. if bus != 0 { return; } + // Don't support multi-function devices. + if function > 0 { + return; + } + let pci_bus = self.pci_bus.lock().unwrap(); if let Some(d) = pci_bus.devices.get(&(device as u32)) { let mut device = d.lock().unwrap(); @@ -424,14 +431,28 @@ fn parse_io_config_address(config_address: u32) -> (usize, usize, usize, usize) #[cfg(test)] mod tests { + use std::sync::atomic::AtomicUsize; use std::sync::{Arc, Mutex}; use vm_device::BusDevice; - use super::{PciBus, PciConfigIo, PciRoot}; - use crate::DeviceRelocation; + use super::{PciBus, PciConfigIo, PciConfigMmio, PciRoot}; + use crate::bus::{DEVICE_ID_INTEL_VIRT_PCIE_HOST, VENDOR_ID_INTEL}; + use crate::{ + DeviceRelocation, PciBarConfiguration, PciBarPrefetchable, PciBarRegionType, PciClassCode, + PciConfiguration, PciDevice, PciHeaderType, PciMassStorageSubclass, + }; + + #[derive(Debug, Default)] + struct RelocationMock { + reloc_cnt: AtomicUsize, + } - struct RelocationMock; + impl RelocationMock { + fn cnt(&self) -> usize { + self.reloc_cnt.load(std::sync::atomic::Ordering::SeqCst) + } + } impl DeviceRelocation for RelocationMock { fn move_bar( @@ -442,13 +463,71 @@ mod tests { _pci_dev: &mut dyn crate::PciDevice, _region_type: crate::PciBarRegionType, ) -> std::result::Result<(), std::io::Error> { + self.reloc_cnt + .fetch_add(1, std::sync::atomic::Ordering::SeqCst); Ok(()) } } + struct PciDevMock(PciConfiguration); + + impl PciDevMock { + fn new() -> Self { + let mut config = PciConfiguration::new( + 0x42, + 0x0, + 0x0, + PciClassCode::MassStorage, + &PciMassStorageSubclass::SerialScsiController, + None, + PciHeaderType::Device, + 0x13, + 0x12, + None, + None, + ); + + config + .add_pci_bar(&PciBarConfiguration { + addr: 0x1000, + size: 0x1000, + idx: 0, + region_type: PciBarRegionType::Memory32BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }) + .unwrap(); + + PciDevMock(config) + } + } + + impl PciDevice for PciDevMock { + fn write_config_register( + &mut self, + reg_idx: usize, + offset: u64, + data: &[u8], + ) -> Option> { + self.0.write_config_register(reg_idx, offset, data); + None + } + + fn read_config_register(&mut self, reg_idx: usize) -> u32 { + self.0.read_reg(reg_idx) + } + + fn detect_bar_reprogramming( + &mut self, + reg_idx: usize, + data: &[u8], + ) -> Option { + self.0.detect_bar_reprogramming(reg_idx, data) + } + } + #[test] - fn test_writing_config_address() { - let mock = Arc::new(RelocationMock); + fn test_writing_io_config_address() { + let mock = Arc::new(RelocationMock::default()); let root = PciRoot::new(None); let mut bus = PciConfigIo::new(Arc::new(Mutex::new(PciBus::new(root, mock)))); @@ -489,8 +568,8 @@ mod tests { } #[test] - fn test_reading_config_address() { - let mock = Arc::new(RelocationMock); + fn test_reading_io_config_address() { + let mock = Arc::new(RelocationMock::default()); let root = PciRoot::new(None); let mut bus = PciConfigIo::new(Arc::new(Mutex::new(PciBus::new(root, mock)))); @@ -536,4 +615,336 @@ mod tests { bus.read(0, 0, &mut buffer); assert_eq!(buffer, [0x45, 0x44, 0x43, 0x42]); } + + fn initialize_bus() -> (PciConfigMmio, PciConfigIo, Arc) { + let mock = Arc::new(RelocationMock::default()); + let root = PciRoot::new(None); + let mut bus = PciBus::new(root, mock.clone()); + bus.add_device(1, Arc::new(Mutex::new(PciDevMock::new()))) + .unwrap(); + let bus = Arc::new(Mutex::new(bus)); + (PciConfigMmio::new(bus.clone()), PciConfigIo::new(bus), mock) + } + + #[test] + fn test_invalid_register_boundary_reads() { + let (mut mmio_config, mut io_config, _) = initialize_bus(); + + // Read crossing register boundaries + let mut buffer = [0u8; 4]; + mmio_config.read(0, 1, &mut buffer); + assert_eq!(0xffff_ffff, u32::from_le_bytes(buffer)); + + let mut buffer = [0u8; 4]; + io_config.read(0, 1, &mut buffer); + assert_eq!(0xffff_ffff, u32::from_le_bytes(buffer)); + + // As well in the config space + let mut buffer = [0u8; 4]; + io_config.read(0, 5, &mut buffer); + assert_eq!(0xffff_ffff, u32::from_le_bytes(buffer)); + } + + // MMIO config addresses are of the form + // + // | Base address upper bits | Bus Number | Device Number | Function Number | Register number | Byte offset | + // | 31-28 | 27-20 | 19-15 | 14-12 | 11-2 | 0-1 | + // + // Meaning that the offset is built using: + // + // `bus << 20 | device << 15 | function << 12 | register << 2 | byte` + fn mmio_offset(bus: u8, device: u8, function: u8, register: u16, byte: u8) -> u32 { + assert!(device < 32); + assert!(function < 8); + assert!(register < 1024); + assert!(byte < 4); + + (bus as u32) << 20 + | (device as u32) << 15 + | (function as u32) << 12 + | (register as u32) << 2 + | (byte as u32) + } + + fn read_mmio_config( + config: &mut PciConfigMmio, + bus: u8, + device: u8, + function: u8, + register: u16, + byte: u8, + data: &mut [u8], + ) { + config.read( + 0, + mmio_offset(bus, device, function, register, byte) as u64, + data, + ); + } + + fn write_mmio_config( + config: &mut PciConfigMmio, + bus: u8, + device: u8, + function: u8, + register: u16, + byte: u8, + data: &[u8], + ) { + config.write( + 0, + mmio_offset(bus, device, function, register, byte) as u64, + data, + ); + } + + // Similarly, when using the IO mechanism the config addresses have the following format + // + // | Enabled | zeros | Bus Number | Device Number | Function Number | Register number | zeros | + // | 31 | 30-24 | 23-16 | 15-11 | 10-8 | 7-2 | 1-0 | + // + // + // Meaning that the address is built using: + // + // 0x8000_0000 | bus << 16 | device << 11 | function << 8 | register << 2; + // + // Only 32-bit aligned accesses are allowed here. + fn pio_offset(enabled: bool, bus: u8, device: u8, function: u8, register: u8) -> u32 { + assert!(device < 32); + assert!(function < 8); + assert!(register < 64); + + let offset = if enabled { 0x8000_0000 } else { 0u32 }; + + offset + | (bus as u32) << 16 + | (device as u32) << 11 + | (function as u32) << 8 + | (register as u32) << 2 + } + + fn set_io_address( + config: &mut PciConfigIo, + enabled: bool, + bus: u8, + device: u8, + function: u8, + register: u8, + ) { + let address = u32::to_le_bytes(pio_offset(enabled, bus, device, function, register)); + config.write(0, 0, &address); + } + + fn read_io_config( + config: &mut PciConfigIo, + enabled: bool, + bus: u8, + device: u8, + function: u8, + register: u8, + data: &mut [u8], + ) { + set_io_address(config, enabled, bus, device, function, register); + config.read(0, 4, data); + } + + fn write_io_config( + config: &mut PciConfigIo, + enabled: bool, + bus: u8, + device: u8, + function: u8, + register: u8, + data: &[u8], + ) { + set_io_address(config, enabled, bus, device, function, register); + config.write(0, 4, data); + } + + #[test] + fn test_mmio_invalid_bus_number() { + let (mut mmio_config, _, _) = initialize_bus(); + let mut buffer = [0u8; 4]; + + // Asking for Bus 1 should return all 1s + read_mmio_config(&mut mmio_config, 1, 0, 0, 0, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); + // Writing the same + buffer[0] = 0x42; + write_mmio_config(&mut mmio_config, 1, 0, 0, 15, 0, &buffer); + read_mmio_config(&mut mmio_config, 1, 0, 0, 15, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); + read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0x0)); + + // Asking for Bus 0 should work + read_mmio_config(&mut mmio_config, 0, 0, 0, 0, 0, &mut buffer); + assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); + assert_eq!( + &buffer[2..], + &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) + ); + } + + #[test] + fn test_io_invalid_bus_number() { + let (_, mut pio_config, _) = initialize_bus(); + let mut buffer = [0u8; 4]; + + // Asking for Bus 1 should return all 1s + read_io_config(&mut pio_config, true, 1, 0, 0, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); + + // Asking for Bus 0 should work + read_io_config(&mut pio_config, true, 0, 0, 0, 0, &mut buffer); + assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); + assert_eq!( + &buffer[2..], + &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) + ); + } + + #[test] + fn test_mmio_invalid_function() { + let (mut mmio_config, _, _) = initialize_bus(); + let mut buffer = [0u8; 4]; + + // Asking for Bus 1 should return all 1s + read_mmio_config(&mut mmio_config, 0, 0, 1, 0, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); + // Writing the same + buffer[0] = 0x42; + write_mmio_config(&mut mmio_config, 0, 0, 1, 15, 0, &buffer); + read_mmio_config(&mut mmio_config, 0, 0, 1, 15, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); + read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0x0)); + + // Asking for Bus 0 should work + read_mmio_config(&mut mmio_config, 0, 0, 0, 0, 0, &mut buffer); + assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); + assert_eq!( + &buffer[2..], + &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) + ); + } + + #[test] + fn test_io_invalid_function() { + let (_, mut pio_config, _) = initialize_bus(); + let mut buffer = [0u8; 4]; + + // Asking for Bus 1 should return all 1s + read_io_config(&mut pio_config, true, 0, 0, 1, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); + + // Asking for Bus 0 should work + read_io_config(&mut pio_config, true, 0, 0, 0, 0, &mut buffer); + assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); + assert_eq!( + &buffer[2..], + &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) + ); + } + + #[test] + fn test_io_disabled_reads() { + let (_, mut pio_config, _) = initialize_bus(); + let mut buffer = [0u8; 4]; + + // Trying to read without enabling should return all 1s + read_io_config(&mut pio_config, false, 0, 0, 0, 0, &mut buffer); + assert_eq!(buffer, u32::to_le_bytes(0xffff_ffff)); + + // Asking for Bus 0 should work + read_io_config(&mut pio_config, true, 0, 0, 0, 0, &mut buffer); + assert_eq!(&buffer[..2], &u16::to_le_bytes(VENDOR_ID_INTEL)); + assert_eq!( + &buffer[2..], + &u16::to_le_bytes(DEVICE_ID_INTEL_VIRT_PCIE_HOST) + ); + } + + #[test] + fn test_io_disabled_writes() { + let (_, mut pio_config, _) = initialize_bus(); + + // Try to write the IRQ line used for the root port. + let mut buffer = [0u8; 4]; + + // First read the current value (use `enabled` bit) + read_io_config(&mut pio_config, true, 0, 0, 0, 15, &mut buffer); + let irq_line = buffer[0]; + + // Write without setting the `enabled` bit. + buffer[0] = 0x42; + write_io_config(&mut pio_config, false, 0, 0, 0, 15, &buffer); + + // IRQ line shouldn't have changed + read_io_config(&mut pio_config, true, 0, 0, 0, 15, &mut buffer); + assert_eq!(buffer[0], irq_line); + + // Write with `enabled` bit set. + buffer[0] = 0x42; + write_io_config(&mut pio_config, true, 0, 0, 0, 15, &buffer); + + // IRQ line should change + read_io_config(&mut pio_config, true, 0, 0, 0, 15, &mut buffer); + assert_eq!(buffer[0], 0x42); + } + + #[test] + fn test_mmio_writes() { + let (mut mmio_config, _, _) = initialize_bus(); + let mut buffer = [0u8; 4]; + + read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); + assert_eq!(buffer[0], 0x0); + write_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &[0x42]); + read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer); + assert_eq!(buffer[0], 0x42); + } + + #[test] + fn test_bar_reprogramming() { + let (mut mmio_config, _, mock) = initialize_bus(); + let mut buffer = [0u8; 4]; + assert_eq!(mock.cnt(), 0); + + read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); + let old_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; + assert_eq!(old_addr, 0x1000); + write_mmio_config( + &mut mmio_config, + 0, + 1, + 0, + 0x4, + 0, + &u32::to_le_bytes(0x1312_1110), + ); + + read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer); + let new_addr = u32::from_le_bytes(buffer) & 0xffff_fff0; + assert_eq!(new_addr, 0x1312_1110); + assert_eq!(mock.cnt(), 1); + + // BAR1 should not be used, so reading its address should return all 0s + read_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &mut buffer); + assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); + + // and reprogramming shouldn't have any effect + write_mmio_config( + &mut mmio_config, + 0, + 1, + 0, + 0x5, + 0, + &u32::to_le_bytes(0x1312_1110), + ); + + read_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &mut buffer); + assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]); + } } diff --git a/src/pci/src/configuration.rs b/src/pci/src/configuration.rs index 3a2639ca876..fd1e3958ec8 100644 --- a/src/pci/src/configuration.rs +++ b/src/pci/src/configuration.rs @@ -190,7 +190,7 @@ pub trait PciProgrammingInterface { } /// Types of PCI capabilities. -#[derive(PartialEq, Eq, Copy, Clone)] +#[derive(Debug, PartialEq, Eq, Copy, Clone)] #[allow(dead_code)] #[allow(non_camel_case_types)] #[repr(u8)] @@ -474,8 +474,6 @@ pub enum Error { BarInvalid(usize), BarInvalid64(usize), BarSizeInvalid(u64), - CapabilityEmpty, - CapabilityLengthInvalid(usize), CapabilitySpaceFull(usize), Decode32BarSize, Decode64BarSize, @@ -505,8 +503,6 @@ impl Display for Error { NUM_BAR_REGS - 1 ), BarSizeInvalid(s) => write!(f, "bar address {s} not a power of two"), - CapabilityEmpty => write!(f, "empty capabilities are invalid"), - CapabilityLengthInvalid(l) => write!(f, "Invalid capability length {l}"), CapabilitySpaceFull(s) => write!(f, "capability of size {s} doesn't fit"), Decode32BarSize => write!(f, "failed to decode 32 bits BAR size"), Decode64BarSize => write!(f, "failed to decode 64 bits BAR size"), @@ -706,6 +702,10 @@ impl PciConfiguration { let bar_idx = config.idx; let reg_idx = BAR0_REG + bar_idx; + if bar_idx >= NUM_BAR_REGS { + return Err(Error::BarInvalid(bar_idx)); + } + if self.bars[bar_idx].used { return Err(Error::BarInUse(bar_idx)); } @@ -714,10 +714,6 @@ impl PciConfiguration { return Err(Error::BarSizeInvalid(config.size)); } - if bar_idx >= NUM_BAR_REGS { - return Err(Error::BarInvalid(bar_idx)); - } - let end_addr = config .addr .checked_add(config.size - 1) @@ -739,7 +735,7 @@ impl PciConfiguration { } if self.bars[bar_idx + 1].used { - return Err(Error::BarInUse64(bar_idx)); + return Err(Error::BarInUse64(bar_idx + 1)); } // Encode the BAR size as expected by the software running in @@ -789,15 +785,12 @@ impl PciConfiguration { } /// Adds the capability `cap_data` to the list of capabilities. - /// `cap_data` should include the two-byte PCI capability header (type, next), - /// but not populate it. Correct values will be generated automatically based - /// on `cap_data.id()`. + /// + /// `cap_data` should not include the two-byte PCI capability header (type, next). + /// Correct values will be generated automatically based on `cap_data.id()` and + /// `cap_data.len()`. pub fn add_capability(&mut self, cap_data: &dyn PciCapability) -> Result { - let total_len = cap_data.bytes().len(); - // Check that the length is valid. - if cap_data.bytes().is_empty() { - return Err(Error::CapabilityEmpty); - } + let total_len = cap_data.bytes().len() + 2; let (cap_offset, tail_offset) = match self.last_capability { Some((offset, len)) => (Self::next_dword(offset, len), offset + 1), None => (FIRST_CAPABILITY_OFFSET, CAPABILITY_LIST_HEAD_OFFSET), @@ -838,6 +831,10 @@ impl PciConfiguration { } pub fn write_config_register(&mut self, reg_idx: usize, offset: u64, data: &[u8]) { + if reg_idx >= NUM_CONFIGURATION_REGISTERS { + return; + } + if offset as usize + data.len() > 4 { return; } @@ -846,11 +843,14 @@ impl PciConfiguration { if let Some(msix_cap_reg_idx) = self.msix_cap_reg_idx { if let Some(msix_config) = &self.msix_config { if msix_cap_reg_idx == reg_idx && offset == 2 && data.len() == 2 { + // 2-bytes write in the Message Control field msix_config .lock() .unwrap() .set_msg_ctl(LittleEndian::read_u16(data)); } else if msix_cap_reg_idx == reg_idx && offset == 0 && data.len() == 4 { + // 4 bytes write at the beginning. Ignore the first 2 bytes which are the + // capability id and next capability pointer msix_config .lock() .unwrap() @@ -927,9 +927,15 @@ impl PciConfiguration { region_type, }); } else if (reg_idx > BAR0_REG) - && ((self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]) - != (self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]) - || (value & mask) != (self.bars[bar_idx].addr & mask)) + && ( + // The lower BAR (of this 64bit BAR) has been reprogrammed to a different value + // than it used to be + (self.registers[reg_idx - 1] & self.writable_bits[reg_idx - 1]) + != (self.bars[bar_idx - 1].addr & self.writable_bits[reg_idx - 1]) || + // Or the lower BAR hasn't been changed but the upper one is being reprogrammed + // now to a different value + (value & mask) != (self.bars[bar_idx].addr & mask) + ) { info!( "Detected BAR reprogramming: (BAR {}) 0x{:x}->0x{:x}", @@ -1002,9 +1008,11 @@ impl Default for PciBarConfiguration { #[cfg(test)] mod tests { + use vm_memory::ByteValued; use super::*; + use crate::MsixCap; #[repr(C, packed)] #[derive(Clone, Copy, Default)] @@ -1027,6 +1035,28 @@ mod tests { } } + struct BadCap { + data: Vec, + } + + impl BadCap { + fn new(len: u8) -> Self { + Self { + data: (0..len).collect(), + } + } + } + + impl PciCapability for BadCap { + fn bytes(&self) -> &[u8] { + &self.data + } + + fn id(&self) -> PciCapabilityId { + PciCapabilityId::VendorSpecific + } + } + #[test] fn add_capability() { let mut cfg = PciConfiguration::new( @@ -1043,6 +1073,20 @@ mod tests { None, ); + // Bad size capabilities + assert!(matches!( + cfg.add_capability(&BadCap::new(127)), + Err(Error::CapabilitySpaceFull(129)) + )); + cfg.add_capability(&BadCap::new(62)).unwrap(); + cfg.add_capability(&BadCap::new(62)).unwrap(); + assert!(matches!( + cfg.add_capability(&BadCap::new(0)), + Err(Error::CapabilitySpaceFull(2)) + )); + // Reset capabilities + cfg.last_capability = None; + // Add two capabilities with different contents. let cap1 = TestCap { len: 4, foo: 0xAA }; let cap1_offset = cfg.add_capability(&cap1).unwrap(); @@ -1073,6 +1117,107 @@ mod tests { assert_eq!((cap2_data >> 24) & 0xFF, 0x55); // cap2.foo } + #[test] + fn test_msix_capability() { + let mut cfg = PciConfiguration::new( + 0x1234, + 0x5678, + 0x1, + PciClassCode::MultimediaController, + &PciMultimediaSubclass::AudioController, + None, + PciHeaderType::Device, + 0xABCD, + 0x2468, + None, + None, + ); + + // Information about the MSI-X capability layout: https://wiki.osdev.org/PCI#Enabling_MSI-X + let msix_cap = MsixCap::new( + 3, // Using BAR3 for message control table + 1024, // 1024 MSI-X vectors + 0x4000, // Offset of message control table inside the BAR + 4, // BAR4 used for pending control bit + 0x420, // Offset of pending bit array (PBA) inside BAR + ); + cfg.add_capability(&msix_cap).unwrap(); + + let cap_reg = FIRST_CAPABILITY_OFFSET / 4; + let reg = cfg.read_reg(cap_reg); + // Capability ID is MSI-X + assert_eq!( + PciCapabilityId::from((reg & 0xff) as u8), + PciCapabilityId::MsiX + ); + // We only have one capability, so `next` should be 0 + assert_eq!(((reg >> 8) & 0xff) as u8, 0); + let msg_ctl = (reg >> 16) as u16; + + // MSI-X is enabled + assert_eq!(msg_ctl & 0x8000, 0x8000); + // Vectors are not masked + assert_eq!(msg_ctl & 0x4000, 0x0); + // Reserved bits are 0 + assert_eq!(msg_ctl & 0x3800, 0x0); + // We've got 1024 vectors (Table size is N-1 encoded) + assert_eq!((msg_ctl & 0x7ff) + 1, 1024); + + let reg = cfg.read_reg(cap_reg + 1); + // We are using BAR3 + assert_eq!(reg & 0x7, 3); + // Message Control Table is located in offset 0x4000 inside the BAR + // We don't need to shift. Offset needs to be 8-byte aligned - so BIR + // is stored in its last 3 bits (which we need to mask out). + assert_eq!(reg & 0xffff_fff8, 0x4000); + + let reg = cfg.read_reg(cap_reg + 2); + // PBA is 0x420 bytes inside BAR4 + assert_eq!(reg & 0x7, 4); + assert_eq!(reg & 0xffff_fff8, 0x420); + + // Check read/write mask + // Capability Id of MSI-X is 0x11 + cfg.write_config_register(cap_reg, 0, &[0x0]); + assert_eq!( + PciCapabilityId::from((cfg.read_reg(cap_reg) & 0xff) as u8), + PciCapabilityId::MsiX + ); + // Cannot override next capability pointer + cfg.write_config_register(cap_reg, 1, &[0x42]); + assert_eq!((cfg.read_reg(cap_reg) >> 8) & 0xff, 0); + + // We are writing this: + // + // meaning: | MSI enabled | Vectors Masked | Reserved | Table size | + // bit: | 15 | 14 | 13 - 11 | 0 - 10 | + // R/W: | R/W | R/W | R | R | + let msg_ctl = (cfg.read_reg(cap_reg) >> 16) as u16; + // Try to flip all bits + cfg.write_config_register(cap_reg, 2, &u16::to_le_bytes(!msg_ctl)); + let msg_ctl = (cfg.read_reg(cap_reg) >> 16) as u16; + // MSI enabled and Vectors masked should be flipped (MSI disabled and vectors masked) + assert_eq!(msg_ctl & 0xc000, 0x4000); + // Reserved bits should still be 0 + assert_eq!(msg_ctl & 0x3800, 0); + // Table size should not have changed + assert_eq!((msg_ctl & 0x07ff) + 1, 1024); + + // Table offset is read only + let table_offset = cfg.read_reg(cap_reg + 1); + // Try to flip all bits + cfg.write_config_register(cap_reg + 1, 0, &u32::to_le_bytes(!table_offset)); + // None should be flipped + assert_eq!(cfg.read_reg(cap_reg + 1), table_offset); + + // PBA offset also + let pba_offset = cfg.read_reg(cap_reg + 2); + // Try to flip all bits + cfg.write_config_register(cap_reg + 2, 0, &u32::to_le_bytes(!pba_offset)); + // None should be flipped + assert_eq!(cfg.read_reg(cap_reg + 2), pba_offset); + } + #[derive(Copy, Clone)] enum TestPi { Test = 0x5a, @@ -1108,4 +1253,341 @@ mod tests { assert_eq!(subclass, 0x01); assert_eq!(prog_if, 0x5a); } + + #[test] + fn test_bar_size_encoding() { + assert!(encode_32_bits_bar_size(0).is_none()); + assert!(decode_32_bits_bar_size(0).is_none()); + assert!(encode_64_bits_bar_size(0).is_none()); + assert!(decode_64_bits_bar_size(0, 0).is_none()); + + // According to OSDev wiki (https://wiki.osdev.org/PCI#Address_and_size_of_the_BAR): + // + // > To determine the amount of address space needed by a PCI device, you must save the + // > original value of the BAR, write a value of all 1's to the register, then read it back. + // > The amount of memory can then be determined by masking the information bits, performing + // > a bitwise NOT ('~' in C), and incrementing the value by 1. The original value of the + // BAR > should then be restored. The BAR register is naturally aligned and as such you can + // only > modify the bits that are set. For example, if a device utilizes 16 MB it will + // have BAR0 > filled with 0xFF000000 (0x1000000 after decoding) and you can only modify + // the upper > 8-bits. + // + // So we should be encoding an address like this: `addr` -> `!(addr - 1)` + let encoded = encode_32_bits_bar_size(0x0101_0101).unwrap(); + assert_eq!(encoded, 0xfefe_feff); + assert_eq!(decode_32_bits_bar_size(encoded), Some(0x0101_0101)); + + // Similarly we encode a 64 bits size and then store it as a 2 32bit addresses (we use + // two BARs). + let (hi, lo) = encode_64_bits_bar_size(0xffff_ffff_ffff_fff0).unwrap(); + assert_eq!(hi, 0); + assert_eq!(lo, 0x0000_0010); + assert_eq!(decode_64_bits_bar_size(hi, lo), Some(0xffff_ffff_ffff_fff0)); + } + + #[test] + fn test_add_pci_bar() { + let mut pci_config = PciConfiguration::new( + 0x42, + 0x0, + 0x0, + PciClassCode::MassStorage, + &PciMassStorageSubclass::SerialScsiController, + None, + PciHeaderType::Device, + 0x13, + 0x12, + None, + None, + ); + + // BAR size can only be a power of 2 + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: 0x1000, + size: 0x1001, + idx: 0, + region_type: PciBarRegionType::Memory32BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }), + Err(Error::BarSizeInvalid(0x1001)) + )); + + // Invalid BAR index + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: 0x1000, + size: 0x1000, + idx: NUM_BAR_REGS, + region_type: PciBarRegionType::Memory32BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable + }), + Err(Error::BarInvalid(NUM_BAR_REGS)) + )); + // 64bit BARs need 2 BAR slots actually + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: 0x1000, + size: 0x1000, + idx: NUM_BAR_REGS - 1, + region_type: PciBarRegionType::Memory64BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable + }), + Err(Error::BarInvalid64(_)) + )); + + // Check for valid addresses + // Can't have an address that exceeds 32 bits for a 32bit BAR + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: 0x1000_0000_0000_0000, + size: 0x1000, + idx: 0, + region_type: PciBarRegionType::Memory32BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable + }), + Err(Error::BarAddressInvalid(0x1000_0000_0000_0000, 0x1000)) + )); + // Ensure that we handle properly overflows in 64bit BAR ranges + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: u64::MAX, + size: 0x2, + idx: 0, + region_type: PciBarRegionType::Memory64BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable + }), + Err(Error::BarAddressInvalid(u64::MAX, 2)) + )); + + // We can't reuse a BAR slot + pci_config + .add_pci_bar(&PciBarConfiguration { + addr: 0x1000, + size: 0x1000, + idx: 0, + region_type: PciBarRegionType::Memory32BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }) + .unwrap(); + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: 0x1000, + size: 0x1000, + idx: 0, + region_type: PciBarRegionType::Memory32BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }), + Err(Error::BarInUse(0)) + )); + pci_config + .add_pci_bar(&PciBarConfiguration { + addr: 0x0000_0001_0000_0000, + size: 0x2000, + idx: 2, + region_type: PciBarRegionType::Memory64BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }) + .unwrap(); + // For 64bit BARs two BARs are used (in this case BARs 1 and 2) + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: 0x0000_0001_0000_0000, + size: 0x1000, + idx: 2, + region_type: PciBarRegionType::Memory64BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }), + Err(Error::BarInUse(2)) + )); + assert!(matches!( + pci_config.add_pci_bar(&PciBarConfiguration { + addr: 0x0000_0001_0000_0000, + size: 0x1000, + idx: 1, + region_type: PciBarRegionType::Memory64BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }), + Err(Error::BarInUse64(2)) + )); + + assert_eq!(pci_config.get_bar_addr(0), 0x1000); + assert_eq!(pci_config.get_bar_addr(2), 0x1_0000_0000); + } + + #[test] + fn test_access_invalid_reg() { + let mut pci_config = PciConfiguration::new( + 0x42, + 0x0, + 0x0, + PciClassCode::MassStorage, + &PciMassStorageSubclass::SerialScsiController, + None, + PciHeaderType::Device, + 0x13, + 0x12, + None, + None, + ); + + // Can't read past the end of the configuration space + assert_eq!( + pci_config.read_reg(NUM_CONFIGURATION_REGISTERS), + 0xffff_ffff + ); + + // Read out all of configuration space + let config_space: Vec = (0..NUM_CONFIGURATION_REGISTERS) + .map(|reg_idx| pci_config.read_reg(reg_idx)) + .collect(); + + // Various invalid write accesses + + // Past the end of config space + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 0, &[0x42]); + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 0, &[0x42, 0x42]); + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 0, &[0x42, 0x42, 0x42, 0x42]); + + // Past register boundaries + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 1, &[0x42, 0x42, 0x42, 0x42]); + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 2, &[0x42, 0x42, 0x42]); + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 3, &[0x42, 0x42]); + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 4, &[0x42]); + pci_config.write_config_register(NUM_CONFIGURATION_REGISTERS, 5, &[]); + + for (reg_idx, reg) in config_space.iter().enumerate() { + assert_eq!(*reg, pci_config.read_reg(reg_idx)); + } + } + + #[test] + fn test_detect_bar_reprogramming() { + let mut pci_config = PciConfiguration::new( + 0x42, + 0x0, + 0x0, + PciClassCode::MassStorage, + &PciMassStorageSubclass::SerialScsiController, + None, + PciHeaderType::Device, + 0x13, + 0x12, + None, + None, + ); + + // Trying to reprogram with something less than 4 bytes (length of the address) should fail + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13]) + .is_none()); + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) + .is_none()); + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12]) + .is_none()); + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG, &[0x13, 0x12, 0x16]) + .is_none()); + + // Writing all 1s is a special case where we're actually asking for the size of the BAR + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0xffff_ffff)) + .is_none()); + + // Trying to reprogram a BAR that hasn't be initialized does nothing + for reg_idx in BAR0_REG..BAR0_REG + NUM_BAR_REGS { + assert!(pci_config + .detect_bar_reprogramming(reg_idx, &u32::to_le_bytes(0x1312_4243)) + .is_none()); + } + + // Reprogramming of a 32bit BAR + pci_config + .add_pci_bar(&PciBarConfiguration { + addr: 0x1000, + size: 0x1000, + idx: 0, + region_type: PciBarRegionType::Memory32BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }) + .unwrap(); + + assert_eq!( + pci_config.detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x2000)), + Some(BarReprogrammingParams { + old_base: 0x1000, + new_base: 0x2000, + len: 0x1000, + region_type: PciBarRegionType::Memory32BitRegion + }) + ); + + pci_config.write_config_register(BAR0_REG, 0, &u32::to_le_bytes(0x2000)); + assert_eq!(pci_config.read_reg(BAR0_REG) & 0xffff_fff0, 0x2000); + + // Attempting to reprogram the BAR with the same address should not have any effect + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG, &u32::to_le_bytes(0x2000)) + .is_none()); + + // Reprogramming of a 64bit BAR + pci_config + .add_pci_bar(&PciBarConfiguration { + addr: 0x13_1200_0000, + size: 0x8000, + idx: 1, + region_type: PciBarRegionType::Memory64BitRegion, + prefetchable: PciBarPrefetchable::Prefetchable, + }) + .unwrap(); + + assert_eq!(pci_config.read_reg(BAR0_REG + 1) & 0xffff_fff0, 0x1200_0000); + assert_eq!( + pci_config.bars[1].r#type, + Some(PciBarRegionType::Memory64BitRegion) + ); + assert_eq!(pci_config.read_reg(BAR0_REG + 2), 0x13); + assert!(pci_config.bars[2].r#type.is_none()); + + // First we write the lower 32 bits and this shouldn't cause any reprogramming + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x4200_0000)) + .is_none()); + pci_config.write_config_register(BAR0_REG + 1, 0, &u32::to_le_bytes(0x4200_0000)); + + // Writing the upper 32 bits should trigger the reprogramming + assert_eq!( + pci_config.detect_bar_reprogramming(BAR0_REG + 2, &u32::to_le_bytes(0x84)), + Some(BarReprogrammingParams { + old_base: 0x13_1200_0000, + new_base: 0x84_4200_0000, + len: 0x8000, + region_type: PciBarRegionType::Memory64BitRegion + }) + ); + pci_config.write_config_register(BAR0_REG + 2, 0, &u32::to_le_bytes(0x84)); + + // Trying to reprogram the upper bits directly (without first touching the lower bits) + // should trigger a reprogramming + assert_eq!( + pci_config.detect_bar_reprogramming(BAR0_REG + 2, &u32::to_le_bytes(0x1312)), + Some(BarReprogrammingParams { + old_base: 0x84_4200_0000, + new_base: 0x1312_4200_0000, + len: 0x8000, + region_type: PciBarRegionType::Memory64BitRegion + }) + ); + pci_config.write_config_register(BAR0_REG + 2, 0, &u32::to_le_bytes(0x1312)); + + // Attempting to reprogram the BAR with the same address should not have any effect + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG + 1, &u32::to_le_bytes(0x4200_0000)) + .is_none()); + assert!(pci_config + .detect_bar_reprogramming(BAR0_REG + 2, &u32::to_le_bytes(0x1312)) + .is_none()); + } } diff --git a/src/pci/src/device.rs b/src/pci/src/device.rs index 57f5e63eaeb..11db4f478a5 100644 --- a/src/pci/src/device.rs +++ b/src/pci/src/device.rs @@ -5,14 +5,12 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::any::Any; use std::sync::{Arc, Barrier}; use std::{io, result}; use vm_allocator::AddressAllocator; use crate::configuration::{self, PciBarRegionType}; -use crate::PciBarConfiguration; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum Error { @@ -27,7 +25,7 @@ pub enum Error { } pub type Result = std::result::Result; -#[derive(Clone, Copy)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct BarReprogrammingParams { pub old_base: u64, pub new_base: u64, @@ -42,8 +40,8 @@ pub trait PciDevice: Send { &mut self, _mmio32_allocator: &mut AddressAllocator, _mmio64_allocator: &mut AddressAllocator, - ) -> Result> { - Ok(Vec::new()) + ) -> Result<()> { + Ok(()) } /// Frees the PCI BARs previously allocated with a call to allocate_bars(). @@ -89,12 +87,6 @@ pub trait PciDevice: Send { fn move_bar(&mut self, _old_base: u64, _new_base: u64) -> result::Result<(), io::Error> { Ok(()) } - /// Provides a mutable reference to the Any trait. This is useful to let - /// the caller have access to the underlying type behind the trait. - fn as_any_mut(&mut self) -> &mut dyn Any; - - /// Optionally returns a unique identifier. - fn id(&self) -> Option; } /// This trait defines a set of functions which can be triggered whenever a diff --git a/src/pci/src/lib.rs b/src/pci/src/lib.rs index 1b9a3a99f76..83f5a7a5dcf 100644 --- a/src/pci/src/lib.rs +++ b/src/pci/src/lib.rs @@ -30,10 +30,7 @@ pub use self::configuration::{ pub use self::device::{ BarReprogrammingParams, DeviceRelocation, Error as PciDeviceError, PciDevice, }; -pub use self::msix::{ - Error as MsixError, MsixCap, MsixConfig, MsixConfigState, MsixTableEntry, MSIX_CONFIG_ID, - MSIX_TABLE_ENTRY_SIZE, -}; +pub use self::msix::{Error as MsixError, MsixCap, MsixConfig, MsixConfigState, MsixTableEntry}; /// PCI has four interrupt pins A->D. #[derive(Copy, Clone)] diff --git a/src/pci/src/msix.rs b/src/pci/src/msix.rs index 82f851322b4..50abeaf9737 100644 --- a/src/pci/src/msix.rs +++ b/src/pci/src/msix.rs @@ -21,10 +21,6 @@ const MSIX_PBA_ENTRIES_MODULO: u64 = 8; const BITS_PER_PBA_ENTRY: usize = 64; const FUNCTION_MASK_BIT: u8 = 14; const MSIX_ENABLE_BIT: u8 = 15; -const FUNCTION_MASK_MASK: u16 = (1 << FUNCTION_MASK_BIT) as u16; -const MSIX_ENABLE_MASK: u16 = (1 << MSIX_ENABLE_BIT) as u16; -pub const MSIX_TABLE_ENTRY_SIZE: usize = 16; -pub const MSIX_CONFIG_ID: &str = "msix_config"; #[derive(Debug, thiserror::Error, displaydoc::Display)] pub enum Error { @@ -72,8 +68,8 @@ pub struct MsixConfig { pub pba_entries: Vec, pub devid: u32, pub interrupt_source_group: Arc, - masked: bool, - enabled: bool, + pub masked: bool, + pub enabled: bool, } impl std::fmt::Debug for MsixConfig { @@ -136,7 +132,7 @@ impl MsixConfig { let mut table_entries: Vec = Vec::new(); table_entries.resize_with(msix_vectors as usize, Default::default); let mut pba_entries: Vec = Vec::new(); - let num_pba_entries: usize = ((msix_vectors as usize) / BITS_PER_PBA_ENTRY) + 1; + let num_pba_entries: usize = (msix_vectors as usize).div_ceil(BITS_PER_PBA_ENTRY); pba_entries.resize_with(num_pba_entries, Default::default); (table_entries, pba_entries, true, false) @@ -161,14 +157,6 @@ impl MsixConfig { } } - pub fn masked(&self) -> bool { - self.masked - } - - pub fn enabled(&self) -> bool { - self.enabled - } - pub fn set_msg_ctl(&mut self, reg: u16) { let old_masked = self.masked; let old_enabled = self.enabled; @@ -225,8 +213,8 @@ impl MsixConfig { let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; if index >= self.table_entries.len() { - debug!("Invalid MSI-X table entry index {index}"); - data.copy_from_slice(&[0xff; 8][..data.len()]); + warn!("Invalid MSI-X table entry index {index}"); + data.fill(0xff); return; } @@ -237,13 +225,12 @@ impl MsixConfig { 0x4 => self.table_entries[index].msg_addr_hi, 0x8 => self.table_entries[index].msg_data, 0xc => self.table_entries[index].vector_ctl, - _ => { - error!("invalid offset"); - 0 + off => { + warn!("msi-x: invalid offset in table entry read: {off}"); + 0xffff_ffff } }; - debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); LittleEndian::write_u32(data, value); } 8 => { @@ -256,17 +243,17 @@ impl MsixConfig { (u64::from(self.table_entries[index].vector_ctl) << 32) | u64::from(self.table_entries[index].msg_data) } - _ => { - error!("invalid offset"); - 0 + off => { + warn!("msi-x: invalid offset in table entry read: {off}"); + 0xffff_ffff_ffff_ffff } }; - debug!("MSI_R TABLE offset 0x{:x} data 0x{:x}", offset, value); LittleEndian::write_u64(data, value); } - _ => { - error!("invalid data length"); + len => { + warn!("msi-x: invalid length in table entry read: {len}"); + data.fill(0xff); } } } @@ -278,7 +265,7 @@ impl MsixConfig { let modulo_offset = offset % MSIX_TABLE_ENTRIES_MODULO; if index >= self.table_entries.len() { - debug!("Invalid MSI-X table entry index {index}"); + warn!("msi-x: invalid table entry index {index}"); return; } @@ -295,10 +282,8 @@ impl MsixConfig { 0xc => { self.table_entries[index].vector_ctl = value; } - _ => error!("invalid offset"), + off => warn!("msi-x: invalid offset in table entry write: {off}"), }; - - debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); } 8 => { let value = LittleEndian::read_u64(data); @@ -311,12 +296,10 @@ impl MsixConfig { self.table_entries[index].msg_data = (value & 0xffff_ffffu64) as u32; self.table_entries[index].vector_ctl = (value >> 32) as u32; } - _ => error!("invalid offset"), + off => warn!("msi-x: invalid offset in table entry write: {off}"), }; - - debug!("MSI_W TABLE offset 0x{:x} data 0x{:x}", offset, value); } - _ => error!("invalid data length"), + len => warn!("msi-x: invalid length in table entry write: {len}"), }; let table_entry = &self.table_entries[index]; @@ -329,7 +312,7 @@ impl MsixConfig { // Update interrupt routes // Optimisation: only update routes if the entry is not masked; // this is safe because if the entry is masked (starts masked as per spec) - // in the table then it won't be triggered. (See: #4273) + // in the table then it won't be triggered. if self.enabled && !self.masked && !table_entry.masked() { let config = MsiIrqSourceConfig { high_addr: table_entry.msg_addr_hi, @@ -357,8 +340,8 @@ impl MsixConfig { // device. // Check if bit has been flipped - if !self.masked() - && self.enabled() + if !self.masked + && self.enabled && old_entry.masked() && !table_entry.masked() && self.get_pba_bit(index as u16) == 1 @@ -367,15 +350,13 @@ impl MsixConfig { } } - pub fn read_pba(&mut self, offset: u64, data: &mut [u8]) { - assert!(data.len() <= 8); - + pub fn read_pba(&self, offset: u64, data: &mut [u8]) { let index: usize = (offset / MSIX_PBA_ENTRIES_MODULO) as usize; let modulo_offset = offset % MSIX_PBA_ENTRIES_MODULO; if index >= self.pba_entries.len() { - debug!("Invalid MSI-X PBA entry index {index}"); - data.copy_from_slice(&[0xff; 8][..data.len()]); + warn!("msi-x: invalid PBA entry index {index}"); + data.fill(0xff); return; } @@ -384,29 +365,28 @@ impl MsixConfig { let value: u32 = match modulo_offset { 0x0 => (self.pba_entries[index] & 0xffff_ffffu64) as u32, 0x4 => (self.pba_entries[index] >> 32) as u32, - _ => { - error!("invalid offset"); - 0 + off => { + warn!("msi-x: invalid offset in pba entry read: {off}"); + 0xffff_ffff } }; - debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); LittleEndian::write_u32(data, value); } 8 => { let value: u64 = match modulo_offset { 0x0 => self.pba_entries[index], - _ => { - error!("invalid offset"); - 0 + off => { + warn!("msi-x: invalid offset in pba entry read: {off}"); + 0xffff_ffff_ffff_ffff } }; - debug!("MSI_R PBA offset 0x{:x} data 0x{:x}", offset, value); LittleEndian::write_u64(data, value); } - _ => { - error!("invalid data length"); + len => { + warn!("msi-x: invalid length in table entry read: {len}"); + data.fill(0xff); } } } @@ -418,9 +398,13 @@ impl MsixConfig { pub fn set_pba_bit(&mut self, vector: u16, reset: bool) { assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); + if (vector as usize) >= self.table_entries.len() { + return; + } + let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; - let mut mask: u64 = (1 << shift) as u64; + let mut mask: u64 = 1u64 << shift; if reset { mask = !mask; @@ -433,6 +417,10 @@ impl MsixConfig { fn get_pba_bit(&self, vector: u16) -> u8 { assert!(vector < MAX_MSIX_VECTORS_PER_DEVICE); + if (vector as usize) >= self.table_entries.len() { + return 0xff; + } + let index: usize = (vector as usize) / BITS_PER_PBA_ENTRY; let shift: usize = (vector as usize) % BITS_PER_PBA_ENTRY; @@ -506,59 +494,396 @@ impl MsixCap { pba: (pba_off & 0xffff_fff8u32) | u32::from(pba_pci_bar & 0x7u8), } } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::{AtomicUsize, Ordering}; + + use vmm_sys_util::eventfd::EventFd; - pub fn set_msg_ctl(&mut self, data: u16) { - self.msg_ctl = (self.msg_ctl & !(FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)) - | (data & (FUNCTION_MASK_MASK | MSIX_ENABLE_MASK)); + use super::*; + + #[derive(Debug)] + struct MockInterrupt { + trigger_cnt: [AtomicUsize; 2], + update_cnt: [AtomicUsize; 2], + event_fd: [EventFd; 2], } - pub fn masked(&self) -> bool { - (self.msg_ctl >> FUNCTION_MASK_BIT) & 0x1 == 0x1 + impl MockInterrupt { + fn new() -> Self { + MockInterrupt { + trigger_cnt: [AtomicUsize::new(0), AtomicUsize::new(0)], + update_cnt: [AtomicUsize::new(0), AtomicUsize::new(0)], + event_fd: [ + EventFd::new(libc::EFD_NONBLOCK).unwrap(), + EventFd::new(libc::EFD_NONBLOCK).unwrap(), + ], + } + } + + fn interrupt_cnt(&self, index: InterruptIndex) -> usize { + self.trigger_cnt[index as usize].load(Ordering::SeqCst) + } + + fn update_cnt(&self, index: InterruptIndex) -> usize { + self.update_cnt[index as usize].load(Ordering::SeqCst) + } } - pub fn enabled(&self) -> bool { - (self.msg_ctl >> MSIX_ENABLE_BIT) & 0x1 == 0x1 + impl InterruptSourceGroup for MockInterrupt { + fn trigger(&self, index: InterruptIndex) -> vm_device::interrupt::Result<()> { + self.trigger_cnt[index as usize].fetch_add(1, Ordering::SeqCst); + Ok(()) + } + + fn notifier(&self, index: InterruptIndex) -> Option<&EventFd> { + self.event_fd.get(index as usize) + } + + fn update( + &self, + index: InterruptIndex, + _config: InterruptSourceConfig, + _masked: bool, + _set_gsi: bool, + ) -> vm_device::interrupt::Result<()> { + self.update_cnt[index as usize].fetch_add(1, Ordering::SeqCst); + Ok(()) + } + + fn set_gsi(&self) -> vm_device::interrupt::Result<()> { + Ok(()) + } } - pub fn table_offset(&self) -> u32 { - self.table & 0xffff_fff8 + #[test] + #[should_panic] + fn test_too_many_vectors() { + MsixConfig::new(2049, Arc::new(MockInterrupt::new()), 0x42, None).unwrap(); } - pub fn pba_offset(&self) -> u32 { - self.pba & 0xffff_fff8 + #[test] + fn test_new_msix_config() { + let vectors = Arc::new(MockInterrupt::new()); + let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + assert_eq!(config.devid, 0x42); + assert!(config.masked); + assert!(!config.enabled); + assert_eq!(config.table_entries.len(), 2); + assert_eq!(config.pba_entries.len(), 1); } - pub fn table_set_offset(&mut self, addr: u32) { - self.table &= 0x7; - self.table += addr; + #[test] + fn test_enable_msix_vectors() { + let vectors = Arc::new(MockInterrupt::new()); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + + assert!(!config.enabled); + assert!(config.masked); + + // Bit 15 marks whether MSI-X is enabled + // Bit 14 marks whether vectors are masked + config.set_msg_ctl(0x8000); + assert!(config.enabled); + assert!(!config.masked); + + config.set_msg_ctl(0x4000); + assert!(!config.enabled); + assert!(config.masked); + + config.set_msg_ctl(0xC000); + assert!(config.enabled); + assert!(config.masked); + + config.set_msg_ctl(0x0); + assert!(!config.enabled); + assert!(!config.masked); } - pub fn pba_set_offset(&mut self, addr: u32) { - self.pba &= 0x7; - self.pba += addr; + #[test] + #[should_panic] + fn test_table_access_read_too_big() { + let vectors = Arc::new(MockInterrupt::new()); + let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut buffer = [0u8; 16]; + + config.read_table(0, &mut buffer); } - pub fn table_bir(&self) -> u32 { - self.table & 0x7 + #[test] + fn test_read_table_past_end() { + let vectors = Arc::new(MockInterrupt::new()); + let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut buffer = [0u8; 8]; + + // We have 2 vectors (16 bytes each), so we should be able to read up to 32 bytes. + // Past that the device should respond with all 1s + config.read_table(32, &mut buffer); + assert_eq!(buffer, [0xff; 8]); } - pub fn pba_bir(&self) -> u32 { - self.pba & 0x7 + #[test] + fn test_read_table_bad_length() { + let vectors = Arc::new(MockInterrupt::new()); + let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut buffer = [0u8; 8]; + + // We can either read 4 or 8 bytes + config.read_table(0, &mut buffer[..0]); + assert_eq!(buffer, [0x0; 8]); + config.read_table(0, &mut buffer[..1]); + assert_eq!(buffer[..1], [0xff; 1]); + config.read_table(0, &mut buffer[..2]); + assert_eq!(buffer[..2], [0xff; 2]); + config.read_table(0, &mut buffer[..3]); + assert_eq!(buffer[..3], [0xff; 3]); + config.read_table(0, &mut buffer[..5]); + assert_eq!(buffer[..5], [0xff; 5]); + config.read_table(0, &mut buffer[..6]); + assert_eq!(buffer[..6], [0xff; 6]); + config.read_table(0, &mut buffer[..7]); + assert_eq!(buffer[..7], [0xff; 7]); + config.read_table(0, &mut buffer[..4]); + assert_eq!(buffer, u64::to_le_bytes(0x00ff_ffff_0000_0000)); + config.read_table(0, &mut buffer); + assert_eq!(buffer, u64::to_le_bytes(0)); } - pub fn table_size(&self) -> u16 { - (self.msg_ctl & 0x7ff) + 1 + #[test] + fn test_access_table() { + let vectors = Arc::new(MockInterrupt::new()); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + // enabled and not masked + config.set_msg_ctl(0x8000); + assert_eq!(vectors.update_cnt(0), 1); + assert_eq!(vectors.update_cnt(1), 1); + let mut buffer = [0u8; 8]; + + // Write first vector's address with a single 8-byte write + config.write_table(0, &u64::to_le_bytes(0x0000_1312_0000_1110)); + // It's still masked so shouldn't be updated + assert_eq!(vectors.update_cnt(0), 1); + assert_eq!(vectors.update_cnt(1), 1); + // Same for control and message data + config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); + // Now, we enabled it, so we should see an update + assert_eq!(vectors.update_cnt(0), 2); + assert_eq!(vectors.update_cnt(1), 1); + + // Write second vector's fields with 4-byte writes + // low 32 bits of the address + config.write_table(16, &u32::to_le_bytes(0x4241)); + assert_eq!(vectors.update_cnt(0), 2); + // Still masked + assert_eq!(vectors.update_cnt(1), 1); + // high 32 bits of the address + config.write_table(20, &u32::to_le_bytes(0x4443)); + assert_eq!(vectors.update_cnt(0), 2); + // Still masked + assert_eq!(vectors.update_cnt(1), 1); + // message data + config.write_table(24, &u32::to_le_bytes(0x21)); + assert_eq!(vectors.update_cnt(0), 2); + // Still masked + assert_eq!(vectors.update_cnt(1), 1); + // vector control + config.write_table(28, &u32::to_le_bytes(0x0)); + assert_eq!(vectors.update_cnt(0), 2); + assert_eq!(vectors.update_cnt(1), 2); + + assert_eq!(config.table_entries[0].msg_addr_hi, 0x1312); + assert_eq!(config.table_entries[0].msg_addr_lo, 0x1110); + assert_eq!(config.table_entries[0].msg_data, 0x20); + assert_eq!(config.table_entries[0].vector_ctl, 0); + + assert_eq!(config.table_entries[1].msg_addr_hi, 0x4443); + assert_eq!(config.table_entries[1].msg_addr_lo, 0x4241); + assert_eq!(config.table_entries[1].msg_data, 0x21); + assert_eq!(config.table_entries[1].vector_ctl, 0); + + assert_eq!(config.table_entries.len(), 2); + assert_eq!(config.pba_entries.len(), 1); + + // reading at a bad offset should return all 1s + config.read_table(1, &mut buffer[..4]); + assert_eq!(buffer[..4], [0xff; 4]); + // read low address for first vector + config.read_table(0, &mut buffer[..4]); + assert_eq!( + buffer[..4], + u32::to_le_bytes(config.table_entries[0].msg_addr_lo) + ); + // read the high address for first vector + config.read_table(4, &mut buffer[4..]); + assert_eq!(0x0000_1312_0000_1110, u64::from_le_bytes(buffer)); + // read msg_data from second vector + config.read_table(24, &mut buffer[..4]); + assert_eq!(u32::to_le_bytes(0x21), &buffer[..4]); + // read vector control for second vector + config.read_table(28, &mut buffer[..4]); + assert_eq!(u32::to_le_bytes(0x0), &buffer[..4]); + + // reading with 8 bytes at bad offset should also return all 1s + config.read_table(19, &mut buffer); + assert_eq!(buffer, [0xff; 8]); + + // Read the second vector's address using an 8 byte read + config.read_table(16, &mut buffer); + assert_eq!(0x0000_4443_0000_4241, u64::from_le_bytes(buffer)); + + // Read the first vector's ctrl and data with a single 8 byte read + config.read_table(8, &mut buffer); + assert_eq!(0x0_0000_0020, u64::from_le_bytes(buffer)); + + // If we mask the interrupts we shouldn't see any update + config.write_table(12, &u32::to_le_bytes(0x1)); + config.write_table(28, &u32::to_le_bytes(0x1)); + assert_eq!(vectors.update_cnt(0), 2); + assert_eq!(vectors.update_cnt(1), 2); + + // Un-masking them should update them + config.write_table(12, &u32::to_le_bytes(0x0)); + config.write_table(28, &u32::to_le_bytes(0x0)); + assert_eq!(vectors.update_cnt(0), 3); + assert_eq!(vectors.update_cnt(1), 3); + + // Setting up the same config should have no effect + config.write_table(12, &u32::to_le_bytes(0x0)); + config.write_table(28, &u32::to_le_bytes(0x0)); + assert_eq!(vectors.update_cnt(0), 3); + assert_eq!(vectors.update_cnt(1), 3); + } + + #[test] + #[should_panic] + fn test_table_access_write_too_big() { + let vectors = Arc::new(MockInterrupt::new()); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let buffer = [0u8; 16]; + + config.write_table(0, &buffer); + } + + #[test] + fn test_pba_read_too_big() { + let vectors = Arc::new(MockInterrupt::new()); + let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut buffer = [0u8; 16]; + + config.read_pba(0, &mut buffer); + assert_eq!(buffer, [0xff; 16]); + } + + #[test] + fn test_pba_invalid_offset() { + let vectors = Arc::new(MockInterrupt::new()); + let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + let mut buffer = [0u8; 8]; + + // Past the end of the PBA array + config.read_pba(128, &mut buffer); + assert_eq!(buffer, [0xffu8; 8]); + + // Invalid offset within a valid entry + let mut buffer = [0u8; 8]; + config.read_pba(3, &mut buffer[..4]); + assert_eq!(buffer[..4], [0xffu8; 4]); + config.read_pba(3, &mut buffer); + assert_eq!(buffer, [0xffu8; 8]); + } + + #[test] + #[should_panic] + fn test_set_pba_bit_vector_too_big() { + let vectors = Arc::new(MockInterrupt::new()); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + + config.set_pba_bit(2048, false); + } + + #[test] + #[should_panic] + fn test_get_pba_bit_vector_too_big() { + let vectors = Arc::new(MockInterrupt::new()); + let config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + + config.get_pba_bit(2048); + } + + #[test] + fn test_pba_bit_invalid_vector() { + let vectors = Arc::new(MockInterrupt::new()); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + + // We have two vectors, so setting the pending bit for the third one + // should be ignored + config.set_pba_bit(2, false); + assert_eq!(config.pba_entries[0], 0); + + // Same for getting the bit + assert_eq!(config.get_pba_bit(2), 0xff); } - pub fn table_range(&self) -> (u64, u64) { - // The table takes 16 bytes per entry. - let size = self.table_size() as u64 * 16; - (self.table_offset() as u64, size) + #[test] + fn test_pba_read() { + let vectors = Arc::new(MockInterrupt::new()); + let mut config = MsixConfig::new(128, vectors.clone(), 0x42, None).unwrap(); + let mut buffer = [0u8; 8]; + + config.set_pba_bit(1, false); + assert_eq!(config.pba_entries[0], 2); + assert_eq!(config.pba_entries[1], 0); + config.read_pba(0, &mut buffer); + assert_eq!(0x2, u64::from_le_bytes(buffer)); + + let mut buffer = [0u8; 4]; + config.set_pba_bit(96, false); + assert_eq!(config.pba_entries[0], 2); + assert_eq!(config.pba_entries[1], 0x1_0000_0000); + config.read_pba(8, &mut buffer); + assert_eq!(0x0, u32::from_le_bytes(buffer)); + config.read_pba(12, &mut buffer); + assert_eq!(0x1, u32::from_le_bytes(buffer)); } - pub fn pba_range(&self) -> (u64, u64) { - // The table takes 1 bit per entry modulo 8 bytes. - let size = ((self.table_size() as u64 / 64) + 1) * 8; - (self.pba_offset() as u64, size) + #[test] + fn test_pending_interrupt() { + let vectors = Arc::new(MockInterrupt::new()); + let mut config = MsixConfig::new(2, vectors.clone(), 0x42, None).unwrap(); + config.set_pba_bit(1, false); + assert_eq!(config.get_pba_bit(1), 1); + // Enable MSI-X vector and unmask interrupts + config.set_msg_ctl(0x8000); + + // Individual vectors are still masked, so no change + assert_eq!(vectors.interrupt_cnt(0), 0); + assert_eq!(vectors.interrupt_cnt(1), 0); + + // Enable all vectors + config.write_table(8, &u64::to_le_bytes(0x0_0000_0020)); + config.write_table(24, &u64::to_le_bytes(0x0_0000_0020)); + + // Vector one had a pending bit, so we must have triggered an interrupt for it + // and cleared the pending bit + assert_eq!(vectors.interrupt_cnt(0), 0); + assert_eq!(vectors.interrupt_cnt(1), 1); + assert_eq!(config.get_pba_bit(1), 0); + + // Check that interrupt is sent as well for enabled vectors once we unmask from + // Message Control + + // Mask vectors and set pending bit for vector 0 + config.set_msg_ctl(0xc000); + config.set_pba_bit(0, false); + assert_eq!(vectors.interrupt_cnt(0), 0); + assert_eq!(vectors.interrupt_cnt(1), 1); + + // Unmask them + config.set_msg_ctl(0x8000); + assert_eq!(vectors.interrupt_cnt(0), 1); + assert_eq!(vectors.interrupt_cnt(1), 1); + assert_eq!(config.get_pba_bit(0), 0); } } diff --git a/src/vmm/src/devices/virtio/transport/pci/device.rs b/src/vmm/src/devices/virtio/transport/pci/device.rs index 9daf88201ac..038264bb417 100644 --- a/src/vmm/src/devices/virtio/transport/pci/device.rs +++ b/src/vmm/src/devices/virtio/transport/pci/device.rs @@ -7,7 +7,6 @@ // // SPDX-License-Identifier: Apache-2.0 AND BSD-3-Clause -use std::any::Any; use std::cmp; use std::collections::HashMap; use std::fmt::{Debug, Formatter}; @@ -784,7 +783,7 @@ impl VirtioInterrupt for VirtioInterruptMsix { // device should not inject the interrupt. // Instead, the Pending Bit Array table is updated to reflect there // is a pending interrupt for this specific vector. - if config.masked() || entry.masked() { + if config.masked || entry.masked() { config.set_pba_bit(vector, false); return Ok(()); } @@ -870,7 +869,7 @@ impl PciDevice for VirtioPciDevice { &mut self, mmio32_allocator: &mut AddressAllocator, mmio64_allocator: &mut AddressAllocator, - ) -> std::result::Result, PciDeviceError> { + ) -> std::result::Result<(), PciDeviceError> { let device_clone = self.device.clone(); let device = device_clone.lock().unwrap(); @@ -905,25 +904,6 @@ impl PciDevice for VirtioPciDevice { self.add_pci_capabilities()?; self.bar_region = bar; - Ok(vec![bar]) - } - - fn free_bars( - &mut self, - mmio32_allocator: &mut AddressAllocator, - mmio64_allocator: &mut AddressAllocator, - ) -> std::result::Result<(), PciDeviceError> { - assert_eq!( - self.bar_region.region_type, - PciBarRegionType::Memory64BitRegion - ); - - let range = RangeInclusive::new( - self.bar_region.addr, - self.bar_region.addr + self.bar_region.size, - ) - .unwrap(); - mmio64_allocator.free(&range); Ok(()) } @@ -1083,14 +1063,6 @@ impl PciDevice for VirtioPciDevice { None } - - fn id(&self) -> Option { - Some(self.id.clone()) - } - - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } } impl BusDevice for VirtioPciDevice {