diff --git a/kvm-bindings/src/lib.rs b/kvm-bindings/src/lib.rs
index 35757532..2016c194 100644
--- a/kvm-bindings/src/lib.rs
+++ b/kvm-bindings/src/lib.rs
@@ -27,3 +27,7 @@ pub use self::arm64::*;
 mod riscv64;
 #[cfg(target_arch = "riscv64")]
 pub use self::riscv64::*;
+
+// Linux defines these based on _BITUL macros, and bindgen fails to generate them.
+pub const KVM_DIRTY_GFN_F_DIRTY: u32 = 0b1;
+pub const KVM_DIRTY_GFN_F_RESET: u32 = 0b10;
diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md
index fd17f5f2..04984c8e 100644
--- a/kvm-ioctls/CHANGELOG.md
+++ b/kvm-ioctls/CHANGELOG.md
@@ -2,6 +2,24 @@
 
 ## Upcoming Release
 
+### Fixed
+
+- Fixed `VmFd::enable_cap` to be available on all architectures.
+
+### Added
+
+- Added the `KvmDirtyLogRing` structure to mmap the dirty log ring.
+- Added the `KVM_DIRTY_GFN_F_DIRTY` and `KVM_DIRTY_GFN_F_RESET` bitflags.
+- Implemented `Iterator` for `KvmDirtyLogRing` to access dirty log entries.
+- Added a `dirty_log_ring` field to `VcpuFd` to access per-vCPU dirty rings.
+- Inserted fences in the `KvmDirtyLogRing` iterator's `next` for architectures with
+  weak memory consistency that require Acquire/Release ordering.
+- Added a `DirtyLogRingInfo` struct and a `dirty_log_ring_info` field to `VmFd` to
+  track the dirty ring configuration.
+- Added the `enable_dirty_log_ring` function on `VmFd` to check the corresponding
+  capabilities and enable KVM's dirty log ring.
+- Added `VcpuFd::dirty_log_ring_iter()` to iterate over dirty guest frame numbers.
+- Added `VmFd::reset_dirty_rings()` to reset all dirty rings for the VM.
+
 - Plumb through KVM_CAP_DIRTY_LOG_RING as DirtyLogRing cap.
 
 ## v0.24.0
diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs
index 22cd6067..086a2979 100644
--- a/kvm-ioctls/src/ioctls/mod.rs
+++ b/kvm-ioctls/src/ioctls/mod.rs
@@ -8,9 +8,11 @@ use std::mem::size_of;
 use std::os::unix::io::AsRawFd;
 use std::ptr::{NonNull, null_mut};
+use std::sync::atomic::{Ordering, fence};
 
 use kvm_bindings::{
-    KVM_COALESCED_MMIO_PAGE_OFFSET, kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run,
+    KVM_COALESCED_MMIO_PAGE_OFFSET, KVM_DIRTY_GFN_F_DIRTY, KVM_DIRTY_GFN_F_RESET,
+    KVM_DIRTY_LOG_PAGE_OFFSET, kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_dirty_gfn, kvm_run,
 };
 use vmm_sys_util::errno;
@@ -29,6 +31,121 @@ pub mod vm;
 /// is otherwise a direct mapping to Result.
 pub type Result<T> = std::result::Result<T, errno::Error>;
 
+/// A wrapper around the KVM dirty log ring page.
+#[derive(Debug)]
+pub(crate) struct KvmDirtyLogRing {
+    /// Next potentially dirty guest frame number slot index
+    next_dirty: u64,
+    /// Memory-mapped array of dirty guest frame number entries
+    gfns: NonNull<kvm_dirty_gfn>,
+    /// Ring size mask (size - 1) for efficient modulo operations
+    mask: u64,
+    /// `true` if we need to use Acquire/Release memory ordering
+    use_acq_rel: bool,
+}
+
+impl KvmDirtyLogRing {
+    /// Maps the KVM dirty log ring from the vCPU file descriptor.
+    ///
+    /// # Arguments
+    /// * `fd` - vCPU file descriptor to mmap from.
+    /// * `bytes` - Size of the memory region in bytes.
+    /// * `use_acq_rel` - Whether to use Acquire/Release memory ordering.
+    pub(crate) fn mmap_from_fd<F: AsRawFd>(
+        fd: &F,
+        bytes: usize,
+        use_acq_rel: bool,
+    ) -> Result<Self> {
+        // SAFETY: We trust the sysconf libc function and we're calling it
+        // with a correct parameter.
+        let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } {
+            -1 => return Err(errno::Error::last()),
+            ps => ps as usize,
+        };
+
+        let offset = page_size * KVM_DIRTY_LOG_PAGE_OFFSET as usize;
+
+        if bytes % std::mem::size_of::<kvm_dirty_gfn>() != 0 {
+            // The size of the dirty ring in bytes must be a multiple of the slot size.
+            return Err(errno::Error::new(libc::EINVAL));
+        }
+        let slots = bytes / std::mem::size_of::<kvm_dirty_gfn>();
+        if !slots.is_power_of_two() {
+            // The number of slots must be a power of two.
+            return Err(errno::Error::new(libc::EINVAL));
+        }
+
+        // SAFETY: KVM guarantees that there is a page at offset
+        // KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE if the appropriate
+        // capability is available. If it is not, the call will simply
+        // fail.
+        let gfns = unsafe {
+            NonNull::<kvm_dirty_gfn>::new(libc::mmap(
+                null_mut(),
+                bytes,
+                libc::PROT_READ | libc::PROT_WRITE,
+                libc::MAP_SHARED,
+                fd.as_raw_fd(),
+                offset as i64,
+            ) as *mut kvm_dirty_gfn)
+            .filter(|addr| addr.as_ptr() != libc::MAP_FAILED as *mut kvm_dirty_gfn)
+            .ok_or_else(errno::Error::last)?
+        };
+        Ok(Self {
+            next_dirty: 0,
+            gfns,
+            mask: (slots - 1) as u64,
+            use_acq_rel,
+        })
+    }
+}
+
+impl Drop for KvmDirtyLogRing {
+    fn drop(&mut self) {
+        // SAFETY: This is safe because we mmap the page ourselves, and nobody
+        // else is holding a reference to it.
+        unsafe {
+            libc::munmap(
+                self.gfns.as_ptr().cast(),
+                (self.mask + 1) as usize * std::mem::size_of::<kvm_dirty_gfn>(),
+            );
+        }
+    }
+}
+
+impl Iterator for KvmDirtyLogRing {
+    type Item = (u32, u64);
+    fn next(&mut self) -> Option<Self::Item> {
+        let i = self.next_dirty & self.mask;
+        // SAFETY: i is no larger than mask and is thus a valid offset into self.gfns,
+        // therefore this operation produces a valid pointer to a kvm_dirty_gfn.
+        let gfn_ptr = unsafe { self.gfns.add(i as usize).as_ptr() };
+
+        if self.use_acq_rel {
+            fence(Ordering::Acquire);
+        }
+
+        // SAFETY: gfn_ptr is a valid pointer to a kvm_dirty_gfn.
+        let gfn = unsafe { gfn_ptr.read_volatile() };
+
+        if gfn.flags & KVM_DIRTY_GFN_F_DIRTY == 0 {
+            // next_dirty stays the same; this slot will hold the next dirty entry.
+            None
+        } else {
+            self.next_dirty += 1;
+            let mut updated_gfn = gfn;
+            updated_gfn.flags ^= KVM_DIRTY_GFN_F_RESET;
+            // SAFETY: gfn_ptr is a valid pointer to a kvm_dirty_gfn.
+            unsafe {
+                gfn_ptr.write_volatile(updated_gfn);
+            };
+            if self.use_acq_rel {
+                fence(Ordering::Release);
+            }
+            Some((gfn.slot, gfn.offset))
+        }
+    }
+}
+
 /// A wrapper around the coalesced MMIO ring page.
 #[derive(Debug)]
 pub(crate) struct KvmCoalescedIoRing {
diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs
index 6efaa465..66738f30 100644
--- a/kvm-ioctls/src/ioctls/vcpu.rs
+++ b/kvm-ioctls/src/ioctls/vcpu.rs
@@ -16,7 +16,7 @@ use libc::EINVAL;
 use std::fs::File;
 use std::os::unix::io::{AsRawFd, RawFd};
 
-use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result};
+use crate::ioctls::{KvmCoalescedIoRing, KvmDirtyLogRing, KvmRunWrapper, Result};
 use crate::kvm_ioctls::*;
 use vmm_sys_util::errno;
 use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref};
@@ -197,6 +197,9 @@ pub struct VcpuFd {
     kvm_run_ptr: KvmRunWrapper,
     /// A pointer to the coalesced MMIO page
     coalesced_mmio_ring: Option<KvmCoalescedIoRing>,
+    /// A pointer to the dirty log ring
+    #[allow(unused)]
+    dirty_log_ring: Option<KvmDirtyLogRing>,
 }
 
 /// KVM Sync Registers used to tell KVM which registers to sync
@@ -2047,6 +2050,37 @@ impl VcpuFd {
         }
     }
 
+    /// Gets the dirty log ring iterator if one is mapped.
+    ///
+    /// Returns an iterator over dirty guest frame numbers as (slot, offset) tuples.
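+    /// Each entry the iterator yields is flagged with `KVM_DIRTY_GFN_F_RESET`, so the
+    /// kernel can reclaim and reprotect it on the next `VmFd::reset_dirty_rings()` call.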
+    /// Returns `None` if no dirty log ring has been mapped.
+    ///
+    /// # Returns
+    ///
+    /// An optional iterator over the dirty log ring entries.
+    ///
+    /// # Example
+    ///
+    /// ```no_run
+    /// # use kvm_ioctls::Kvm;
+    /// # use kvm_ioctls::Cap;
+    /// let kvm = Kvm::new().unwrap();
+    /// let mut vm = kvm.create_vm().unwrap();
+    /// if kvm.check_extension(Cap::DirtyLogRing) {
+    ///     vm.enable_dirty_log_ring(None).unwrap();
+    ///     let mut vcpu = vm.create_vcpu(0).unwrap();
+    ///     if let Some(iter) = vcpu.dirty_log_ring_iter() {
+    ///         for (slot, offset) in iter {
+    ///             println!("Dirty page in slot {} at offset {}", slot, offset);
+    ///         }
+    ///     }
+    /// }
+    /// ```
+    #[cfg(target_arch = "x86_64")]
+    pub fn dirty_log_ring_iter(&mut self) -> Option<&mut impl Iterator<Item = (u32, u64)>> {
+        self.dirty_log_ring.as_mut()
+    }
+
     /// Maps the coalesced MMIO ring page. This allows reading entries from
     /// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read).
     ///
@@ -2102,11 +2136,16 @@ impl VcpuFd {
 /// This should not be exported as a public function because the preferred way is to use
 /// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because
 /// then it would be exported with the public `VcpuFd` interface.
-pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd {
+pub fn new_vcpu(
+    vcpu: File,
+    kvm_run_ptr: KvmRunWrapper,
+    dirty_log_ring: Option<KvmDirtyLogRing>,
+) -> VcpuFd {
     VcpuFd {
         vcpu,
         kvm_run_ptr,
         coalesced_mmio_ring: None,
+        dirty_log_ring,
     }
 }
 
@@ -2777,6 +2816,144 @@ mod tests {
         }
     }
 
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_run_code_dirty_log_ring() {
+        use std::io::Write;
+
+        let kvm = Kvm::new().unwrap();
+        let mut vm = kvm.create_vm().unwrap();
+
+        // Enable the dirty log ring
+        let need_bitmap = vm.enable_dirty_log_ring(None).unwrap();
+
+        // This example is based on https://lwn.net/Articles/658511/
+        #[rustfmt::skip]
+        let code = [
+            0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */
+            0x00, 0xd8, /* add %bl, %al */
+            0x04, b'0', /* add $'0', %al */
+            0xee, /* out %al, %dx */
+            0xec, /* in %dx, %al */
+            0xc6, 0x06, 0x00, 0x80, 0x00, /* movl $0, (0x8000); This generates a MMIO Write.*/
+            0x8a, 0x16, 0x00, 0x80, /* movl (0x8000), %dl; This generates a MMIO Read.*/
+            0xc6, 0x06, 0x00, 0x20, 0x00, /* movl $0, (0x2000); Dirty one page in guest mem. */
+            0xf4, /* hlt */
+        ];
+        let expected_rips: [u64; 3] = [0x1003, 0x1005, 0x1007];
+
+        let mem_size = 0x4000;
+        let load_addr = mmap_anonymous(mem_size).as_ptr();
+        let guest_addr: u64 = 0x1000;
+        let slot: u32 = 0;
+        let mem_region = kvm_userspace_memory_region {
+            slot,
+            guest_phys_addr: guest_addr,
+            memory_size: mem_size as u64,
+            userspace_addr: load_addr as u64,
+            flags: KVM_MEM_LOG_DIRTY_PAGES,
+        };
+        unsafe {
+            vm.set_user_memory_region(mem_region).unwrap();
+        }
+
+        unsafe {
+            // Get a mutable slice of `mem_size` from `load_addr`.
+            // This is safe because we mapped it before.
+            let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size);
+            slice.write_all(&code).unwrap();
+        }
+
+        let mut vcpu_fd = vm.create_vcpu(0).unwrap();
+
+        let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap();
+        assert_ne!(vcpu_sregs.cs.base, 0);
+        assert_ne!(vcpu_sregs.cs.selector, 0);
+        vcpu_sregs.cs.base = 0;
+        vcpu_sregs.cs.selector = 0;
+        vcpu_fd.set_sregs(&vcpu_sregs).unwrap();
+
+        let mut vcpu_regs = vcpu_fd.get_regs().unwrap();
+        // Set the Instruction Pointer to the guest address where we loaded the code.
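+        // rax and rbx hold the operands of the `add %bl, %al` above (2 + 3,
+        // printed as b'5' on port 0x3f8), and bit 1 of rflags is a reserved
+        // bit that must always be set.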
+        vcpu_regs.rip = guest_addr;
+        vcpu_regs.rax = 2;
+        vcpu_regs.rbx = 3;
+        vcpu_regs.rflags = 2;
+        vcpu_fd.set_regs(&vcpu_regs).unwrap();
+
+        let mut debug_struct = kvm_guest_debug {
+            control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
+            pad: 0,
+            arch: kvm_guest_debug_arch {
+                debugreg: [0, 0, 0, 0, 0, 0, 0, 0],
+            },
+        };
+        vcpu_fd.set_guest_debug(&debug_struct).unwrap();
+
+        let mut instr_idx = 0;
+        loop {
+            match vcpu_fd.run().expect("run failed") {
+                VcpuExit::IoIn(addr, data) => {
+                    assert_eq!(addr, 0x3f8);
+                    assert_eq!(data.len(), 1);
+                }
+                VcpuExit::IoOut(addr, data) => {
+                    assert_eq!(addr, 0x3f8);
+                    assert_eq!(data.len(), 1);
+                    assert_eq!(data[0], b'5');
+                }
+                VcpuExit::MmioRead(addr, data) => {
+                    assert_eq!(addr, 0x8000);
+                    assert_eq!(data.len(), 1);
+                }
+                VcpuExit::MmioWrite(addr, data) => {
+                    assert_eq!(addr, 0x8000);
+                    assert_eq!(data.len(), 1);
+                    assert_eq!(data[0], 0);
+                }
+                VcpuExit::Debug(debug) => {
+                    if instr_idx == expected_rips.len() - 1 {
+                        // Disable debugging/single-stepping
+                        debug_struct.control = 0;
+                        vcpu_fd.set_guest_debug(&debug_struct).unwrap();
+                    } else if instr_idx >= expected_rips.len() {
+                        unreachable!();
+                    }
+                    let vcpu_regs = vcpu_fd.get_regs().unwrap();
+                    assert_eq!(vcpu_regs.rip, expected_rips[instr_idx]);
+                    assert_eq!(debug.exception, 1);
+                    assert_eq!(debug.pc, expected_rips[instr_idx]);
+                    // Check the low 16 bits of DR6
+                    let mask = (1 << 16) - 1;
+                    assert_eq!(debug.dr6 & mask, 0b100111111110000);
+                    // Bit 10 in DR7 is always 1
+                    assert_eq!(debug.dr7, 1 << 10);
+                    instr_idx += 1;
+                }
+                VcpuExit::Hlt => {
+                    // The code snippet dirties 2 pages:
+                    // * one when the code itself is loaded in memory;
+                    // * and one more from the `movl` that writes to address 0x2000
+                    //   (the write to 0x8000 is MMIO and does not dirty guest memory).
+
+                    let dirty_pages: u32 =
+                        u32::try_from(vcpu_fd.dirty_log_ring_iter().unwrap().count()).unwrap()
+                            + if need_bitmap {
+                                let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap();
+                                dirty_pages_bitmap
+                                    .into_iter()
+                                    .map(|page| page.count_ones())
+                                    .sum()
+                            } else {
+                                0
+                            };
+                    assert_eq!(dirty_pages, 2);
+                    break;
+                }
+                r => panic!("unexpected exit reason: {:?}", r),
+            }
+        }
+    }
+
     #[test]
     #[cfg(target_arch = "aarch64")]
     fn test_get_preferred_target() {
diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs
index 217465d8..10291e04 100644
--- a/kvm-ioctls/src/ioctls/vm.rs
+++ b/kvm-ioctls/src/ioctls/vm.rs
@@ -18,11 +18,10 @@ use crate::ioctls::device::DeviceFd;
 use crate::ioctls::device::new_device;
 use crate::ioctls::vcpu::VcpuFd;
 use crate::ioctls::vcpu::new_vcpu;
-use crate::ioctls::{KvmRunWrapper, Result};
+use crate::ioctls::{KvmDirtyLogRing, KvmRunWrapper, Result};
 use crate::kvm_ioctls::*;
 use vmm_sys_util::errno;
 use vmm_sys_util::eventfd::EventFd;
-#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
 use vmm_sys_util::ioctl::ioctl;
 #[cfg(target_arch = "x86_64")]
 use vmm_sys_util::ioctl::ioctl_with_mut_ptr;
@@ -54,11 +53,21 @@ impl From<NoDatamatch> for u64 {
     }
 }
 
+/// Information about the dirty log ring configuration.
+#[derive(Debug)]
+struct DirtyLogRingInfo {
+    /// Size of the dirty ring in bytes.
+    bytes: usize,
+    /// Whether to use Acquire/Release semantics.
+    acq_rel: bool,
+}
+
 /// Wrapper over KVM VM ioctls.
 #[derive(Debug)]
 pub struct VmFd {
     vm: File,
     run_size: usize,
+    dirty_log_ring_info: Option<DirtyLogRingInfo>,
 }
 
 impl VmFd {
@@ -1207,7 +1216,17 @@ impl VmFd {
 
         let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?;
 
-        Ok(new_vcpu(vcpu, kvm_run_ptr))
+        let dirty_log_ring = if let Some(info) = &self.dirty_log_ring_info {
+            Some(KvmDirtyLogRing::mmap_from_fd(
+                &vcpu,
+                info.bytes,
+                info.acq_rel,
+            )?)
+        } else {
+            None
+        };
+
+        Ok(new_vcpu(vcpu, kvm_run_ptr, dirty_log_ring))
     }
 
     /// Creates a VcpuFd object from a vcpu RawFd.
@@ -1243,7 +1262,16 @@ impl VmFd {
         // SAFETY: we trust the kernel and verified parameters
         let vcpu = unsafe { File::from_raw_fd(fd) };
         let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?;
-        Ok(new_vcpu(vcpu, kvm_run_ptr))
+        let dirty_log_ring = if let Some(info) = &self.dirty_log_ring_info {
+            Some(KvmDirtyLogRing::mmap_from_fd(
+                &vcpu,
+                info.bytes,
+                info.acq_rel,
+            )?)
+        } else {
+            None
+        };
+        Ok(new_vcpu(vcpu, kvm_run_ptr, dirty_log_ring))
     }
 
     /// Creates an emulated device in the kernel.
@@ -1379,9 +1407,9 @@ impl VmFd {
     /// // Because an IOAPIC supports 24 pins, that's the reason why this test
     /// // picked this number as reference.
     /// cap.args[0] = 24;
+    /// #[cfg(target_arch = "x86_64")]
     /// vm.enable_cap(&cap).unwrap();
     /// ```
-    #[cfg(any(target_arch = "x86_64", target_arch = "s390x", target_arch = "powerpc"))]
     pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> {
         // SAFETY: The ioctl is safe because we allocated the struct and we know the
         // kernel will write exactly the size of the struct.
@@ -1908,6 +1936,131 @@ impl VmFd {
         Ok(())
     }
 
+    /// Enables KVM's dirty log ring for new vCPUs created on this VM.
+    ///
+    /// Checks the required capabilities and returns a boolean `use_bitmap`, which is
+    /// `true` if the ring needs to be used together with a backup bitmap via
+    /// `KVM_GET_DIRTY_LOG`. Takes an optional dirty ring size in bytes; if it is not
+    /// supplied, the maximum supported dirty ring size is used. The size needs to be
+    /// a multiple of `std::mem::size_of::<kvm_dirty_gfn>()` and a power of two.
+    /// Enabling the dirty log ring is only allowed before any vCPU has been created
+    /// on the `VmFd`.
+    ///
+    /// # Arguments
+    ///
+    /// * `bytes` - Size of the dirty log ring in bytes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # extern crate kvm_bindings;
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::Kvm;
+    /// # use kvm_bindings::KVM_CAP_DIRTY_LOG_RING;
+    ///
+    /// let kvm = Kvm::new().unwrap();
+    /// let mut vm = kvm.create_vm().unwrap();
+    /// let max_supported_size = vm.check_extension_raw(KVM_CAP_DIRTY_LOG_RING.into());
+    /// vm.enable_dirty_log_ring(Some(max_supported_size));
+    /// // Create one vCPU with ID=0 to cause a dirty log ring to be mapped.
+    /// let vcpu = vm.create_vcpu(0);
+    /// ```
+    pub fn enable_dirty_log_ring(&mut self, bytes: Option<i32>) -> Result<bool> {
+        // Check that the requested size is valid
+        if let Some(sz) = bytes {
+            if sz <= 0
+                || !(sz as u32).is_power_of_two()
+                || (sz as usize % std::mem::size_of::<kvm_dirty_gfn>() != 0)
+            {
+                return Err(errno::Error::new(libc::EINVAL));
+            }
+        }
+
+        let (dirty_ring_cap, max_bytes, use_bitmap) = {
+            // Check if KVM_CAP_DIRTY_LOG_RING_ACQ_REL is available and prefer it if so
+            let acq_rel_sz = self.check_extension_raw(KVM_CAP_DIRTY_LOG_RING_ACQ_REL.into());
+            if acq_rel_sz > 0 {
+                if self.check_extension_raw(KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP.into()) != 0 {
+                    (KVM_CAP_DIRTY_LOG_RING_ACQ_REL, acq_rel_sz, true)
+                } else {
+                    (KVM_CAP_DIRTY_LOG_RING_ACQ_REL, acq_rel_sz, false)
+                }
+            } else {
+                let sz = self.check_extension_raw(KVM_CAP_DIRTY_LOG_RING.into());
+                if sz > 0 {
+                    (KVM_CAP_DIRTY_LOG_RING, sz, false)
+                } else {
+                    (0, 0, false)
+                }
+            }
+        };
+
+        if dirty_ring_cap == 0 {
+            // Neither KVM_CAP_DIRTY_LOG_RING nor KVM_CAP_DIRTY_LOG_RING_ACQ_REL is available
+            return Err(errno::Error::new(libc::EOPNOTSUPP));
+        }
+
+        let cap_ring_size = bytes.unwrap_or(max_bytes);
+
+        // Check if the supplied size is larger than what the kernel supports
+        if cap_ring_size > max_bytes {
+            return Err(errno::Error::new(libc::EINVAL));
+        }
+
+        // Enable dirty rings with _ACQ_REL if supported, or without it otherwise
+        let ring_cap = kvm_enable_cap {
+            cap: dirty_ring_cap,
+            args: [cap_ring_size as u64, 0, 0, 0],
+            ..Default::default()
+        };
+        let use_acq_rel = dirty_ring_cap == KVM_CAP_DIRTY_LOG_RING_ACQ_REL;
+
+        // Enable the ring cap first
+        self.enable_cap(&ring_cap)?;
+
+        if use_bitmap {
+            let with_bitmap_cap = kvm_enable_cap {
+                cap: KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP,
+                ..Default::default()
+            };
+
+            // Enable the backup bitmap
+            self.enable_cap(&with_bitmap_cap)?;
+        }
+
+        self.dirty_log_ring_info = Some(DirtyLogRingInfo {
+            bytes: cap_ring_size as usize,
+            acq_rel: use_acq_rel,
+        });
+
+        Ok(use_bitmap)
+    }
+
+    /// Resets all vCPUs' dirty log rings. This notifies the kernel that pages have been
+    /// harvested from the dirty ring, so that the corresponding pages can be reprotected.
+    /// Returns the number of reset dirty ring entries, as reported by the kernel.
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// # extern crate kvm_ioctls;
+    /// # use kvm_ioctls::{Cap, Kvm};
+    /// let kvm = Kvm::new().unwrap();
+    /// let mut vm = kvm.create_vm().unwrap();
+    /// if kvm.check_extension(Cap::DirtyLogRing) {
+    ///     vm.enable_dirty_log_ring(None).unwrap();
+    ///     // Create one vCPU with ID=0 to cause a dirty log ring to be mapped.
+    ///     let vcpu = vm.create_vcpu(0);
+    ///     vm.reset_dirty_rings().unwrap();
+    /// }
+    /// ```
+    pub fn reset_dirty_rings(&self) -> Result<i32> {
+        // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of
+        // the ones defined by the kernel.
+        let ret = unsafe { ioctl(self, KVM_RESET_DIRTY_RINGS()) };
+        if ret < 0 {
+            Err(errno::Error::last())
+        } else {
+            Ok(ret)
+        }
+    }
+
     /// Sets a specified piece of vm configuration and/or state.
     ///
     /// See the documentation for `KVM_SET_DEVICE_ATTR` in
@@ -2004,7 +2157,11 @@ impl VmFd {
 /// `create_vm` from `Kvm`. The function cannot be part of the `VmFd` implementation because
 /// then it would be exported with the public `VmFd` interface.
 pub fn new_vmfd(vm: File, run_size: usize) -> VmFd {
-    VmFd { vm, run_size }
+    VmFd {
+        vm,
+        run_size,
+        dirty_log_ring_info: None,
+    }
 }
 
 impl AsRawFd for VmFd {
@@ -2594,6 +2751,7 @@ mod tests {
         let faulty_vm_fd = VmFd {
             vm: unsafe { File::from_raw_fd(-2) },
             run_size: 0,
+            dirty_log_ring_info: None,
         };
 
         let invalid_mem_region = kvm_userspace_memory_region {
@@ -2900,4 +3058,73 @@ mod tests {
         vm.has_device_attr(&dist_attr).unwrap();
         vm.set_device_attr(&dist_attr).unwrap();
     }
+
+    #[test]
+    fn test_enable_dirty_log_rings() {
+        let kvm = Kvm::new().unwrap();
+        let mut vm = kvm.create_vm().unwrap();
+        if kvm.check_extension(Cap::DirtyLogRing) {
+            vm.enable_dirty_log_ring(None).unwrap();
+            // Create two vCPUs to cause two dirty log rings to be mapped.
+            let _vcpu0 = vm.create_vcpu(0).unwrap();
+            let _vcpu1 = vm.create_vcpu(1).unwrap();
+            vm.reset_dirty_rings().unwrap();
+        }
+    }
+
+    #[test]
+    fn test_enable_dirty_log_rings_sized() {
+        let kvm = Kvm::new().unwrap();
+        let mut vm = kvm.create_vm().unwrap();
+        if kvm.check_extension(Cap::DirtyLogRing) {
+            let max_supported_size = vm.check_extension_raw(KVM_CAP_DIRTY_LOG_RING.into());
+            let size = std::cmp::max(max_supported_size / 2, size_of::<kvm_dirty_gfn>() as i32);
+            vm.enable_dirty_log_ring(Some(size)).unwrap();
+            // Create two vCPUs to cause two dirty log rings to be mapped.
+            let _vcpu0 = vm.create_vcpu(0).unwrap();
+            let _vcpu1 = vm.create_vcpu(1).unwrap();
+            vm.reset_dirty_rings().unwrap();
+        }
+    }
+
+    #[test]
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    fn test_enable_dirty_log_rings_acq_rel() {
+        let kvm = Kvm::new().unwrap();
+        let mut vm = kvm.create_vm().unwrap();
+        if kvm.check_extension(Cap::DirtyLogRing) {
+            vm.enable_dirty_log_ring(None).unwrap();
+
+            // Manually enable Acquire/Release ordering
+            vm.dirty_log_ring_info = vm
+                .dirty_log_ring_info
+                .map(|i| DirtyLogRingInfo { acq_rel: true, ..i });
+
+            // Create two vCPUs to cause two dirty log rings to be mapped.
+            let _vcpu0 = vm.create_vcpu(0).unwrap();
+            let _vcpu1 = vm.create_vcpu(1).unwrap();
+
+            // Reset the dirty rings
+            vm.reset_dirty_rings().unwrap();
+        }
+    }
+
+    #[test]
+    fn test_illegal_dirty_ring() {
+        let kvm = Kvm::new().unwrap();
+        let mut vm = kvm.create_vm().unwrap();
+        if kvm.check_extension(Cap::DirtyLogRing) {
+            // Create one vCPU without a dirty log ring
+            let _vcpu0 = vm.create_vcpu(0).unwrap();
+
+            // Enabling the ring is not allowed after a vCPU has been created
+            vm.enable_dirty_log_ring(None).unwrap_err();
+
+            // Create another vCPU
+            let _vcpu1 = vm.create_vcpu(1).unwrap();
+
+            // The dirty ring should not be enabled
+            vm.reset_dirty_rings().unwrap_err();
+        }
+    }
 }
diff --git a/kvm-ioctls/src/kvm_ioctls.rs b/kvm-ioctls/src/kvm_ioctls.rs
index 89117d3f..4920648c 100644
--- a/kvm-ioctls/src/kvm_ioctls.rs
+++ b/kvm-ioctls/src/kvm_ioctls.rs
@@ -221,9 +221,10 @@ ioctl_io_nr!(KVM_SET_TSC_KHZ, KVMIO, 0xa2);
 /* Available with KVM_CAP_GET_TSC_KHZ */
 #[cfg(target_arch = "x86_64")]
 ioctl_io_nr!(KVM_GET_TSC_KHZ, KVMIO, 0xa3);
+/* Available with KVM_CAP_DIRTY_LOG_RING */
+ioctl_io_nr!(KVM_RESET_DIRTY_RINGS, KVMIO, 0xc7);
 /* Available with KVM_CAP_ENABLE_CAP */
-#[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))]
 ioctl_iow_nr!(KVM_ENABLE_CAP, KVMIO, 0xa3, kvm_enable_cap);
 /* Available with KVM_CAP_SIGNAL_MSI */
 #[cfg(any(
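For reviewers, here is a minimal sketch of the end-to-end workflow this diff enables. It is illustrative, not part of the diff: it assumes an x86_64 host (since `dirty_log_ring_iter` is gated on `target_arch = "x86_64"`) with `KVM_CAP_DIRTY_LOG_RING` available, and it elides the memslot setup and vCPU run loop shown in `test_run_code_dirty_log_ring` above.

```rust
use kvm_ioctls::{Cap, Kvm};

fn main() {
    let kvm = Kvm::new().unwrap();
    let mut vm = kvm.create_vm().unwrap();

    if !kvm.check_extension(Cap::DirtyLogRing) {
        return; // The dirty log ring is not supported on this host.
    }

    // Must be called before any vCPU is created. `true` means some dirty
    // state is only reported via the backup bitmap (KVM_GET_DIRTY_LOG).
    let need_bitmap = vm.enable_dirty_log_ring(None).unwrap();

    // Mapping a vCPU now also maps its per-vCPU dirty ring.
    let mut vcpu = vm.create_vcpu(0).unwrap();

    // ... register memslots with KVM_MEM_LOG_DIRTY_PAGES and run the vCPU ...

    // Harvest dirty pages; each entry is a (slot, offset) pair, and each
    // harvested entry is flagged with KVM_DIRTY_GFN_F_RESET.
    if let Some(ring) = vcpu.dirty_log_ring_iter() {
        for (slot, offset) in ring {
            println!("slot {slot}: page offset {offset} is dirty");
        }
    }

    // Tell the kernel the harvested entries can be reprotected and reused.
    let reset = vm.reset_dirty_rings().unwrap();
    println!("reset {reset} ring entries (need_bitmap = {need_bitmap})");
}
```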