From 1653881273df238bff295a5a04a3aed2369ca1c5 Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Tue, 26 Aug 2025 14:05:30 +0200 Subject: [PATCH 1/9] cap: add DirtyLogRing cap The capability is used for the KVM dirty ring interface for tracking dirtied pages. Signed-off-by: David Kleymann --- kvm-ioctls/CHANGELOG.md | 2 ++ kvm-ioctls/src/cap.rs | 1 + 2 files changed, 3 insertions(+) diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md index e5df7b76..fd17f5f2 100644 --- a/kvm-ioctls/CHANGELOG.md +++ b/kvm-ioctls/CHANGELOG.md @@ -2,6 +2,8 @@ ## Upcoming Release +- Plumb through KVM_CAP_DIRTY_LOG_RING as DirtyLogRing cap. + ## v0.24.0 ### Added diff --git a/kvm-ioctls/src/cap.rs b/kvm-ioctls/src/cap.rs index 67d4eb54..5b192cf0 100644 --- a/kvm-ioctls/src/cap.rs +++ b/kvm-ioctls/src/cap.rs @@ -169,4 +169,5 @@ pub enum Cap { NestedState = KVM_CAP_NESTED_STATE, #[cfg(target_arch = "x86_64")] X2ApicApi = KVM_CAP_X2APIC_API, + DirtyLogRing = KVM_CAP_DIRTY_LOG_RING, } From c96b8f134962a9c760cb12a12dae6374d807967d Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Tue, 7 Oct 2025 12:06:25 +0200 Subject: [PATCH 2/9] kvm-ioctls: Added KvmDirtyLogRing structure Added the KvmDirtyLogRing structure with an implementation of mmap_from_fd. Added the bitflags KVM_DIRTY_GFN_F_DIRTY and KVM_DIRTY_GFN_F_RESET in kvm-bindings, because bindgen does not generate them from the kernel. 
Signed-off-by: David Kleymann --- kvm-bindings/src/lib.rs | 4 ++ kvm-ioctls/CHANGELOG.md | 6 +++ kvm-ioctls/src/ioctls/mod.rs | 101 ++++++++++++++++++++++++++++++++++- 3 files changed, 110 insertions(+), 1 deletion(-) diff --git a/kvm-bindings/src/lib.rs b/kvm-bindings/src/lib.rs index ec56e597..f3d99d39 100644 --- a/kvm-bindings/src/lib.rs +++ b/kvm-bindings/src/lib.rs @@ -39,3 +39,7 @@ pub use self::arm64::*; mod riscv64; #[cfg(target_arch = "riscv64")] pub use self::riscv64::*; + +// linux defines these based on _BITUL macros and bindgen fails to generate them +pub const KVM_DIRTY_GFN_F_DIRTY: u32 = 0b1; +pub const KVM_DIRTY_GFN_F_RESET: u32 = 0b10; diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md index fd17f5f2..62708da1 100644 --- a/kvm-ioctls/CHANGELOG.md +++ b/kvm-ioctls/CHANGELOG.md @@ -2,6 +2,12 @@ ## Upcoming Release +### Added + +- Added `KvmDirtyLogRing` structure to mmap the dirty log ring. +- Added `KVM_DIRTY_GFN_F_DIRTY` and `KVM_DIRTY_GFN_F_RESET` bitflags. + + - Plumb through KVM_CAP_DIRTY_LOG_RING as DirtyLogRing cap. ## v0.24.0 diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs index 22cd6067..d7bc5a1f 100644 --- a/kvm-ioctls/src/ioctls/mod.rs +++ b/kvm-ioctls/src/ioctls/mod.rs @@ -10,7 +10,8 @@ use std::os::unix::io::AsRawFd; use std::ptr::{NonNull, null_mut}; use kvm_bindings::{ - KVM_COALESCED_MMIO_PAGE_OFFSET, kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_run, + KVM_COALESCED_MMIO_PAGE_OFFSET, KVM_DIRTY_GFN_F_DIRTY, KVM_DIRTY_GFN_F_RESET, + KVM_DIRTY_LOG_PAGE_OFFSET, kvm_coalesced_mmio, kvm_coalesced_mmio_ring, kvm_dirty_gfn, kvm_run, }; use vmm_sys_util::errno; @@ -29,6 +30,104 @@ pub mod vm; /// is otherwise a direct mapping to Result. pub type Result = std::result::Result; +/// A wrapper around the KVM dirty log ring page. 
+#[derive(Debug)] +pub(crate) struct KvmDirtyLogRing { + /// Next potentially dirty guest frame number slot index + next_dirty: u64, + /// Memory-mapped array of dirty guest frame number entries + gfns: NonNull, + /// Ring size mask (size-1) for efficient modulo operations + mask: u64, +} + +// SAFETY: TBD +unsafe impl Send for KvmDirtyLogRing {} +unsafe impl Sync for KvmDirtyLogRing {} +impl KvmDirtyLogRing { + /// Maps the KVM dirty log ring from the vCPU file descriptor. + /// + /// # Arguments + /// * `fd` - vCPU file descriptor to mmap from. + /// * `size` - Size of memory region in bytes. + pub(crate) fn mmap_from_fd(fd: &F, bytes: usize) -> Result { + // SAFETY: We trust the sysconf libc function and we're calling it + // with a correct parameter. + let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { + -1 => return Err(errno::Error::last()), + ps => ps as usize, + }; + + let offset = page_size * KVM_DIRTY_LOG_PAGE_OFFSET as usize; + + if bytes % std::mem::size_of::() != 0 { + // Size of dirty ring in bytes must be multiples of slot size + return Err(errno::Error::new(libc::EINVAL)); + } + let slots = bytes / std::mem::size_of::(); + if !slots.is_power_of_two() { + // Number of slots must be power of two + return Err(errno::Error::new(libc::EINVAL)); + } + + // SAFETY: KVM guarantees that there is a page at offset + // KVM_DIRTY_LOG_PAGE_OFFSET * PAGE_SIZE if the appropriate + // capability is available. If it is not, the call will simply + // fail. + let gfns = unsafe { + NonNull::::new(libc::mmap( + null_mut(), + bytes, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + fd.as_raw_fd(), + offset as i64, + ) as *mut kvm_dirty_gfn) + .filter(|addr| addr.as_ptr() != libc::MAP_FAILED as *mut kvm_dirty_gfn) + .ok_or_else(|| errno::Error::last())? 
+ }; + return Ok(Self { + next_dirty: 0, + gfns, + mask: (slots - 1) as u64, + }); + } +} + +impl Drop for KvmDirtyLogRing { + fn drop(&mut self) { + // SAFETY: This is safe because we mmap the page ourselves, and nobody + // else is holding a reference to it. + unsafe { + libc::munmap( + self.gfns.as_ptr().cast(), + (self.mask + 1) as usize * std::mem::size_of::(), + ); + } + } +} + +impl Iterator for KvmDirtyLogRing { + type Item = (u32, u64); + fn next(&mut self) -> Option { + let i = self.next_dirty & self.mask; + unsafe { + let gfn_ptr = self.gfns.add(i as usize).as_ptr(); + let gfn = gfn_ptr.read_volatile(); + if gfn.flags & KVM_DIRTY_GFN_F_DIRTY == 0 { + // next_dirty stays the same, it will become the next dirty element + return None; + } else { + self.next_dirty += 1; + let mut updated_gfn = gfn; + updated_gfn.flags ^= KVM_DIRTY_GFN_F_RESET; + gfn_ptr.write_volatile(updated_gfn); + return Some((gfn.slot, gfn.offset)); + } + } + } +} + /// A wrapper around the coalesced MMIO ring page. #[derive(Debug)] pub(crate) struct KvmCoalescedIoRing { From ec0da8b4acbf4d2a83f30119639794f09a44b485 Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Tue, 7 Oct 2025 13:05:50 +0200 Subject: [PATCH 3/9] kvm-ioctls: Implement dirty ring interface Added `dirty_log_ring_iter` to access iterator of dirty pages. Implement rest of dirty log ring interface, including `enable_dirty_log_ring` and subsequent automatic mapping of `KvmDirtyLogRing` for new vcpus. Signed-off-by: David Kleymann --- kvm-ioctls/CHANGELOG.md | 9 ++- kvm-ioctls/src/ioctls/vcpu.rs | 43 ++++++++++- kvm-ioctls/src/ioctls/vm.rs | 131 ++++++++++++++++++++++++++++++++-- kvm-ioctls/src/kvm_ioctls.rs | 2 + 4 files changed, 178 insertions(+), 7 deletions(-) diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md index 62708da1..839f81ea 100644 --- a/kvm-ioctls/CHANGELOG.md +++ b/kvm-ioctls/CHANGELOG.md @@ -6,7 +6,14 @@ - Added `KvmDirtyLogRing` structure to mmap the dirty log ring. 
- Added `KVM_DIRTY_GFN_F_DIRTY` and `KVM_DIRTY_GFN_F_RESET` bitflags. - +- Added `KvmDirtyLogRing` iterator type for accessing dirty log entries. +- Added `dirty_log_ring` field to `VcpuFd` to access per-vCpu dirty rings. +- Added `dirty_log_bytes` field to `VmFd` to automatically map correct size dirty + rings for vCpus as they are created. +- Added `enable_dirty_log_ring` function on `VmFd` to check corresponding + capabilities and enable KVM's dirty log ring. +- Added `VcpuFd::dirty_log_ring_iter()` to iterate over dirty guest frame numbers. +- Added `VmFd::reset_dirty_rings()` to reset all dirty rings for the VM. - Plumb through KVM_CAP_DIRTY_LOG_RING as DirtyLogRing cap. diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs index a1002aa5..0428babc 100644 --- a/kvm-ioctls/src/ioctls/vcpu.rs +++ b/kvm-ioctls/src/ioctls/vcpu.rs @@ -16,7 +16,7 @@ use libc::EINVAL; use std::fs::File; use std::os::unix::io::{AsRawFd, RawFd}; -use crate::ioctls::{KvmCoalescedIoRing, KvmRunWrapper, Result}; +use crate::ioctls::{KvmCoalescedIoRing, KvmDirtyLogRing, KvmRunWrapper, Result}; use crate::kvm_ioctls::*; use vmm_sys_util::errno; use vmm_sys_util::ioctl::{ioctl, ioctl_with_mut_ref, ioctl_with_ref}; @@ -197,6 +197,9 @@ pub struct VcpuFd { kvm_run_ptr: KvmRunWrapper, /// A pointer to the coalesced MMIO page coalesced_mmio_ring: Option, + /// A pointer to the dirty log ring + #[allow(unused)] + dirty_log_ring: Option, } /// KVM Sync Registers used to tell KVM which registers to sync @@ -2104,6 +2107,37 @@ impl VcpuFd { } } + /// Gets the dirty log ring iterator if one is mapped. + /// + /// Returns an iterator over dirty guest frame numbers as (slot, offset) tuples. + /// Returns `None` if no dirty log ring has been mapped via [`map_dirty_log_ring`](VcpuFd::map_dirty_log_ring). + /// + /// # Returns + /// + /// An optional iterator over the dirty log ring entries. 
+ /// + /// # Example + /// + /// ```no_run + /// # use kvm_ioctls::Kvm; + /// # use kvm_ioctls::Cap; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// vm.enable_dirty_log_ring(None).unwrap(); + /// let mut vcpu = vm.create_vcpu(0).unwrap(); + /// if kvm.check_extension(Cap::DirtyLogRing) { + /// if let Some(mut iter) = vcpu.dirty_log_ring_iter() { + /// for (slot, offset) in iter { + /// println!("Dirty page in slot {} at offset {}", slot, offset); + /// } + /// } + /// } + /// ``` + #[cfg(target_arch = "x86_64")] + pub fn dirty_log_ring_iter(&mut self) -> Option> { + self.dirty_log_ring.as_mut() + } + /// Maps the coalesced MMIO ring page. This allows reading entries from /// the ring via [`coalesced_mmio_read()`](VcpuFd::coalesced_mmio_read). /// @@ -2159,11 +2193,16 @@ impl VcpuFd { /// This should not be exported as a public function because the preferred way is to use /// `create_vcpu` from `VmFd`. The function cannot be part of the `VcpuFd` implementation because /// then it would be exported with the public `VcpuFd` interface. 
-pub fn new_vcpu(vcpu: File, kvm_run_ptr: KvmRunWrapper) -> VcpuFd { +pub fn new_vcpu( + vcpu: File, + kvm_run_ptr: KvmRunWrapper, + dirty_log_ring: Option, +) -> VcpuFd { VcpuFd { vcpu, kvm_run_ptr, coalesced_mmio_ring: None, + dirty_log_ring: dirty_log_ring, } } diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs index 1b58c243..19ea6bca 100644 --- a/kvm-ioctls/src/ioctls/vm.rs +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -18,7 +18,7 @@ use crate::ioctls::device::DeviceFd; use crate::ioctls::device::new_device; use crate::ioctls::vcpu::VcpuFd; use crate::ioctls::vcpu::new_vcpu; -use crate::ioctls::{KvmRunWrapper, Result}; +use crate::ioctls::{KvmDirtyLogRing, KvmRunWrapper, Result}; use crate::kvm_ioctls::*; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; @@ -59,6 +59,7 @@ impl From for u64 { pub struct VmFd { vm: File, run_size: usize, + dirty_ring_bytes: usize, } impl VmFd { @@ -1214,7 +1215,15 @@ impl VmFd { let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; - Ok(new_vcpu(vcpu, kvm_run_ptr)) + let dirty_log_ring = { + if self.dirty_ring_bytes > 0 { + Some(KvmDirtyLogRing::mmap_from_fd(&vcpu, self.dirty_ring_bytes)?) + } else { + None + } + }; + + Ok(new_vcpu(vcpu, kvm_run_ptr, dirty_log_ring)) } /// Creates a VcpuFd object from a vcpu RawFd. @@ -1250,7 +1259,14 @@ impl VmFd { // SAFETY: we trust the kernel and verified parameters let vcpu = unsafe { File::from_raw_fd(fd) }; let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; - Ok(new_vcpu(vcpu, kvm_run_ptr)) + let dirty_log_ring = { + if self.dirty_ring_bytes > 0 { + Some(KvmDirtyLogRing::mmap_from_fd(&vcpu, self.dirty_ring_bytes)?) + } else { + None + } + }; + Ok(new_vcpu(vcpu, kvm_run_ptr, dirty_log_ring)) } /// Creates an emulated device in the kernel. @@ -1915,6 +1931,108 @@ impl VmFd { Ok(()) } + /// Enables KVM's dirty log ring for new vCPUs created on this VM. 
Checks required capabilities and returns + /// `true` if the ring needs to be used together with a backup bitmap `KVM_GET_DIRTY_LOG`. Takes optional + /// dirty ring size as bytes, if not supplied, will use maximum supported dirty ring size. Enabling the dirty + /// log ring is only allowed before any vCPU was created on the VmFd. + /// # Arguments + /// + /// * `bytes` - Size of the dirty log ring in bytes. Needs to be multiple of `std::mem::size_of::()` + /// and power of two. + #[cfg(target_arch = "x86_64")] + pub fn enable_dirty_log_ring(&self, bytes: Option) -> Result { + // Check if requested size is larger than 0 + if let Some(sz) = bytes { + if sz <= 0 + || !(sz as u32).is_power_of_two() + || (sz as usize % std::mem::size_of::() == 0) + { + return Err(errno::Error::new(libc::EINVAL)); + } + } + + let (dirty_ring_cap, max_bytes, bitmap) = { + // Check if KVM_CAP_DIRTY_LOG_RING_ACQ_REL is available, enable if possible + let acq_rel_sz = self.check_extension_raw(KVM_CAP_DIRTY_LOG_RING_ACQ_REL.into()); + if acq_rel_sz > 0 { + if self.check_extension_raw(KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP.into()) != 0 { + (KVM_CAP_DIRTY_LOG_RING_ACQ_REL, acq_rel_sz, true) + } else { + (KVM_CAP_DIRTY_LOG_RING_ACQ_REL, acq_rel_sz, false) + } + } else { + let sz = self.check_extension_raw(KVM_CAP_DIRTY_LOG_RING.into()); + if sz > 0 { + (KVM_CAP_DIRTY_LOG_RING, sz, false) + } else { + (0, 0, false) + } + } + }; + + if dirty_ring_cap == 0 { + // Neither KVM_CAP_DIRTY_LOG_RING nor KVM_CAP_DIRTY_LOG_RING_ACQ_REL are available + return Err(errno::Error::new(libc::EOPNOTSUPP)); + } + + let cap_ring_size = bytes.unwrap_or(max_bytes); + + // Check if supplied size is larger than what the kernel supports + if cap_ring_size > max_bytes { + return Err(errno::Error::new(libc::EINVAL)); + } + + // Enable dirty rings with _ACQ_REL if supported, or without otherwise + let ar_ring_cap = kvm_enable_cap { + cap: dirty_ring_cap, + args: [cap_ring_size as u64, 0, 0, 0], + ..Default::default() + }; + + 
// Enable the ring cap first + self.enable_cap(&ar_ring_cap)?; + + if bitmap { + let with_bitmap_cap = kvm_enable_cap { + cap: KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP, + ..Default::default() + }; + + // Enable backup bitmap + self.enable_cap(&with_bitmap_cap)?; + } + + Ok(bitmap) + } + + /// Resets all vCPU's dirty log rings. This notifies the kernel that pages have been harvested + /// from the dirty ring and the corresponding pages can be reprotected. + /// + /// # Example + /// + /// ```rust + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::{Cap, Kvm}; + /// let kvm = Kvm::new().unwrap(); + /// let vm = kvm.create_vm().unwrap(); + /// vm.enable_dirty_log_ring(None).unwrap(); + /// if kvm.check_extension(Cap::DirtyLogRing) { + /// vm.reset_dirty_rings().unwrap(); + /// } + /// ``` + /// + #[cfg(target_arch = "x86_64")] + pub fn reset_dirty_rings(&self) -> Result { + // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of + // the ones defined by kernel. + let ret = unsafe { ioctl(self, KVM_RESET_DIRTY_RINGS()) }; + if ret < 0 { + Err(errno::Error::last()) + } else { + Ok(ret) + } + } + /// Sets a specified piece of vm configuration and/or state. /// /// See the documentation for `KVM_SET_DEVICE_ATTR` in @@ -2011,7 +2129,11 @@ impl VmFd { /// `create_vm` from `Kvm`. The function cannot be part of the `VmFd` implementation because /// then it would be exported with the public `VmFd` interface. 
pub fn new_vmfd(vm: File, run_size: usize) -> VmFd { - VmFd { vm, run_size } + VmFd { + vm, + run_size, + dirty_ring_bytes: 0, + } } impl AsRawFd for VmFd { @@ -2601,6 +2723,7 @@ mod tests { let faulty_vm_fd = VmFd { vm: unsafe { File::from_raw_fd(-2) }, run_size: 0, + dirty_ring_bytes: 0, }; let invalid_mem_region = kvm_userspace_memory_region { diff --git a/kvm-ioctls/src/kvm_ioctls.rs b/kvm-ioctls/src/kvm_ioctls.rs index 43898ba3..b9620170 100644 --- a/kvm-ioctls/src/kvm_ioctls.rs +++ b/kvm-ioctls/src/kvm_ioctls.rs @@ -220,6 +220,8 @@ ioctl_io_nr!(KVM_SET_TSC_KHZ, KVMIO, 0xa2); /* Available with KVM_CAP_GET_TSC_KHZ */ #[cfg(target_arch = "x86_64")] ioctl_io_nr!(KVM_GET_TSC_KHZ, KVMIO, 0xa3); +/* Available with KVM_CAP_DIRTY_LOG_RING */ +ioctl_io_nr!(KVM_RESET_DIRTY_RINGS, KVMIO, 0xc7); /* Available with KVM_CAP_ENABLE_CAP */ #[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))] From e533567c06791a535341b32b0757794c6cd55bc4 Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Sat, 18 Oct 2025 21:50:05 +0200 Subject: [PATCH 4/9] kvm-ioctls: Enable VmFd enable_cap for all architectures Disable conditional compilation of enable_cap for VmFd, making it available on all architectures Signed-off-by: David Kleymann --- kvm-ioctls/CHANGELOG.md | 4 ++++ kvm-ioctls/src/ioctls/vm.rs | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md index 839f81ea..a6ba3469 100644 --- a/kvm-ioctls/CHANGELOG.md +++ b/kvm-ioctls/CHANGELOG.md @@ -2,6 +2,10 @@ ## Upcoming Release +### Fixed + +- Fixed `VmFd::enable_cap` available for all architectures + ### Added - Added `KvmDirtyLogRing` structure to mmap the dirty log ring. 
diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs index 19ea6bca..905f2a4c 100644 --- a/kvm-ioctls/src/ioctls/vm.rs +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -1404,7 +1404,6 @@ impl VmFd { /// cap.args[0] = 24; /// vm.enable_cap(&cap).unwrap(); /// ``` - #[cfg(any(target_arch = "x86_64", target_arch = "s390x", target_arch = "powerpc"))] pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { // SAFETY: The ioctl is safe because we allocated the struct and we know the // kernel will write exactly the size of the struct. From 231dc89fd296daca9a9aa3ae91b2f493d8345bf1 Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Sat, 18 Oct 2025 22:06:11 +0200 Subject: [PATCH 5/9] kvm-ioctls: Add memory fences to dirty ring iterator Inserted acquire/release memory fences in KvmDirtyLogRing iterator's next() method for architectures with weak memory consistency. This ensures proper synchronization when reading/writing dirty log entries shared with the kernel. Signed-off-by: David Kleymann --- kvm-ioctls/CHANGELOG.md | 5 +-- kvm-ioctls/src/ioctls/mod.rs | 45 ++++++++++++++++------- kvm-ioctls/src/ioctls/vcpu.rs | 4 +-- kvm-ioctls/src/ioctls/vm.rs | 67 ++++++++++++++++++++++------------- 4 files changed, 81 insertions(+), 40 deletions(-) diff --git a/kvm-ioctls/CHANGELOG.md b/kvm-ioctls/CHANGELOG.md index a6ba3469..04984c8e 100644 --- a/kvm-ioctls/CHANGELOG.md +++ b/kvm-ioctls/CHANGELOG.md @@ -12,8 +12,9 @@ - Added `KVM_DIRTY_GFN_F_DIRTY` and `KVM_DIRTY_GFN_F_RESET` bitflags. - Added `KvmDirtyLogRing` iterator type for accessing dirty log entries. - Added `dirty_log_ring` field to `VcpuFd` to access per-vCpu dirty rings. -- Added `dirty_log_bytes` field to `VmFd` to automatically map correct size dirty - rings for vCpus as they are created. 
+- Inserted fences in KvmDirtyLogRing iterator `next` for architectures with weak memory consistency that require Acquire/Release +- Added `DirtyLogRingInfo` struct and `dirty_log_ring_info` field to `VmFd` to + track dirty ring configuration. - Added `enable_dirty_log_ring` function on `VmFd` to check corresponding capabilities and enable KVM's dirty log ring. - Added `VcpuFd::dirty_log_ring_iter()` to iterate over dirty guest frame numbers. diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs index d7bc5a1f..0d1d30a1 100644 --- a/kvm-ioctls/src/ioctls/mod.rs +++ b/kvm-ioctls/src/ioctls/mod.rs @@ -8,6 +8,7 @@ use std::mem::size_of; use std::os::unix::io::AsRawFd; use std::ptr::{NonNull, null_mut}; +use std::sync::atomic::{Ordering, fence}; use kvm_bindings::{ KVM_COALESCED_MMIO_PAGE_OFFSET, KVM_DIRTY_GFN_F_DIRTY, KVM_DIRTY_GFN_F_RESET, @@ -39,6 +40,8 @@ pub(crate) struct KvmDirtyLogRing { gfns: NonNull, /// Ring size mask (size-1) for efficient modulo operations mask: u64, + /// `true` if we need to use Acquire/Release memory ordering + use_acq_rel: bool, } // SAFETY: TBD @@ -50,7 +53,11 @@ impl KvmDirtyLogRing { /// # Arguments /// * `fd` - vCPU file descriptor to mmap from. /// * `size` - Size of memory region in bytes. - pub(crate) fn mmap_from_fd(fd: &F, bytes: usize) -> Result { + pub(crate) fn mmap_from_fd( + fd: &F, + bytes: usize, + use_acq_rel: bool, + ) -> Result { // SAFETY: We trust the sysconf libc function and we're calling it // with a correct parameter. 
let page_size = match unsafe { libc::sysconf(libc::_SC_PAGESIZE) } { @@ -90,6 +97,7 @@ impl KvmDirtyLogRing { next_dirty: 0, gfns, mask: (slots - 1) as u64, + use_acq_rel, }); } } @@ -111,19 +119,32 @@ impl Iterator for KvmDirtyLogRing { type Item = (u32, u64); fn next(&mut self) -> Option { let i = self.next_dirty & self.mask; - unsafe { - let gfn_ptr = self.gfns.add(i as usize).as_ptr(); - let gfn = gfn_ptr.read_volatile(); - if gfn.flags & KVM_DIRTY_GFN_F_DIRTY == 0 { - // next_dirty stays the same, it will become the next dirty element - return None; - } else { - self.next_dirty += 1; - let mut updated_gfn = gfn; - updated_gfn.flags ^= KVM_DIRTY_GFN_F_RESET; + // SAFETY: i is not larger than mask, thus is a valid offset into self.gfns, + // therefore this operation produces a valid pointer to a kvm_dirty_gfn + let gfn_ptr = unsafe { self.gfns.add(i as usize).as_ptr() }; + + if self.use_acq_rel { + fence(Ordering::Acquire); + } + + // SAFETY: Can read a valid pointer to a kvm_dirty_gfn + let gfn = unsafe { gfn_ptr.read_volatile() }; + + if gfn.flags & KVM_DIRTY_GFN_F_DIRTY == 0 { + // next_dirty stays the same, it will become the next dirty element + return None; + } else { + self.next_dirty += 1; + let mut updated_gfn = gfn; + updated_gfn.flags ^= KVM_DIRTY_GFN_F_RESET; + // SAFETY: Can write to a valid pointer to a kvm_dirty_gfn + unsafe { gfn_ptr.write_volatile(updated_gfn); - return Some((gfn.slot, gfn.offset)); + }; + if self.use_acq_rel { + fence(Ordering::Release); } + return Some((gfn.slot, gfn.offset)); } } } diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs index 0428babc..8a640a32 100644 --- a/kvm-ioctls/src/ioctls/vcpu.rs +++ b/kvm-ioctls/src/ioctls/vcpu.rs @@ -2110,7 +2110,7 @@ impl VcpuFd { /// Gets the dirty log ring iterator if one is mapped. /// /// Returns an iterator over dirty guest frame numbers as (slot, offset) tuples. 
- /// Returns `None` if no dirty log ring has been mapped via [`map_dirty_log_ring`](VcpuFd::map_dirty_log_ring). + /// Returns `None` if no dirty log ring has been mapped. /// /// # Returns /// @@ -2122,7 +2122,7 @@ impl VcpuFd { /// # use kvm_ioctls::Kvm; /// # use kvm_ioctls::Cap; /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); + /// let mut vm = kvm.create_vm().unwrap(); /// vm.enable_dirty_log_ring(None).unwrap(); /// let mut vcpu = vm.create_vcpu(0).unwrap(); /// if kvm.check_extension(Cap::DirtyLogRing) { diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs index 905f2a4c..714f6051 100644 --- a/kvm-ioctls/src/ioctls/vm.rs +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -54,12 +54,21 @@ impl From for u64 { } } +/// Information about dirty log ring configuration. +#[derive(Debug)] +struct DirtyLogRingInfo { + /// Size of dirty ring in bytes. + bytes: usize, + /// Whether to use acquire/release semantics. + acq_rel: bool, +} + /// Wrapper over KVM VM ioctls. #[derive(Debug)] pub struct VmFd { vm: File, run_size: usize, - dirty_ring_bytes: usize, + dirty_log_ring_info: Option, } impl VmFd { @@ -1215,12 +1224,14 @@ impl VmFd { let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; - let dirty_log_ring = { - if self.dirty_ring_bytes > 0 { - Some(KvmDirtyLogRing::mmap_from_fd(&vcpu, self.dirty_ring_bytes)?) - } else { - None - } + let dirty_log_ring = if let Some(info) = &self.dirty_log_ring_info { + Some(KvmDirtyLogRing::mmap_from_fd( + &vcpu, + info.bytes, + info.acq_rel, + )?) + } else { + None }; Ok(new_vcpu(vcpu, kvm_run_ptr, dirty_log_ring)) @@ -1259,12 +1270,14 @@ impl VmFd { // SAFETY: we trust the kernel and verified parameters let vcpu = unsafe { File::from_raw_fd(fd) }; let kvm_run_ptr = KvmRunWrapper::mmap_from_fd(&vcpu, self.run_size)?; - let dirty_log_ring = { - if self.dirty_ring_bytes > 0 { - Some(KvmDirtyLogRing::mmap_from_fd(&vcpu, self.dirty_ring_bytes)?) 
- } else { - None - } + let dirty_log_ring = if let Some(info) = &self.dirty_log_ring_info { + Some(KvmDirtyLogRing::mmap_from_fd( + &vcpu, + info.bytes, + info.acq_rel, + )?) + } else { + None }; Ok(new_vcpu(vcpu, kvm_run_ptr, dirty_log_ring)) } @@ -1931,15 +1944,15 @@ impl VmFd { } /// Enables KVM's dirty log ring for new vCPUs created on this VM. Checks required capabilities and returns - /// `true` if the ring needs to be used together with a backup bitmap `KVM_GET_DIRTY_LOG`. Takes optional - /// dirty ring size as bytes, if not supplied, will use maximum supported dirty ring size. Enabling the dirty - /// log ring is only allowed before any vCPU was created on the VmFd. + /// a boolean `use_bitmap` as a result. `use_bitmap` is `true` if the ring needs to be used + /// together with a backup bitmap `KVM_GET_DIRTY_LOG`. Takes optional dirty ring size as bytes, if not supplied, will + /// use maximum supported dirty ring size. Enabling the dirty log ring is only allowed before any vCPU was + /// created on the VmFd. /// # Arguments /// /// * `bytes` - Size of the dirty log ring in bytes. Needs to be multiple of `std::mem::size_of::()` /// and power of two. 
- #[cfg(target_arch = "x86_64")] - pub fn enable_dirty_log_ring(&self, bytes: Option) -> Result { + pub fn enable_dirty_log_ring(&mut self, bytes: Option) -> Result { // Check if requested size is larger than 0 if let Some(sz) = bytes { if sz <= 0 @@ -1950,7 +1963,7 @@ impl VmFd { } } - let (dirty_ring_cap, max_bytes, bitmap) = { + let (dirty_ring_cap, max_bytes, use_bitmap) = { // Check if KVM_CAP_DIRTY_LOG_RING_ACQ_REL is available, enable if possible let acq_rel_sz = self.check_extension_raw(KVM_CAP_DIRTY_LOG_RING_ACQ_REL.into()); if acq_rel_sz > 0 { @@ -1987,11 +2000,12 @@ impl VmFd { args: [cap_ring_size as u64, 0, 0, 0], ..Default::default() }; + let use_acq_rel = dirty_ring_cap == KVM_CAP_DIRTY_LOG_RING_ACQ_REL; // Enable the ring cap first self.enable_cap(&ar_ring_cap)?; - if bitmap { + if use_bitmap { let with_bitmap_cap = kvm_enable_cap { cap: KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP, ..Default::default() @@ -2001,7 +2015,12 @@ impl VmFd { self.enable_cap(&with_bitmap_cap)?; } - Ok(bitmap) + self.dirty_log_ring_info = Some(DirtyLogRingInfo { + bytes: cap_ring_size as usize, + acq_rel: use_acq_rel, + }); + + Ok(use_bitmap) } /// Resets all vCPU's dirty log rings. 
This notifies the kernel that pages have been harvested @@ -2013,7 +2032,7 @@ impl VmFd { /// # extern crate kvm_ioctls; /// # use kvm_ioctls::{Cap, Kvm}; /// let kvm = Kvm::new().unwrap(); - /// let vm = kvm.create_vm().unwrap(); + /// let mut vm = kvm.create_vm().unwrap(); /// vm.enable_dirty_log_ring(None).unwrap(); /// if kvm.check_extension(Cap::DirtyLogRing) { /// vm.reset_dirty_rings().unwrap(); @@ -2131,7 +2150,7 @@ pub fn new_vmfd(vm: File, run_size: usize) -> VmFd { VmFd { vm, run_size, - dirty_ring_bytes: 0, + dirty_log_ring_info: None, } } @@ -2722,7 +2741,7 @@ mod tests { let faulty_vm_fd = VmFd { vm: unsafe { File::from_raw_fd(-2) }, run_size: 0, - dirty_ring_bytes: 0, + dirty_log_ring_info: None, }; let invalid_mem_region = kvm_userspace_memory_region { From 483342aaca0b711e13bc9d75120804fe52d89d08 Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Sat, 18 Oct 2025 22:19:14 +0200 Subject: [PATCH 6/9] kvm-ioctls: Enable KVM_ENABLE_CAP on all architectures Remove conditional compilation of KVM_ENABLE_CAP definition Signed-off-by: David Kleymann --- kvm-ioctls/src/ioctls/vm.rs | 1 + kvm-ioctls/src/kvm_ioctls.rs | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs index 714f6051..45a84bef 100644 --- a/kvm-ioctls/src/ioctls/vm.rs +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -1415,6 +1415,7 @@ impl VmFd { /// // Because an IOAPIC supports 24 pins, that's the reason why this test /// // picked this number as reference. 
/// cap.args[0] = 24; + /// #[cfg(target_arch = "x86_64")] /// vm.enable_cap(&cap).unwrap(); /// ``` pub fn enable_cap(&self, cap: &kvm_enable_cap) -> Result<()> { diff --git a/kvm-ioctls/src/kvm_ioctls.rs b/kvm-ioctls/src/kvm_ioctls.rs index b9620170..d264551f 100644 --- a/kvm-ioctls/src/kvm_ioctls.rs +++ b/kvm-ioctls/src/kvm_ioctls.rs @@ -224,7 +224,6 @@ ioctl_io_nr!(KVM_GET_TSC_KHZ, KVMIO, 0xa3); ioctl_io_nr!(KVM_RESET_DIRTY_RINGS, KVMIO, 0xc7); /* Available with KVM_CAP_ENABLE_CAP */ -#[cfg(not(any(target_arch = "aarch64", target_arch = "riscv64")))] ioctl_iow_nr!(KVM_ENABLE_CAP, KVMIO, 0xa3, kvm_enable_cap); /* Available with KVM_CAP_SIGNAL_MSI */ #[cfg(any( From 79956e8128f87f5e8004b283c7ff2114f2ea02b2 Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Mon, 20 Oct 2025 11:14:18 +0200 Subject: [PATCH 7/9] kvm-ioctls: Remove Send and Sync from KvmDirtyLogRing Remove Send and Sync from KvmDirtyLogRing Signed-off-by: David Kleymann --- kvm-ioctls/src/ioctls/mod.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs index 0d1d30a1..00f208e4 100644 --- a/kvm-ioctls/src/ioctls/mod.rs +++ b/kvm-ioctls/src/ioctls/mod.rs @@ -44,9 +44,12 @@ pub(crate) struct KvmDirtyLogRing { use_acq_rel: bool, } -// SAFETY: TBD -unsafe impl Send for KvmDirtyLogRing {} -unsafe impl Sync for KvmDirtyLogRing {} +// // SAFETY: For each vcpu we only allow creating a single instance of the KvmDirtyLogRing, +// // therefore ownership can safely be transferred between threads (`Send`). +// unsafe impl Send for KvmDirtyLogRing {} + +// // SAFETY: TBD +// unsafe impl Sync for KvmDirtyLogRing {} impl KvmDirtyLogRing { /// Maps the KVM dirty log ring from the vCPU file descriptor. 
/// @@ -132,7 +135,7 @@ impl Iterator for KvmDirtyLogRing { if gfn.flags & KVM_DIRTY_GFN_F_DIRTY == 0 { // next_dirty stays the same, it will become the next dirty element - return None; + None } else { self.next_dirty += 1; let mut updated_gfn = gfn; @@ -144,7 +147,7 @@ impl Iterator for KvmDirtyLogRing { if self.use_acq_rel { fence(Ordering::Release); } - return Some((gfn.slot, gfn.offset)); + Some((gfn.slot, gfn.offset)) } } } From 1d09123935935b41a773ca1aeeeb59e6eb10d85f Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Mon, 20 Oct 2025 14:08:08 +0200 Subject: [PATCH 8/9] kvm-ioctls: Fix a bug in enable_dirty_log_ring and add test Fixes a bug in enable_dirty_log_ring that incorrectly checks if the size passed is a multiple of the element size. Adds an example / doctest that checks basic functionality of enable_dirty_log_ring Signed-off-by: David Kleymann --- kvm-ioctls/src/ioctls/mod.rs | 12 +-- kvm-ioctls/src/ioctls/vcpu.rs | 140 +++++++++++++++++++++++++++++++++- kvm-ioctls/src/ioctls/vm.rs | 100 ++++++++++++++++++++++-- 3 files changed, 235 insertions(+), 17 deletions(-) diff --git a/kvm-ioctls/src/ioctls/mod.rs b/kvm-ioctls/src/ioctls/mod.rs index 00f208e4..086a2979 100644 --- a/kvm-ioctls/src/ioctls/mod.rs +++ b/kvm-ioctls/src/ioctls/mod.rs @@ -44,12 +44,6 @@ pub(crate) struct KvmDirtyLogRing { use_acq_rel: bool, } -// // SAFETY: For each vcpu we only allow creating a single instance of the KvmDirtyLogRing, -// // therefore ownership can safely be transferred between threads (`Send`). -// unsafe impl Send for KvmDirtyLogRing {} - -// // SAFETY: TBD -// unsafe impl Sync for KvmDirtyLogRing {} impl KvmDirtyLogRing { /// Maps the KVM dirty log ring from the vCPU file descriptor. /// @@ -94,14 +88,14 @@ impl KvmDirtyLogRing { offset as i64, ) as *mut kvm_dirty_gfn) .filter(|addr| addr.as_ptr() != libc::MAP_FAILED as *mut kvm_dirty_gfn) - .ok_or_else(|| errno::Error::last())? + .ok_or_else(errno::Error::last)?
}; - return Ok(Self { + Ok(Self { next_dirty: 0, gfns, mask: (slots - 1) as u64, use_acq_rel, - }); + }) } } diff --git a/kvm-ioctls/src/ioctls/vcpu.rs b/kvm-ioctls/src/ioctls/vcpu.rs index 8a640a32..628ce9ff 100644 --- a/kvm-ioctls/src/ioctls/vcpu.rs +++ b/kvm-ioctls/src/ioctls/vcpu.rs @@ -2202,7 +2202,7 @@ pub fn new_vcpu( vcpu, kvm_run_ptr, coalesced_mmio_ring: None, - dirty_log_ring: dirty_log_ring, + dirty_log_ring, } } @@ -2874,6 +2874,144 @@ mod tests { } } + #[cfg(target_arch = "x86_64")] + #[test] + fn test_run_code_dirty_log_ring() { + use std::io::Write; + + let kvm = Kvm::new().unwrap(); + let mut vm = kvm.create_vm().unwrap(); + + // Enable dirty log ring + let need_bitmap = vm.enable_dirty_log_ring(None).unwrap(); + + // This example is based on https://lwn.net/Articles/658511/ + #[rustfmt::skip] + let code = [ + 0xba, 0xf8, 0x03, /* mov $0x3f8, %dx */ + 0x00, 0xd8, /* add %bl, %al */ + 0x04, b'0', /* add $'0', %al */ + 0xee, /* out %al, %dx */ + 0xec, /* in %dx, %al */ + 0xc6, 0x06, 0x00, 0x80, 0x00, /* movl $0, (0x8000); This generates a MMIO Write.*/ + 0x8a, 0x16, 0x00, 0x80, /* movl (0x8000), %dl; This generates a MMIO Read.*/ + 0xc6, 0x06, 0x00, 0x20, 0x00, /* movl $0, (0x2000); Dirty one page in guest mem. */ + 0xf4, /* hlt */ + ]; + let expected_rips: [u64; 3] = [0x1003, 0x1005, 0x1007]; + + let mem_size = 0x4000; + let load_addr = mmap_anonymous(mem_size).as_ptr(); + let guest_addr: u64 = 0x1000; + let slot: u32 = 0; + let mem_region = kvm_userspace_memory_region { + slot, + guest_phys_addr: guest_addr, + memory_size: mem_size as u64, + userspace_addr: load_addr as u64, + flags: KVM_MEM_LOG_DIRTY_PAGES, + }; + unsafe { + vm.set_user_memory_region(mem_region).unwrap(); + } + + unsafe { + // Get a mutable slice of `mem_size` from `load_addr`. + // This is safe because we mapped it before. 
+ let mut slice = std::slice::from_raw_parts_mut(load_addr, mem_size); + slice.write_all(&code).unwrap(); + } + + let mut vcpu_fd = vm.create_vcpu(0).unwrap(); + + let mut vcpu_sregs = vcpu_fd.get_sregs().unwrap(); + assert_ne!(vcpu_sregs.cs.base, 0); + assert_ne!(vcpu_sregs.cs.selector, 0); + vcpu_sregs.cs.base = 0; + vcpu_sregs.cs.selector = 0; + vcpu_fd.set_sregs(&vcpu_sregs).unwrap(); + + let mut vcpu_regs = vcpu_fd.get_regs().unwrap(); + // Set the Instruction Pointer to the guest address where we loaded the code. + vcpu_regs.rip = guest_addr; + vcpu_regs.rax = 2; + vcpu_regs.rbx = 3; + vcpu_regs.rflags = 2; + vcpu_fd.set_regs(&vcpu_regs).unwrap(); + + let mut debug_struct = kvm_guest_debug { + control: KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP, + pad: 0, + arch: kvm_guest_debug_arch { + debugreg: [0, 0, 0, 0, 0, 0, 0, 0], + }, + }; + vcpu_fd.set_guest_debug(&debug_struct).unwrap(); + + let mut instr_idx = 0; + loop { + match vcpu_fd.run().expect("run failed") { + VcpuExit::IoIn(addr, data) => { + assert_eq!(addr, 0x3f8); + assert_eq!(data.len(), 1); + } + VcpuExit::IoOut(addr, data) => { + assert_eq!(addr, 0x3f8); + assert_eq!(data.len(), 1); + assert_eq!(data[0], b'5'); + } + VcpuExit::MmioRead(addr, data) => { + assert_eq!(addr, 0x8000); + assert_eq!(data.len(), 1); + } + VcpuExit::MmioWrite(addr, data) => { + assert_eq!(addr, 0x8000); + assert_eq!(data.len(), 1); + assert_eq!(data[0], 0); + } + VcpuExit::Debug(debug) => { + if instr_idx == expected_rips.len() - 1 { + // Disabling debugging/single-stepping + debug_struct.control = 0; + vcpu_fd.set_guest_debug(&debug_struct).unwrap(); + } else if instr_idx >= expected_rips.len() { + unreachable!(); + } + let vcpu_regs = vcpu_fd.get_regs().unwrap(); + assert_eq!(vcpu_regs.rip, expected_rips[instr_idx]); + assert_eq!(debug.exception, 1); + assert_eq!(debug.pc, expected_rips[instr_idx]); + // Check first 15 bits of DR6 + let mask = (1 << 16) - 1; + assert_eq!(debug.dr6 & mask, 0b100111111110000); + // Bit 
10 in DR7 is always 1 + assert_eq!(debug.dr7, 1 << 10); + instr_idx += 1; + } + VcpuExit::Hlt => { + // The code snippet dirties 2 pages: + // * one when the code itself is loaded in memory; + // * and one more from the `movl` that writes to address 0x8000 + + let dirty_pages: u32 = + u32::try_from(vcpu_fd.dirty_log_ring_iter().unwrap().count()).unwrap() + + if need_bitmap { + let dirty_pages_bitmap = vm.get_dirty_log(slot, mem_size).unwrap(); + dirty_pages_bitmap + .into_iter() + .map(|page| page.count_ones()) + .sum() + } else { + 0 + }; + assert_eq!(dirty_pages, 2); + break; + } + r => panic!("unexpected exit reason: {:?}", r), + } + } + } + #[test] #[cfg(target_arch = "aarch64")] fn test_get_preferred_target() { diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs index 45a84bef..ced3ce38 100644 --- a/kvm-ioctls/src/ioctls/vm.rs +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -1947,18 +1947,34 @@ impl VmFd { /// Enables KVM's dirty log ring for new vCPUs created on this VM. Checks required capabilities and returns /// a boolean `use_bitmap` as a result. `use_bitmap` is `true` if the ring needs to be used /// together with a backup bitmap `KVM_GET_DIRTY_LOG`. Takes optional dirty ring size as bytes, if not supplied, will - /// use maximum supported dirty ring size. Enabling the dirty log ring is only allowed before any vCPU was - /// created on the VmFd. + /// use maximum supported dirty ring size. The size needs to be multiple of `std::mem::size_of::<kvm_dirty_gfn>()` + /// and power of two. Enabling the dirty log ring is only allowed before any vCPU was created on the VmFd. + /// /// # Arguments /// - /// * `bytes` - Size of the dirty log ring in bytes. Needs to be multiple of `std::mem::size_of::<kvm_dirty_gfn>()` - /// and power of two. + /// * `bytes` - Size of the dirty log ring in bytes.
+ /// + /// # Example + /// + /// ``` + /// # extern crate kvm_bindings; + /// # extern crate kvm_ioctls; + /// # use kvm_ioctls::Kvm; + /// # use kvm_bindings::{ KVM_CAP_DIRTY_LOG_RING, }; + /// + /// let kvm = Kvm::new().unwrap(); + /// let mut vm = kvm.create_vm().unwrap(); + /// let max_supported_size = vm.check_extension_raw(KVM_CAP_DIRTY_LOG_RING.into()); + /// vm.enable_dirty_log_ring(Some(max_supported_size)); + /// // Create one vCPU with the ID=0 to cause a dirty log ring to be mapped. + /// let vcpu = vm.create_vcpu(0); + /// ``` pub fn enable_dirty_log_ring(&mut self, bytes: Option<i32>) -> Result<bool> { // Check if requested size is larger than 0 if let Some(sz) = bytes { if sz <= 0 || !(sz as u32).is_power_of_two() - || (sz as usize % std::mem::size_of::<kvm_dirty_gfn>() == 0) + || (sz as usize % std::mem::size_of::<kvm_dirty_gfn>() != 0) { return Err(errno::Error::new(libc::EINVAL)); } @@ -2034,13 +2050,14 @@ impl VmFd { /// # use kvm_ioctls::{Cap, Kvm}; /// let kvm = Kvm::new().unwrap(); /// let mut vm = kvm.create_vm().unwrap(); - /// vm.enable_dirty_log_ring(None).unwrap(); /// if kvm.check_extension(Cap::DirtyLogRing) { + /// vm.enable_dirty_log_ring(None).unwrap(); + /// // Create one vCPU with the ID=0 to cause a dirty log ring to be mapped. + /// let vcpu = vm.create_vcpu(0); /// vm.reset_dirty_rings().unwrap(); /// } /// ``` /// - #[cfg(target_arch = "x86_64")] pub fn reset_dirty_rings(&self) -> Result { // SAFETY: Safe because we know that our file is a KVM fd and that the request is one of // the ones defined by kernel. @@ -3049,4 +3066,73 @@ mod tests { vm.has_device_attr(&dist_attr).unwrap(); vm.set_device_attr(&dist_attr).unwrap(); } + + #[test] + fn test_enable_dirty_log_rings() { + let kvm = Kvm::new().unwrap(); + let mut vm = kvm.create_vm().unwrap(); + if kvm.check_extension(Cap::DirtyLogRing) { + vm.enable_dirty_log_ring(None).unwrap(); + // Create two vCPUs to cause two dirty log rings to be mapped.
+ let _vcpu0 = vm.create_vcpu(0).unwrap(); + let _vcpu1 = vm.create_vcpu(1).unwrap(); + vm.reset_dirty_rings().unwrap(); + } + } + + #[test] + fn test_enable_dirty_log_rings_sized() { + let kvm = Kvm::new().unwrap(); + let mut vm = kvm.create_vm().unwrap(); + if kvm.check_extension(Cap::DirtyLogRing) { + let max_supported_size = vm.check_extension_raw(KVM_CAP_DIRTY_LOG_RING.into()); + let size = std::cmp::max(max_supported_size / 2, size_of::<kvm_dirty_gfn>() as i32); + vm.enable_dirty_log_ring(Some(size)).unwrap(); + // Create two vCPUs to cause two dirty log rings to be mapped. + let _vcpu0 = vm.create_vcpu(0).unwrap(); + let _vcpu1 = vm.create_vcpu(1).unwrap(); + vm.reset_dirty_rings().unwrap(); + } + } + + #[test] + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + fn test_enable_dirty_log_rings_acq_rel() { + let kvm = Kvm::new().unwrap(); + let mut vm = kvm.create_vm().unwrap(); + if kvm.check_extension(Cap::DirtyLogRing) { + vm.enable_dirty_log_ring(None).unwrap(); + + // Manually enable Acq/Rel + vm.dirty_log_ring_info = vm + .dirty_log_ring_info + .map(|i| DirtyLogRingInfo { acq_rel: true, ..i }); + + // Create two vCPUs to cause two dirty log rings to be mapped.
+ let _vcpu0 = vm.create_vcpu(0).unwrap(); + let _vcpu1 = vm.create_vcpu(1).unwrap(); + + // Reset dirty rings + vm.reset_dirty_rings().unwrap(); + } + } + + #[test] + fn test_illegal_dirty_ring() { + let kvm = Kvm::new().unwrap(); + let mut vm = kvm.create_vm().unwrap(); + if kvm.check_extension(Cap::DirtyLogRing) { + // Create one vCPU without dirty log ring + let _vcpu0 = vm.create_vcpu(0).unwrap(); + + // Not allowed after vCPU has been created + vm.enable_dirty_log_ring(None).unwrap_err(); + + // Create another vCPU + let _vcpu1 = vm.create_vcpu(1).unwrap(); + + // Dirty ring should not be enabled + vm.reset_dirty_rings().unwrap_err(); + } + } } From d53749f16f4c27cc3c4843f9a3a786a543363d23 Mon Sep 17 00:00:00 2001 From: David Kleymann Date: Wed, 22 Oct 2025 12:28:21 +0200 Subject: [PATCH 9/9] kvm-ioctls: import ioctl on every architecture By importing ioctl on every architecture, as opposed to just aarch64 and x86_64, we enable support for dirty log rings on riscv64. Signed-off-by: David Kleymann --- kvm-ioctls/src/ioctls/vm.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/kvm-ioctls/src/ioctls/vm.rs b/kvm-ioctls/src/ioctls/vm.rs index ced3ce38..22152150 100644 --- a/kvm-ioctls/src/ioctls/vm.rs +++ b/kvm-ioctls/src/ioctls/vm.rs @@ -22,7 +22,6 @@ use crate::ioctls::{KvmDirtyLogRing, KvmRunWrapper, Result}; use crate::kvm_ioctls::*; use vmm_sys_util::errno; use vmm_sys_util::eventfd::EventFd; -#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] use vmm_sys_util::ioctl::ioctl; #[cfg(target_arch = "x86_64")] use vmm_sys_util::ioctl::ioctl_with_mut_ptr;