From 8b8ef8a20f77682fcd08dc70dfe8e1969df141c6 Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Sat, 12 Oct 2024 21:06:22 +0100 Subject: [PATCH 01/14] feat: make `make` command work on macos --- Makefile | 5 +++++ build_on_krunvm.sh | 0 2 files changed, 5 insertions(+) mode change 100644 => 100755 build_on_krunvm.sh diff --git a/Makefile b/Makefile index 36f980747..a323d2a86 100644 --- a/Makefile +++ b/Makefile @@ -83,8 +83,13 @@ debug: $(LIBRARY_DEBUG_$(OS)) libkrun.pc ifeq ($(BUILD_INIT),1) INIT_BINARY = init/init $(INIT_BINARY): $(INIT_SRC) +ifeq ($(OS),Darwin) + @echo "Building on macOS, using ./build_on_krunvm.sh" + ./build_on_krunvm.sh +else gcc -O2 -static -Wall $(INIT_DEFS) -o $@ $(INIT_SRC) $(INIT_DEFS) endif +endif $(LIBRARY_RELEASE_$(OS)): $(INIT_BINARY) cargo build --release $(FEATURE_FLAGS) diff --git a/build_on_krunvm.sh b/build_on_krunvm.sh old mode 100644 new mode 100755 From 29d53ff8ba75edb74d50b43e86d1480aa227cada Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Wed, 30 Oct 2024 15:30:14 +0100 Subject: [PATCH 02/14] feat: change ld link path from /opt/homebrew/lib to /usr/local/lib on macos (#3) --- examples/Makefile | 4 ++-- src/libkrun/build.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/Makefile b/examples/Makefile index 8c163059e..34c204f2d 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -2,9 +2,9 @@ ARCH = $(shell uname -m) OS = $(shell uname -s) LDFLAGS_x86_64_Linux = -lkrun LDFLAGS_aarch64_Linux = -lkrun -LDFLAGS_arm64_Darwin = -L/opt/homebrew/lib -lkrun +LDFLAGS_arm64_Darwin = -L/usr/local/lib -lkrun LDFLAGS_sev = -lkrun-sev -LDFLAGS_efi = -L/opt/homebrew/lib -lkrun-efi +LDFLAGS_efi = -L/usr/local/lib -lkrun-efi CFLAGS = -O2 -g -I../include ROOTFS_DISTRO := fedora ROOTFS_DIR = rootfs_$(ROOTFS_DISTRO) diff --git a/src/libkrun/build.rs b/src/libkrun/build.rs index a3ccc2288..936b183aa 100644 --- a/src/libkrun/build.rs +++ b/src/libkrun/build.rs @@ -2,7 +2,7 @@ fn main() { #[cfg(target_os = 
"macos")] println!("cargo:rustc-link-lib=framework=Hypervisor"); #[cfg(target_os = "macos")] - println!("cargo:rustc-link-search=/opt/homebrew/lib"); + println!("cargo:rustc-link-search=/usr/local/lib"); #[cfg(all(not(feature = "tee"), not(feature = "efi")))] println!("cargo:rustc-link-lib=krunfw"); #[cfg(feature = "tee")] From d79d1d6f5e5886d7ecd1fc2756d807b585c1d043 Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Thu, 20 Mar 2025 05:02:02 +0100 Subject: [PATCH 03/14] feat(virtiofs): add overlayfs implementation for macOS (#7) * feat(devices): add overlayfs implementation for macOS Add an overlay filesystem implementation that combines multiple layers into a single logical filesystem, following OCI image specification's layer filesystem changeset format. This implementation: - Supports multiple read-only lower layers and one writable upper layer - Uses OCI-style whiteout files (.wh. prefix) to mark deleted files - Uses OCI-style opaque directory markers (.wh..wh..opq) to mask directories - Includes comprehensive test suite for layer operations - Adds intaglio and tempfile dependencies for symbol interning and testing The implementation provides a foundation for container image support on macOS by allowing multiple filesystem layers to be combined into a single view. 
* refactor(overlayfs): improve lookup logic and file organization - Split overlayfs.rs into separate fs.rs and tests.rs modules - Simplify layer root handling by replacing path_to_inode_map with layer_roots - Improve lookup logic to handle whiteouts and opaque directories correctly - Add comprehensive test cases for complex directory structures - Add Debug derive for MultikeyBTreeMap and other structs - Remove unused helper methods and simplify path handling - Add helper functions for volume path construction - Improve error handling consistency The main architectural change is moving from a path-based lookup system to a layer-based traversal system that better handles overlay filesystem semantics like whiteouts and opaque directories. * feat(overlayfs): implement setattr and copyup functionality Adds support for modifying file attributes and copying files from lower layers to the upper layer in the macOS overlayfs implementation. Key changes include: - Add setattr support for changing file permissions, ownership, size and timestamps - Implement copyup functionality to promote files to upper layer when modified - Add extended attribute support for storing overlayfs-specific metadata - Add comprehensive tests for operations - Refactor stat operations to handle overlayfs permission overrides - Make inode data fields public within crate for testing * feat(overlayfs): implement unlink operation and add tests Implements the unlink operation for the overlayfs filesystem, allowing files to be deleted from any layer. 
Key changes include: - Add get_top_layer_idx helper method to get index of top layer - Implement do_unlink to handle file deletion and whiteout creation - Refactor do_forget to handle inode reference counting - Add dev_ino_and_name_to_vol_whiteout_path helper for whiteout files - Add comprehensive test suite for unlink functionality including: - Basic file deletion - Whiteout creation for lower layer files - Multi-layer scenarios - Error cases - Complex directory structures * feat(overlayfs): implement rmdir and fix lookup path handling - Add do_rmdir() implementation for directory removal across layers - Fix path_inodes handling in lookup_segment_by_segment - Add whiteout file creation for removed directories - Fix path handling in do_copyup to skip root inode - Add comprehensive test coverage for rmdir functionality * feat(overlayfs): implement symlink support for OverlayFs and add tests Implement symlink support for the OverlayFs filesystem and add comprehensive test coverage for OverlayFs. Key changes include: - Add do_symlink() implementation in OverlayFs - Add symlink tests covering basic functionality, nested directories, multiple layers, and error cases - Clean up error handling pattern in do_mkdir() * feat(overlayfs): implement rename operation and add tests This commit implements the rename operation for the overlayfs filesystem on macOS and adds extensive test coverage. Key changes include: - Implement `do_rename` method to handle file/directory renaming - Add helper method `create_whiteout_for_lower` to handle whiteout creation - Support LINUX_RENAME_WHITEOUT and LINUX_RENAME_EXCHANGE flags - Add comprehensive tests for rename functionality including: - Basic rename operations - Whiteout handling - Multi-layer scenarios - Complex directory structures - Error cases * feat(overlayfs): implement hard link support for overlayfs Adds support for creating hard links in the overlayfs filesystem implementation. 
Key changes include: - Implement do_link() method to handle hard link creation - Add comprehensive test suite for link functionality including: - Basic link creation - Links across multiple layers - Error handling - Nested directory scenarios - Whiteout handling * feat(overlayfs): Implement open/release and improve copy-up operations This commit adds several key improvements to the overlayfs implementation: - Implements open() and release() operations for files and directories - Adds CachePolicy enum to control FUSE caching behavior - Improves copy-up operation by: - Using clonefile() for COW semantics when available on macOS - Falling back to regular copy when clonefile fails - Extracting copy logic into separate helper method - Adds comprehensive tests for open/release and copy-up operations - Fixes init_handle value to start at 0 - Renames do_copyup to copy_up for consistency The changes improve performance by using COW semantics where possible and add proper file handle management. The new CachePolicy gives users control over how aggressively the FUSE client caches file data. * feat(overlayfs): Implement read functionality and add comprehensive tests Implement the read method for the OverlayFS filesystem, allowing files to be read from any layer in the overlay stack. The implementation handles: - Basic file reading with offset and size controls - Reading through copied-up files - Proper handling of whiteout files and opaque directories - Special handling for the init binary when not in EFI mode Add extensive test coverage including: - Basic read functionality - Reading with offsets - Partial reads - Reading through nested directories - Proper handling of whiteouts and opaque directories - Reading after copy-up operations - Error cases with invalid handles * feat(overlayfs): Implement write functionality and add tests Implements the write method for the overlayfs filesystem, allowing files to be written to and modified. 
The implementation includes: - Basic write functionality with offset support - Copy-up behavior when writing to files in lower layers - Proper handling of whiteouts and opaque directories - Support for partial writes and multiple write operations Also adds comprehensive test coverage for the write functionality, including: - Basic write operations - Writing with offsets - Partial writes - Copy-up behavior - Invalid handle handling - Multiple sequential writes - Writing to files in nested directories - Interaction with whiteouts and opaque directories Additionally: - Renames NotReallyZeroCopyWriter to TestContainer - Implements ZeroCopyReader for TestContainer to support write tests - Removes commented out test_open_root_directory * feat(overlayfs): implement flush, fsync and opendir operations for macos overlayfs Implement several filesystem operations for the macOS overlayfs: - Add flush operation that emulates fd close behavior using dup/close - Add fsync operation to synchronize file contents - Implement opendir by reusing existing do_open with O_DIRECTORY flag - Add comprehensive test suite for opendir functionality The tests cover: - Basic directory opening - Handling non-existent directories - Whiteout directory behavior - Copy-up scenarios - Multiple open/release cycles - Various open flags combinations * feat(overlayfs): Skip copy-up for read-only file operations Previously, the overlayfs implementation would copy files to the top layer even for read-only operations. This was unnecessary overhead since files only need to be in the top layer when they are being modified. This change optimizes the behavior by only performing copy-up when write access is requested. The implementation now checks the O_ACCMODE flags to determine if the file needs to be copied to the top layer. 
Added test cases to verify: - Opening files read-only keeps them in the bottom layer - Opening directories read-only keeps them in the bottom layer * feat(overlayfs): implement extended attributes support Add support for extended attributes (xattrs) in the overlayfs implementation: - Implement setxattr, getxattr, listxattr and removexattr operations - Add xattr configuration flag to enable/disable xattr support - Handle copy-up operations when setting xattrs on files in lower layers - Add comprehensive test coverage for xattr operations including: - Basic set/get/list/remove operations - Copy-up behavior for lower layer files - Error handling for invalid operations - Proper handling when xattr support is disabled * feat(virtio-fs): implement create, mknod and access ops for macOS overlayfs - Add create() operation to support creating new files with permissions - Add mknod() operation for special files (as regular files on macOS) - Add access() operation with Unix permission checking logic - Reorganize tests into separate modules The implementation handles file ownership, permissions, security contexts, and parent directory copy-up when needed. * feat(overlayfs): implement fs operations for macOS overlayfs Implements the following filesystem operations for macOS overlayfs: - fallocate: Preallocates space for files - lseek: Repositions file offset with special handling for SEEK_DATA/SEEK_HOLE - setupmapping: Sets up DAX memory mapping between guest and host - removemapping: Removes DAX memory mappings Also fixes comparison of libc function return values to check for < 0 instead of != 0 to properly handle errors. 
--- .gitignore | 1 + Cargo.lock | 93 +- src/devices/Cargo.toml | 10 +- src/devices/src/virtio/fs/macos/mod.rs | 1 + .../src/virtio/fs/macos/overlayfs/fs.rs | 3320 +++++++++++++++++ .../src/virtio/fs/macos/overlayfs/mod.rs | 5 + .../src/virtio/fs/macos/overlayfs/tests.rs | 191 + .../virtio/fs/macos/overlayfs/tests/create.rs | 1494 ++++++++ .../virtio/fs/macos/overlayfs/tests/lookup.rs | 455 +++ .../fs/macos/overlayfs/tests/metadata.rs | 904 +++++ .../virtio/fs/macos/overlayfs/tests/misc.rs | 555 +++ .../virtio/fs/macos/overlayfs/tests/open.rs | 380 ++ .../virtio/fs/macos/overlayfs/tests/read.rs | 1266 +++++++ .../virtio/fs/macos/overlayfs/tests/remove.rs | 508 +++ .../virtio/fs/macos/overlayfs/tests/write.rs | 428 +++ src/devices/src/virtio/fs/multikey.rs | 2 +- 16 files changed, 9608 insertions(+), 5 deletions(-) create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/fs.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/mod.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/lookup.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/read.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/remove.rs create mode 100644 src/devices/src/virtio/fs/macos/overlayfs/tests/write.rs diff --git a/.gitignore b/.gitignore index aa03a7cab..7837b63a7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ __pycache__ /libkrun.pc init/init examples/chroot_vm +ignore/ diff --git a/Cargo.lock b/Cargo.lock index 42e60ad7c..93a6f13bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -363,6 +363,7 @@ dependencies = [ "env_logger", "hvf", "imago", + 
"intaglio", "libc", "log", "lru", @@ -371,6 +372,7 @@ dependencies = [ "polly", "rand", "rutabaga_gfx", + "tempfile", "thiserror", "utils", "virtio-bindings", @@ -425,6 +427,22 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +[[package]] +name = "errno" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +dependencies = [ + "libc", + "windows-sys 0.59.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "flate2" version = "1.0.35" @@ -553,7 +571,19 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.13.3+wasi-0.2.2", + "windows-targets 0.52.6", ] [[package]] @@ -664,6 +694,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "intaglio" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7785e397d45f5a00bd35df6c293518c240c321b734b15a02718aa21103de1ce9" + [[package]] name = "iocuddle" version = "0.1.1" @@ -828,6 +864,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" + [[package]] name = "log" version = "0.4.22" @@ -1125,7 +1167,7 @@ version = "0.6.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.15", ] [[package]] @@ -1143,7 +1185,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", + "getrandom 0.2.15", "libredox", "thiserror", ] @@ -1209,6 +1251,19 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "0.38.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.59.0", +] + [[package]] name = "rutabaga_gfx" version = "0.1.2" @@ -1405,6 +1460,20 @@ version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +[[package]] +name = "tempfile" +version = "3.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +dependencies = [ + "cfg-if", + "fastrand", + "getrandom 0.3.1", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1628,6 +1697,15 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.13.3+wasi-0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" version = "0.2.95" @@ -1880,6 +1958,15 @@ dependencies = [ "memchr", ] +[[package]] 
+name = "wit-bindgen-rt" +version = "0.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +dependencies = [ + "bitflags 2.6.0", +] + [[package]] name = "yansi-term" version = "0.1.2" diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 7e5163467..7bf79ae60 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -15,6 +15,7 @@ snd = ["pw", "thiserror"] virgl_resource_map2 = [] [dependencies] +intaglio = "1.10.0" bitflags = "1.2.0" crossbeam-channel = "0.5" env_logger = "0.9.0" @@ -32,7 +33,10 @@ zerocopy-derive = { version = "0.6.3", optional = true } arch = { path = "../arch" } utils = { path = "../utils" } polly = { path = "../polly" } -rutabaga_gfx = { path = "../rutabaga_gfx", features = ["virgl_renderer", "virgl_renderer_next"], optional = true } +rutabaga_gfx = { path = "../rutabaga_gfx", features = [ + "virgl_renderer", + "virgl_renderer_next", +], optional = true } imago = { path = "../imago", features = ["sync-wrappers", "vm-memory"] } @@ -43,3 +47,7 @@ lru = ">=0.9" [target.'cfg(target_os = "linux")'.dependencies] rutabaga_gfx = { path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" + + +[dev-dependencies] +tempfile = "3.17.1" diff --git a/src/devices/src/virtio/fs/macos/mod.rs b/src/devices/src/virtio/fs/macos/mod.rs index b8edbc7f9..0dcdeab84 100644 --- a/src/devices/src/virtio/fs/macos/mod.rs +++ b/src/devices/src/virtio/fs/macos/mod.rs @@ -1,2 +1,3 @@ pub mod fs_utils; +pub mod overlayfs; pub mod passthrough; diff --git a/src/devices/src/virtio/fs/macos/overlayfs/fs.rs b/src/devices/src/virtio/fs/macos/overlayfs/fs.rs new file mode 100644 index 000000000..0e8d16c33 --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/fs.rs @@ -0,0 +1,3320 @@ +use std::collections::{btree_map, BTreeMap, HashMap, HashSet}; +use std::ffi::{CStr, CString}; +use std::fs::File; +use std::io; +use std::mem::MaybeUninit; 
+use std::os::unix::ffi::OsStrExt; +use std::os::unix::fs::MetadataExt; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::path::PathBuf; +use std::ptr::null_mut; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::{Arc, Mutex, RwLock}; +use std::time::Duration; + +use crossbeam_channel::{unbounded, Sender}; +use hvf::MemoryMapping; +use intaglio::cstr::SymbolTable; +use intaglio::Symbol; + +use crate::virtio::bindings; +use crate::virtio::fs::filesystem::{ + Context, DirEntry, Entry, ExportTable, Extensions, FileSystem, FsOptions, GetxattrReply, + ListxattrReply, OpenOptions, SecContext, SetattrValid, ZeroCopyReader, ZeroCopyWriter, +}; +use crate::virtio::fs::fuse; +use crate::virtio::fs::multikey::MultikeyBTreeMap; +use crate::virtio::linux_errno::{linux_error, LINUX_ERANGE}; + +//-------------------------------------------------------------------------------------------------- +// Constants +//-------------------------------------------------------------------------------------------------- + +/// The prefix for whiteout files +const WHITEOUT_PREFIX: &str = ".wh."; + +/// The marker for opaque directories +const OPAQUE_MARKER: &str = ".wh..wh..opq"; + +/// The volume directory +const VOL_DIR: &str = ".vol"; + +/// The owner and permissions attribute +const OWNER_PERMS_XATTR_KEY: &[u8] = b"user.vm.owner_perms\0"; + +/// Maximum allowed number of layers for the overlay filesystem. 
+const MAX_LAYERS: usize = 128; + +#[cfg(not(feature = "efi"))] +static INIT_BINARY: &[u8] = include_bytes!("../../../../../../../init/init"); + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +/// Type alias for inode identifiers +type Inode = u64; + +/// Type alias for file handle identifiers +type Handle = u64; + +/// Alternative key for looking up inodes by device and inode number +#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq)] +struct InodeAltKey { + /// The inode number from the host filesystem + ino: u64, + + /// The device ID from the host filesystem + dev: i32, +} + +/// Data associated with an inode +#[derive(Debug)] +pub(crate) struct InodeData { + /// The inode number in the overlay filesystem + pub(crate) inode: Inode, + + /// The inode number from the host filesystem + pub(crate) ino: u64, + + /// The device ID from the host filesystem + pub(crate) dev: i32, + + /// Reference count for this inode from the perspective of [`FileSystem::lookup`] + pub(crate) refcount: AtomicU64, + + /// Path to inode + pub(crate) path: Vec, + + /// The layer index this inode belongs to + pub(crate) layer_idx: usize, +} + +/// The caching policy that the file system should report to the FUSE client. By default the FUSE +/// protocol uses close-to-open consistency. This means that any cached contents of the file are +/// invalidated the next time that file is opened. +#[derive(Debug, Default, Clone)] +pub enum CachePolicy { + /// The client should never cache file data and all I/O should be directly forwarded to the + /// server. This policy must be selected when file contents may change without the knowledge of + /// the FUSE client (i.e., the file system does not have exclusive access to the directory). + Never, + + /// The client is free to choose when and how to cache file data. 
This is the default policy and + /// uses close-to-open consistency as described in the enum documentation. + #[default] + Auto, + + /// The client should always cache file data. This means that the FUSE client will not + /// invalidate any cached data that was returned by the file system the last time the file was + /// opened. This policy should only be selected when the file system has exclusive access to the + /// directory. + Always, +} + +/// Data associated with an open file handle +#[derive(Debug)] +pub(crate) struct HandleData { + /// The inode this handle refers to + pub(crate) inode: Inode, + + /// The underlying file object + pub(crate) file: RwLock, +} + +/// Represents either a file descriptor or a path +#[derive(Clone)] +enum FileId { + /// A file descriptor + Fd(RawFd), + + /// A path + Path(CString), +} + +/// Configuration for the overlay filesystem +#[derive(Debug)] +pub struct Config { + /// How long the FUSE client should consider directory entries to be valid. + /// If the contents of a directory can only be modified by the FUSE client, + /// this should be a large value. + pub entry_timeout: Duration, + + /// How long the FUSE client should consider file and directory attributes to be valid. + /// If the attributes of a file or directory can only be modified by the FUSE client, + /// this should be a large value. + pub attr_timeout: Duration, + + /// The caching policy the file system should use. + pub cache_policy: CachePolicy, + + /// Whether writeback caching is enabled. + /// This can improve performance but increases the risk of data corruption if file + /// contents can change without the knowledge of the FUSE client. + pub writeback: bool, + + /// Whether the filesystem should support Extended Attributes (xattr). + /// Enabling this feature may have a significant impact on performance. + pub xattr: bool, + + /// Optional file descriptor for /proc/self/fd. + /// This is useful for sandboxing scenarios. 
+ pub proc_sfd_rawfd: Option, + + /// ID of this filesystem to uniquely identify exports. + pub export_fsid: u64, + + /// Table of exported FDs to share with other subsystems. + pub export_table: Option, +} + +/// An overlay filesystem implementation that combines multiple layers into a single logical filesystem. +/// +/// This implementation follows standard overlay filesystem concepts, similar to Linux's OverlayFS, +/// while using OCI image specification's layer filesystem changeset format for whiteouts: +/// +/// - Uses OCI-style whiteout files (`.wh.` prefixed files) to mark deleted files in upper layers +/// - Uses OCI-style opaque directory markers (`.wh..wh..opq`) to mask lower layer directories +/// +/// ## Layer Structure +/// +/// The overlay filesystem consists of: +/// - A single top layer (upperdir) that is writable +/// - Zero or more lower layers that are read-only +/// +/// ## Layer Ordering +/// +/// When creating an overlay filesystem, layers are provided in order from lowest to highest: +/// The last layer in the provided sequence becomes the top layer (upperdir), while +/// the others become read-only lower layers. 
This matches the OCI specification where: +/// - The top layer (upperdir) handles all modifications +/// - Lower layers provide the base content +/// - Changes in the top layer shadow content in lower layers +/// +/// ## Layer Behavior +/// +/// - All write operations occur in the top layer +/// - When reading, the top layer takes precedence over lower layers +/// - Whiteout files in the top layer hide files from lower layers +/// - Opaque directory markers completely mask lower layer directory contents +pub struct OverlayFs { + /// Map of inodes by ID and alternative keys + inodes: RwLock>>, + + /// Counter for generating the next inode ID + next_inode: AtomicU64, + + /// The initial inode ID (typically 1 for the root directory) + init_inode: u64, + + /// Map of open file handles by ID + handles: RwLock>>, + + /// Counter for generating the next handle ID + next_handle: AtomicU64, + + /// The initial handle ID + init_handle: u64, + + /// Map of memory-mapped windows + map_windows: Mutex>, + + /// Whether writeback caching is enabled + writeback: AtomicBool, + + /// Configuration options + config: Config, + + /// Symbol table for interned filenames + filenames: Arc>, + + /// Root inodes for each layer, ordered from bottom to top + layer_roots: Arc>>, +} + +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- + +impl InodeAltKey { + fn new(ino: u64, dev: i32) -> Self { + Self { ino, dev } + } +} + +impl OverlayFs { + /// Creates a new OverlayFs with the given layers + pub fn new(layers: Vec, config: Config) -> io::Result { + if layers.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "at least one layer must be provided", + )); + } + + if layers.len() > MAX_LAYERS { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "maximum overlayfs layer count exceeded", + )); + } + + 
let mut next_inode = 1; + let mut inodes = MultikeyBTreeMap::new(); + + // Initialize the root inodes for all layers + let layer_roots = Self::init_root_inodes(&layers, &mut inodes, &mut next_inode)?; + + Ok(OverlayFs { + inodes: RwLock::new(inodes), + next_inode: AtomicU64::new(next_inode), + init_inode: 1, + handles: RwLock::new(BTreeMap::new()), + next_handle: AtomicU64::new(1), + init_handle: 0, + map_windows: Mutex::new(HashMap::new()), + writeback: AtomicBool::new(false), + config, + filenames: Arc::new(RwLock::new(SymbolTable::new())), + layer_roots: Arc::new(RwLock::new(layer_roots)), + }) + } + + /// Initialize root inodes for all layers + /// + /// This function processes layers from top to bottom, creating root inodes for each layer. + /// + /// Parameters: + /// - layers: Slice of paths to the layer roots, ordered from bottom to top + /// - inodes: Mutable reference to the inodes map to populate + /// - next_inode: Mutable reference to the next inode counter + /// + /// Returns: + /// - io::Result> containing the root inodes for each layer + fn init_root_inodes( + layers: &[PathBuf], + inodes: &mut MultikeyBTreeMap>, + next_inode: &mut u64, + ) -> io::Result> { + // Pre-allocate layer_roots with the right size + let mut layer_roots = vec![0; layers.len()]; + + // Process layers from top to bottom + for (i, layer_path) in layers.iter().enumerate().rev() { + let layer_idx = i; // Layer index from bottom to top + + // Get the stat information for this layer's root + let c_path = CString::new(layer_path.to_string_lossy().as_bytes())?; + let st = Self::unpatched_stat(&FileId::Path(c_path))?; + + // Create the alt key for this inode + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev as i32); + + // Create the inode data + let inode_id = *next_inode; + *next_inode += 1; + + let inode_data = Arc::new(InodeData { + inode: inode_id, + ino: st.st_ino, + dev: st.st_dev as i32, + refcount: AtomicU64::new(1), + path: vec![], + layer_idx, + }); + + // Insert the 
inode into the map + inodes.insert(inode_id, alt_key, inode_data); + + // Store the root inode for this layer + layer_roots[layer_idx] = inode_id; + } + + Ok(layer_roots) + } + + pub fn get_config(&self) -> &Config { + &self.config + } + + pub fn get_filenames(&self) -> &Arc> { + &self.filenames + } + + fn get_layer_root(&self, layer_idx: usize) -> io::Result> { + let layer_roots = self.layer_roots.read().unwrap(); + + // Check if the layer index is valid + if layer_idx >= layer_roots.len() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "layer index out of bounds", + )); + } + + // Get the inode for this layer + let inode = layer_roots[layer_idx]; + if inode == 0 { + return Err(io::Error::new(io::ErrorKind::NotFound, "layer not found")); + } + + // Get the inode data + self.get_inode_data(inode) + } + + /// Creates a new inode and adds it to the inode map + fn create_inode( + &self, + ino: u64, + dev: i32, + path: Vec, + layer_idx: usize, + ) -> (Inode, Arc) { + let inode = self.next_inode.fetch_add(1, Ordering::SeqCst); + + let data = Arc::new(InodeData { + inode, + ino, + dev, + refcount: AtomicU64::new(1), + path, + layer_idx, + }); + + let alt_key = InodeAltKey::new(ino, dev); + self.inodes + .write() + .unwrap() + .insert(inode, alt_key, data.clone()); + + (inode, data) + } + + /// Gets the InodeData for an inode + pub(super) fn get_inode_data(&self, inode: Inode) -> io::Result> { + self.inodes + .read() + .unwrap() + .get(&inode) + .cloned() + .ok_or_else(ebadf) + } + + /// Gets the HandleData for a handle + pub(super) fn get_inode_handle_data( + &self, + inode: Inode, + handle: Handle, + ) -> io::Result> { + self.handles + .read() + .unwrap() + .get(&handle) + .filter(|hd| hd.inode == inode) + .cloned() + .ok_or_else(ebadf) + } + + fn get_top_layer_idx(&self) -> usize { + self.layer_roots.read().unwrap().len() - 1 + } + + fn bump_refcount(&self, inode: Inode) { + let inodes = self.inodes.write().unwrap(); + let inode_data = 
inodes.get(&inode).unwrap(); + inode_data.refcount.fetch_add(1, Ordering::SeqCst); + } + + fn set_secctx(file: &FileId, secctx: SecContext, symlink: bool) -> io::Result<()> { + let options = if symlink { libc::XATTR_NOFOLLOW } else { 0 }; + let ret = match file { + FileId::Path(path) => unsafe { + libc::setxattr( + path.as_ptr(), + secctx.name.as_ptr(), + secctx.secctx.as_ptr() as *const libc::c_void, + secctx.secctx.len(), + 0, + options, + ) + }, + FileId::Fd(fd) => unsafe { + libc::fsetxattr( + *fd, + secctx.name.as_ptr(), + secctx.secctx.as_ptr() as *const libc::c_void, + secctx.secctx.len(), + 0, + options, + ) + }, + }; + + if ret != 0 { + Err(io::Error::last_os_error()) + } else { + Ok(()) + } + } + + /// Converts a dev/ino pair to a volume path + fn dev_ino_to_vol_path(&self, dev: i32, ino: u64) -> io::Result { + let path = format!("/{}/{}/{}", VOL_DIR, dev, ino); + CString::new(path).map_err(|_| einval()) + } + + /// Converts a dev/ino pair and name to a volume path + fn dev_ino_and_name_to_vol_path(&self, dev: i32, ino: u64, name: &CStr) -> io::Result { + let path = format!("/{}/{}/{}/{}", VOL_DIR, dev, ino, name.to_string_lossy()); + CString::new(path).map_err(|_| einval()) + } + + fn dev_ino_and_name_to_vol_whiteout_path( + &self, + dev: i32, + ino: u64, + name: &CStr, + ) -> io::Result { + // Create whiteout file (.wh.) in parent directory + let whiteout_name = format!( + "{}{}", + WHITEOUT_PREFIX, + name.to_str().map_err(|_| einval())? + ); + + let whiteout_cstr = CString::new(whiteout_name).map_err(|_| einval())?; + + // Get full path for whiteout file + self.dev_ino_and_name_to_vol_path(dev, ino, &whiteout_cstr) + } + + /// Converts an inode number to a volume path + fn inode_number_to_vol_path(&self, inode: Inode) -> io::Result { + let data = self.get_inode_data(inode)?; + self.dev_ino_to_vol_path(data.dev, data.ino) + } + + /// Turns an inode into an opened file. 
+ fn open_inode(&self, inode: Inode, mut flags: i32) -> io::Result { + // When writeback caching is enabled, the kernel may send read requests even if the + // userspace program opened the file write-only. So we need to ensure that we have opened + // the file for reading as well as writing. + let writeback = self.writeback.load(Ordering::Relaxed); + if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY { + flags &= !libc::O_ACCMODE; + flags |= libc::O_RDWR; + } + + // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`. + // However, this breaks atomicity as the file may have changed on disk, invalidating the + // cached copy of the data in the kernel and the offset that the kernel thinks is the end of + // the file. Just allow this for now as it is the user's responsibility to enable writeback + // caching only for directories that are not shared. It also means that we need to clear the + // `O_APPEND` flag. + if writeback && flags & libc::O_APPEND != 0 { + flags &= !libc::O_APPEND; + } + + let c_path = self.inode_number_to_vol_path(inode)?; + + let fd = unsafe { + libc::open( + c_path.as_ptr(), + (flags | libc::O_CLOEXEC) & (!libc::O_NOFOLLOW) & (!libc::O_EXLOCK), + ) + }; + + if fd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + // Safe because we just opened this fd. 
+ Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Parses open flags + fn parse_open_flags(&self, flags: i32) -> i32 { + let mut mflags: i32 = flags & 0b11; + + if (flags & bindings::LINUX_O_NONBLOCK) != 0 { + mflags |= libc::O_NONBLOCK; + } + if (flags & bindings::LINUX_O_APPEND) != 0 { + mflags |= libc::O_APPEND; + } + if (flags & bindings::LINUX_O_CREAT) != 0 { + mflags |= libc::O_CREAT; + } + if (flags & bindings::LINUX_O_TRUNC) != 0 { + mflags |= libc::O_TRUNC; + } + if (flags & bindings::LINUX_O_EXCL) != 0 { + mflags |= libc::O_EXCL; + } + if (flags & bindings::LINUX_O_NOFOLLOW) != 0 { + mflags |= libc::O_NOFOLLOW; + } + if (flags & bindings::LINUX_O_CLOEXEC) != 0 { + mflags |= libc::O_CLOEXEC; + } + + mflags + } + + /// Creates an Entry from stat information and inode data + fn create_entry(&self, inode: Inode, st: bindings::stat64) -> Entry { + Entry { + inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + } + } + + /// Checks for whiteout file in top layer + fn check_whiteout(&self, parent_path: &CStr, name: &CStr) -> io::Result { + let parent_str = parent_path.to_str().map_err(|_| einval())?; + let name_str = name.to_str().map_err(|_| einval())?; + + let whiteout_path = format!("{}/{}{}", parent_str, WHITEOUT_PREFIX, name_str); + let whiteout_cpath = CString::new(whiteout_path).map_err(|_| einval())?; + + match Self::unpatched_stat(&FileId::Path(whiteout_cpath)) { + Ok(_) => Ok(true), + Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false), + Err(e) => Err(e), + } + } + + /// Interns a name and returns the corresponding Symbol + fn intern_name(&self, name: &CStr) -> io::Result { + // Clone the name to avoid lifetime issues + let name_to_intern = CString::new(name.to_bytes()).map_err(|_| einval())?; + + // Get a write lock to intern it + let mut filenames = self.filenames.write().unwrap(); + filenames.intern(name_to_intern).map_err(|e| { + io::Error::new( + 
io::ErrorKind::Other, + format!("Failed to intern filename: {}", e), + ) + }) + } + + /// Checks for an opaque directory marker in the given parent directory path. + fn check_opaque_marker(&self, parent_path: &CStr) -> io::Result { + let parent_str = parent_path.to_str().map_err(|_| einval())?; + let opaque_path = format!("{}/{}", parent_str, OPAQUE_MARKER); + let opaque_cpath = CString::new(opaque_path).map_err(|_| einval())?; + match Self::unpatched_stat(&FileId::Path(opaque_cpath)) { + Ok(_) => Ok(true), + Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(false), + Err(e) => Err(e), + } + } + + /// Validates a name to prevent path traversal attacks and special overlay markers + /// + /// This function checks if a name contains: + /// - Path traversal sequences like ".." + /// - Other potentially dangerous patterns like slashes + /// - Whiteout markers (.wh. prefix) + /// - Opaque directory markers (.wh..wh..opq) + /// + /// Returns: + /// - Ok(()) if the name is safe + /// - Err(io::Error) if the name contains invalid patterns + fn validate_name(name: &CStr) -> io::Result<()> { + let name_bytes = name.to_bytes(); + + // Check for empty name + if name_bytes.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "empty name is not allowed", + )); + } + + // Check for path traversal sequences + if name_bytes == b".." 
|| name_bytes.contains(&b'/') || name_bytes.contains(&b'\\') { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "path traversal attempt detected", + )); + } + + // Check for null bytes + if name_bytes.contains(&0) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name contains null bytes", + )); + } + + // Convert to str for string pattern matching + let name_str = match std::str::from_utf8(name_bytes) { + Ok(s) => s, + Err(_) => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name contains invalid UTF-8", + )) + } + }; + + // Check for whiteout prefix + if name_str.starts_with(".wh.") { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name cannot start with whiteout prefix", + )); + } + + // Check for opaque marker + if name_str == ".wh..wh..opq" { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name cannot be an opaque directory marker", + )); + } + + Ok(()) + } + + /// Looks up a path segment by segment in a given layer + /// + /// This function traverses a path one segment at a time within a specific layer, + /// handling whiteouts and opaque markers along the way. 
+ /// + /// ### Arguments + /// * `layer_root` - Root inode data for the layer being searched + /// * `path_segments` - Path components to traverse, as interned symbols + /// * `path_inodes` - Vector to store inode data for each path segment traversed + /// + /// # Return Value + /// Returns `Option>` where: + /// - `Some(Ok(stat))` - Successfully found the file/directory and retrieved its stats + /// - `Some(Err(e))` - Encountered an error during lookup that should be propagated: + /// - If error is `NotFound`, caller should try next layer + /// - For any other IO error, caller should stop searching entirely + /// - `None` - Stop searching lower layers because either: + /// - Found a whiteout file for this path (file was deleted in this layer) + /// - Found an opaque directory marker (directory contents are masked in this layer) + /// + /// # Example Return Flow + /// 1. If path exists: `Some(Ok(stat))` + /// 2. If path has whiteout: `None` + /// 3. If path not found: `Some(Err(NotFound))` + /// 4. If directory has opaque marker: `None` + /// 5. If IO error occurs: `Some(Err(io_error))` + /// + /// # Side Effects + /// - Creates inodes for each path segment if they don't already exist + /// - Updates path_inodes with inode data for each segment traversed + /// - Increments reference counts for existing inodes that are reused + /// + /// # Path Resolution + /// For a path like "foo/bar/baz", the function: + /// 1. Starts at layer_root + /// 2. Looks up "foo", checking for whiteouts/opaque markers + /// 3. If "foo" exists, creates/reuses its inode and adds to path_inodes + /// 4. Repeats for "bar" and "baz" + /// 5. 
Returns stats for "baz" if found + fn lookup_segment_by_segment( + &self, + layer_root: &Arc, + path_segments: &[Symbol], + path_inodes: &mut Vec>, + ) -> Option> { + let mut current_stat; + let mut parent_dev = layer_root.dev; + let mut parent_ino = layer_root.ino; + let mut opaque_marker_found = false; + + // Start from layer root + let root_vol_path = match self.dev_ino_to_vol_path(parent_dev, parent_ino) { + Ok(path) => path, + Err(e) => return Some(Err(e)), + }; + + current_stat = match Self::patched_stat(&FileId::Path(root_vol_path)) { + Ok(stat) => stat, + Err(e) => return Some(Err(e)), + }; + + // Traverse each path segment + for (depth, segment) in path_segments.iter().enumerate() { + // Get the current segment name and parent vol path + let filenames = self.filenames.read().unwrap(); + let segment_name = filenames.get(*segment).unwrap(); + let parent_vol_path = match self.dev_ino_to_vol_path(parent_dev, parent_ino) { + Ok(path) => path, + Err(e) => return Some(Err(e)), + }; + + // TODO: whiteout should not override entry at the same level. so this check should be in not found case. 
+ // Check for whiteout at current level + match self.check_whiteout(&parent_vol_path, segment_name) { + Ok(true) => return None, // Found whiteout, stop searching + Ok(false) => (), // No whiteout, continue + Err(e) => return Some(Err(e)), + } + + // Check for opaque marker at current level + match self.check_opaque_marker(&parent_vol_path) { + Ok(true) => { + opaque_marker_found = true; + } + Ok(false) => (), + Err(e) => return Some(Err(e)), + } + + // Try to stat the current segment using parent dev/ino + let current_vol_path = + match self.dev_ino_and_name_to_vol_path(parent_dev, parent_ino, segment_name) { + Ok(path) => path, + Err(e) => return Some(Err(e)), + }; + + drop(filenames); // Now safe to drop filenames lock + + match Self::patched_stat(&FileId::Path(current_vol_path)) { + Ok(st) => { + // Update parent dev/ino for next iteration + parent_dev = st.st_dev as i32; + parent_ino = st.st_ino; + current_stat = st; + + // Create or get inode for this path segment + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev as i32); + let inode_data = { + let inodes = self.inodes.read().unwrap(); + if let Some(data) = inodes.get_alt(&alt_key) { + data.clone() + } else { + drop(inodes); // Drop read lock before write lock + + let mut path = path_inodes[depth].path.clone(); + path.push(*segment); + + let (_, data) = self.create_inode( + st.st_ino, + st.st_dev as i32, + path, + layer_root.layer_idx, + ); + + data + } + }; + + // Update path_inodes with the current segment's inode data + if (depth + 1) >= path_inodes.len() { + // Haven't seen this depth before, append + path_inodes.push(inode_data); + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound && opaque_marker_found => { + // For example, for a lookup of /foo/bar/baz, where /foo/bar has an opaque marker, + // then if we cannot find /foo/bar/baz in the current layer, we cannot find it + // in any other layer as /foo/bar is masked. 
+ return None; + } + Err(e) => return Some(Err(e)), + } + } + + Some(Ok(current_stat)) + } + + /// Looks up a file or directory entry across multiple filesystem layers. + /// + /// This function starts from the specified upper layer (given by start_layer_idx) and searches downwards + /// through the layers to locate the file represented by the provided path segments (an interned path). + /// At each layer, it calls lookup_segment_by_segment to traverse the path step by step while handling + /// whiteout files and opaque directory markers. If an entry is found in a layer, the function returns + /// an Entry structure containing the file metadata along with a vector of InodeData for each path segment traversed. + /// + /// ## Arguments + /// + /// * `start_layer_idx` - The index of the starting layer (from the topmost, which may be the writable layer). + /// * `path_segments` - A slice of interned symbols representing the path components to traverse. + /// + /// ## Returns + /// + /// On success, returns a tuple containing: + /// - An Entry representing the located file or directory along with its attributes. + /// - A vector of Arc corresponding to the inodes for each traversed path segment. + /// + /// ## Errors + /// + /// Returns an io::Error if: + /// - The file is not found in any layer (ENOENT), or + /// - An error occurs during the lookup process in one of the layers. + fn lookup_layer_by_layer<'a>( + &'a self, + start_layer_idx: usize, + path_segments: &[Symbol], + ) -> io::Result<(Entry, Vec>)> { + let mut path_inodes = vec![]; + + // Start from the start_layer_idx and try each layer down to layer 0 + for layer_idx in (0..=start_layer_idx).rev() { + let layer_root = self.get_layer_root(layer_idx)?; + + // If path_inodes has only the root inode or is empty, we need to restart the lookup with the new layer root. 
+ if path_inodes.len() < 2 { + path_inodes = vec![layer_root.clone()]; + } + + match self.lookup_segment_by_segment(&layer_root, &path_segments, &mut path_inodes) { + Some(Ok(st)) => { + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev as i32); + + // Check if we already have this inode + let inodes = self.inodes.read().unwrap(); + if let Some(data) = inodes.get_alt(&alt_key) { + return Ok((self.create_entry(data.inode, st), path_inodes)); + } + + drop(inodes); + + // Create new inode + let (inode, data) = self.create_inode( + st.st_ino, + st.st_dev as i32, + path_segments.to_vec(), + layer_idx, + ); + path_inodes.push(data.clone()); + return Ok((self.create_entry(inode, st), path_inodes)); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + // Continue to check lower layers + continue; + } + Some(Err(e)) => { + return Err(e); + } + None => { + // Hit a whiteout or opaque marker, stop searching lower layers + return Err(io::Error::from_raw_os_error(libc::ENOENT)); + } + } + } + + // Not found in any layer + Err(io::Error::from_raw_os_error(libc::ENOENT)) + } + + /// Performs a lookup operation + pub(crate) fn do_lookup( + &self, + parent: Inode, + name: &CStr, + ) -> io::Result<(Entry, Vec>)> { + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Create path segments for lookup by appending the new name + let mut path_segments = parent_data.path.clone(); + let symbol = self.intern_name(name)?; + path_segments.push(symbol); + + self.lookup_layer_by_layer(parent_data.layer_idx, &path_segments) + } + + /// Performs a raw stat syscall without any modifications to the returned stat structure. + /// + /// This function directly calls the OS's stat syscall and returns the raw stat information + /// exactly as provided by the filesystem. It does not apply any overlayfs-specific + /// modifications like owner/permission overrides from extended attributes. 
+ /// + /// ## Arguments + /// * `file` - A FileId containing either a path or file descriptor to stat + /// + /// ## Returns + /// * `io::Result` - The raw stat information from the filesystem + /// + /// ## Safety + /// This function performs raw syscalls but handles all unsafe operations internally. + fn unpatched_stat(file: &FileId) -> io::Result { + let mut st = MaybeUninit::::zeroed(); + + let ret = unsafe { + match file { + FileId::Path(path) => { + libc::lstat(path.as_ptr(), st.as_mut_ptr() as *mut libc::stat) + } + FileId::Fd(fd) => libc::fstat(*fd, st.as_mut_ptr() as *mut libc::stat), + } + }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(unsafe { st.assume_init() }) + } + + /// Performs a stat syscall and patches the returned stat structure with overlayfs metadata. + /// + /// This function extends unpatched_stat by applying overlayfs-specific modifications: + /// 1. Gets the raw stat information using unpatched_stat + /// 2. Reads extended attributes storing overlayfs owner/permission overrides + /// 3. Updates the stat structure with any owner (uid/gid) overrides found + /// 4. Updates the permission bits with any mode overrides found + /// + /// This provides the overlayfs view of file metadata, where file ownership and permissions + /// can be modified independently of the underlying filesystem. + /// + /// ## Arguments + /// * `file` - A FileId containing either a path or file descriptor to stat + /// + /// ## Returns + /// * `io::Result` - The stat information with overlayfs patches applied + /// + /// ## Safety + /// This function performs raw syscalls but handles all unsafe operations internally. 
+ fn patched_stat(file: &FileId) -> io::Result { + let mut stat = Self::unpatched_stat(file)?; + + // Get owner and permissions from xattr + if let Ok(Some((uid, gid, mode))) = Self::get_owner_perms_attr(file, &stat) { + // Update the stat with the xattr values if available + stat.st_uid = uid; + stat.st_gid = gid; + // Make sure we only modify the permission bits (lower 12 bits) + stat.st_mode = (stat.st_mode & !0o7777u16) | mode; + } + + Ok(stat) + } + + fn get_owner_perms_attr( + file: &FileId, + st: &bindings::stat64, + ) -> io::Result> { + // Try to get the owner and permissions from xattr + let mut buf: Vec = vec![0; 32]; + + // Get options based on file type + let options = if (st.st_mode & libc::S_IFMT) == libc::S_IFLNK { + libc::XATTR_NOFOLLOW + } else { + 0 + }; + + // Helper function to convert byte slice to u32 value + fn item_to_value(item: &[u8], radix: u32) -> Option { + match std::str::from_utf8(item) { + Ok(val) => match u32::from_str_radix(val, radix) { + Ok(i) => Some(i), + Err(_) => None, + }, + Err(_) => None, + } + } + + // Get the xattr + let res = match file { + FileId::Path(path) => unsafe { + libc::getxattr( + path.as_ptr(), + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + 0, + options, + ) + }, + FileId::Fd(fd) => unsafe { + libc::fgetxattr( + *fd, + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + 0, + options, + ) + }, + }; + + if res < 0 { + let err = io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::ENOATTR) { + return Ok(None); + } + return Err(err); + } + + let len = res as usize; + buf.truncate(len); + + // Parse the xattr value + let parts: Vec<&[u8]> = buf.split(|&b| b == b':').collect(); + if parts.len() != 3 { + return Ok(None); + } + + let uid = item_to_value(parts[0], 10).unwrap_or(st.st_uid); + let gid = item_to_value(parts[1], 10).unwrap_or(st.st_gid); + let mode = item_to_value(parts[2], 
8).unwrap_or(st.st_mode as u32) as u16; + + Ok(Some((uid, gid, mode))) + } + + fn set_owner_perms_attr( + file: &FileId, + st: &bindings::stat64, + owner: Option<(u32, u32)>, + mode: Option, + ) -> io::Result<()> { + // Get the current values to use as defaults + let (uid, gid) = if let Some((uid, gid)) = owner { + (uid, gid) + } else { + (st.st_uid, st.st_gid) + }; + + let mode = mode.unwrap_or(st.st_mode); + + // Format the xattr value + let value = format!("{}:{}:{:o}", uid, gid, mode & 0o7777); + let value_bytes = value.as_bytes(); + + // Get options based on file type + let options = if (st.st_mode & libc::S_IFMT) == libc::S_IFLNK { + libc::XATTR_NOFOLLOW + } else { + 0 + }; + + // Set the xattr + let res = match file { + FileId::Path(path) => unsafe { + libc::setxattr( + path.as_ptr(), + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + value_bytes.as_ptr() as *const libc::c_void, + value_bytes.len(), + 0, + options, + ) + }, + FileId::Fd(fd) => unsafe { + libc::fsetxattr( + *fd, + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + value_bytes.as_ptr() as *const libc::c_void, + value_bytes.len(), + 0, + options, + ) + }, + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + /// Copies up a file or directory from a lower layer to the top layer + pub(crate) fn copy_up(&self, path_inodes: &[Arc]) -> io::Result<()> { + // Get the top layer root + let top_layer_idx = self.get_top_layer_idx(); + let top_layer_root = self.get_layer_root(top_layer_idx)?; + + // Start from root and copy up each segment that's not in the top layer + let mut parent_dev = top_layer_root.dev; + let mut parent_ino = top_layer_root.ino; + + // Skip the root inode + for inode_data in path_inodes.iter().skip(1) { + // Skip if this segment is already in the top layer + if inode_data.layer_idx == top_layer_idx { + parent_dev = inode_data.dev; + parent_ino = inode_data.ino; + continue; + } + + // Get the current segment name + let segment_name = { + let name = 
inode_data.path.last().unwrap(); + let filenames = self.filenames.read().unwrap(); + filenames.get(*name).unwrap().to_owned() + }; + + // Get source and destination paths + let src_path = self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?; + let dst_path = + self.dev_ino_and_name_to_vol_path(parent_dev, parent_ino, &segment_name)?; + + // Get source file/directory stats + let src_stat = Self::patched_stat(&FileId::Path(src_path.clone()))?; + let file_type = src_stat.st_mode & libc::S_IFMT; + + // Copy up the file/directory + match file_type { + libc::S_IFREG => { + // Regular file: use clonefile for COW semantics if available + // Use clonefile for COW semantics + let result = unsafe { clonefile(src_path.as_ptr(), dst_path.as_ptr(), 0) }; + + if result < 0 { + let err = io::Error::last_os_error(); + // If clonefile fails (e.g., across filesystems), fall back to regular copy + if err.raw_os_error() == Some(libc::EXDEV) + || err.raw_os_error() == Some(libc::ENOTSUP) + { + // Fall back to regular copy + self.copy_file_contents( + &src_path, + &dst_path, + (src_stat.st_mode & 0o777) as u32, + )?; + } else { + return Err(err); + } + } + } + libc::S_IFDIR => { + // Directory: just create it with the same permissions + unsafe { + if libc::mkdir(dst_path.as_ptr(), src_stat.st_mode & 0o777) < 0 { + return Err(io::Error::last_os_error()); + } + + // Explicitly set directory permissions to match source + if libc::chmod(dst_path.as_ptr(), src_stat.st_mode & 0o777) < 0 { + return Err(io::Error::last_os_error()); + } + } + } + libc::S_IFLNK => { + // Symbolic link: read target and recreate link + let mut buf = vec![0u8; libc::PATH_MAX as usize]; + let len = unsafe { + libc::readlink(src_path.as_ptr(), buf.as_mut_ptr() as *mut _, buf.len()) + }; + if len < 0 { + return Err(io::Error::last_os_error()); + } + buf.truncate(len as usize); + + unsafe { + if libc::symlink(buf.as_ptr() as *const _, dst_path.as_ptr()) < 0 { + return Err(io::Error::last_os_error()); + } + + // 
Note: macOS doesn't allow setting permissions on symlinks directly + // The permissions of symlinks are typically ignored by the system + } + } + _ => { + // Other types (devices, sockets, etc.) are not supported + return Err(io::Error::new( + io::ErrorKind::Unsupported, + "unsupported file type for copy up", + )); + } + } + + // Update parent dev/ino for next iteration + let new_stat = Self::unpatched_stat(&FileId::Path(dst_path))?; + parent_dev = new_stat.st_dev as i32; + parent_ino = new_stat.st_ino; + + // Update the inode entry to point to the new copy in the top layer + let alt_key = InodeAltKey::new(new_stat.st_ino, new_stat.st_dev as i32); + let mut inodes = self.inodes.write().unwrap(); + + // Create new inode data with updated dev/ino/layer_idx but same path and refcount + let new_data = Arc::new(InodeData { + inode: inode_data.inode, + ino: new_stat.st_ino, + dev: new_stat.st_dev as i32, + refcount: AtomicU64::new(inode_data.refcount.load(Ordering::SeqCst)), + path: inode_data.path.clone(), + layer_idx: top_layer_idx, + }); + + // Replace the old entry with the new one + inodes.insert(inode_data.inode, alt_key, new_data); + } + + Ok(()) + } + + /// Helper method to copy file contents when clonefile is not available or fails + fn copy_file_contents( + &self, + src_path: &CString, + dst_path: &CString, + mode: u32, + ) -> io::Result<()> { + unsafe { + let src_file = libc::open(src_path.as_ptr(), libc::O_RDONLY); + if src_file < 0 { + return Err(io::Error::last_os_error()); + } + + let dst_file = libc::open( + dst_path.as_ptr(), + libc::O_WRONLY | libc::O_CREAT | libc::O_EXCL, + mode, + ); + if dst_file < 0 { + libc::close(src_file); + return Err(io::Error::last_os_error()); + } + + // Copy file contents + let mut buf = [0u8; 8192]; + loop { + let n_read = libc::read(src_file, buf.as_mut_ptr() as *mut _, buf.len()); + if n_read <= 0 { + break; + } + let mut pos = 0; + while pos < n_read { + let n_written = libc::write( + dst_file, + buf.as_ptr().add(pos as 
usize) as *const _, + (n_read - pos) as usize, + ); + if n_written <= 0 { + libc::close(src_file); + libc::close(dst_file); + return Err(io::Error::last_os_error()); + } + pos += n_written; + } + } + + // Explicitly set permissions to match source file + // This will override any effects from the umask + if libc::fchmod(dst_file, mode as libc::mode_t) < 0 { + libc::close(src_file); + libc::close(dst_file); + return Err(io::Error::last_os_error()); + } + + libc::close(src_file); + libc::close(dst_file); + } + + Ok(()) + } + + /// Ensures the file is in the top layer by copying it up if necessary. + /// + /// This function: + /// 1. Checks if the file is already in the top layer + /// 2. If not, looks up the complete path to the file + /// 3. Copies the file and all its parent directories to the top layer + /// 4. Returns the inode data for the copied file + /// + /// ### Arguments + /// * `inode_data` - The inode data for the file to copy up + /// + /// ### Returns + /// * `Ok(InodeData)` - The inode data for the file in the top layer + /// * `Err(io::Error)` - If the copy-up operation fails + fn ensure_top_layer(&self, inode_data: Arc) -> io::Result> { + let top_layer_idx = self.get_top_layer_idx(); + + // If already in top layer, return early + if inode_data.layer_idx == top_layer_idx { + return Ok(inode_data); + } + + // Build the path segments + let path_segments = inode_data.path.clone(); + + // Lookup the file to get all path inodes + let (_, path_inodes) = self.lookup_layer_by_layer(top_layer_idx, &path_segments)?; + + // Copy up the file + self.copy_up(&path_inodes)?; + + // Get the inode data for the copied file + self.get_inode_data(inode_data.inode) + } + + /// Creates a whiteout file for a given parent directory and name. + /// This is used to hide files that exist in lower layers. 
+ /// + /// # Arguments + /// * `parent` - The inode of the parent directory + /// * `name` - The name of the file to create a whiteout for + /// + /// # Returns + /// * `Ok(())` if the whiteout was created successfully + /// * `Err(io::Error)` if there was an error creating the whiteout + fn create_whiteout_for_lower(&self, parent: Inode, name: &CStr) -> io::Result<()> { + if let Ok((_, mut path_inodes)) = self.do_lookup(parent, name) { + // Copy up the parent directory if needed + path_inodes.pop(); + self.copy_up(&path_inodes)?; + let parent_data = self.get_inode_data(parent)?; + + // Create the whiteout file + let whiteout_path = + self.dev_ino_and_name_to_vol_whiteout_path(parent_data.dev, parent_data.ino, name)?; + + let fd = unsafe { + libc::open( + whiteout_path.as_ptr(), + libc::O_CREAT | libc::O_WRONLY | libc::O_EXCL, + 0o000, // Whiteout files have no permissions + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + unsafe { libc::close(fd) }; + } + + Ok(()) + } + + /// Returns an iterator over all valid entries in the directory across all layers. + /// + /// Note: OverlayFs is a high-level, layered filesystem. A simple readdir on a single directory does not produce the complete view. + /// This function traverses the directory across multiple layers, merging entries while handling duplicates, + /// whiteout files, and opaque markers. + /// + /// ## Arguments + /// * `dir` - The inode of the directory to iterate over. + /// * `add_entry` - A callback function that processes each directory entry. If the callback returns 0, + /// it signals that the directory buffer is full and iteration should stop. + /// + /// ## Returns + /// * `Ok(())` if the directory was iterated successfully. + /// * `Err(io::Error)` if an error occurred during iteration. 
+ pub(super) fn process_dir_entries(&self, dir: Inode, mut add_entry: F) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + // Local state to track iteration over layers + struct LazyReaddirState { + current_layer: isize, // current layer (top-down) + inode_data: Option>, + current_iter: Option, + seen: HashSet>, + } + + let inode_data = self.get_inode_data(dir)?; + let top_layer = self.get_top_layer_idx() as isize; + let path = inode_data.path.clone(); + let mut state = LazyReaddirState { + current_layer: top_layer, + inode_data: None, + current_iter: None, + seen: HashSet::new(), + }; + + let mut current_offset = 0u64; + let mut opaque_marker_found = false; + loop { + // If no current iterator, attempt to initialize one for the current layer + if state.current_iter.is_none() { + if state.current_layer < 0 { + break; // All layers exhausted + } + + let layer_root = self.get_layer_root(state.current_layer as usize)?; + let mut path_inodes = vec![layer_root.clone()]; + + match self.lookup_segment_by_segment(&layer_root, &path, &mut path_inodes) { + Some(Ok(_)) => { + let last_inode = path_inodes.last().unwrap(); + let vol_path = self.inode_number_to_vol_path((**last_inode).inode)?; + let dir_str = vol_path.as_c_str().to_str().map_err(|_| { + io::Error::new(io::ErrorKind::Other, "Invalid path string") + })?; + + state.inode_data = Some(last_inode.clone()); + state.current_iter = Some(std::fs::read_dir(dir_str)?); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + state.current_layer -= 1; + continue; + } + Some(Err(e)) => return Err(e), + None => { + state.current_layer = -1; + continue; + } + } + } + + if let Some(iter) = state.current_iter.as_mut() { + if let Some(entry_result) = iter.next() { + let entry = entry_result?; + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + let inode_data = state.inode_data.as_ref().unwrap(); + + if state.seen.contains(name.as_bytes()) { + continue; + } + + // Handle opaque 
marker and whiteout files + if name_str == OPAQUE_MARKER { + // Opaque marker found; mark it and skip this entry + opaque_marker_found = true; + continue; + } else if name_str.starts_with(WHITEOUT_PREFIX) { + // Whiteout file: extract the actual name + let actual = &name_str[WHITEOUT_PREFIX.len()..]; + let actual_bytes = actual.as_bytes(); + if state.seen.contains(actual_bytes) { + continue; + } + + let actual_cstring = CString::new(actual).map_err(|_| { + io::Error::new(io::ErrorKind::Other, "Invalid whiteout name") + })?; + + let vol_path = self.dev_ino_and_name_to_vol_path( + inode_data.dev, + inode_data.ino, + &actual_cstring, + )?; + + match Self::unpatched_stat(&FileId::Path(vol_path)) { + Ok(_) => continue, + Err(e) if e.kind() == io::ErrorKind::NotFound => { + state.seen.insert(actual_bytes.to_vec()); + continue; + } + Err(e) => return Err(e), + } + } else { + state.seen.insert(name.as_bytes().to_vec()); + } + + let metadata = entry.metadata()?; + let mode = metadata.mode() as u32; + let s_ifmt = libc::S_IFMT as u32; + let type_ = if mode & s_ifmt == (libc::S_IFDIR as u32) { + libc::DT_DIR + } else if mode & s_ifmt == (libc::S_IFREG as u32) { + libc::DT_REG + } else if mode & s_ifmt == (libc::S_IFLNK as u32) { + libc::DT_LNK + } else if mode & s_ifmt == (libc::S_IFIFO as u32) { + libc::DT_FIFO + } else if mode & s_ifmt == (libc::S_IFCHR as u32) { + libc::DT_CHR + } else if mode & s_ifmt == (libc::S_IFBLK as u32) { + libc::DT_BLK + } else if mode & s_ifmt == (libc::S_IFSOCK as u32) { + libc::DT_SOCK + } else { + libc::DT_UNKNOWN + }; + + current_offset += 1; + + let dir_entry = DirEntry { + ino: metadata.ino(), + offset: current_offset, + type_: type_ as u32, + name: name.as_bytes(), + }; + + if add_entry(dir_entry)? 
== 0 { + return Ok(()); + } + } else { + state.current_iter = None; + if opaque_marker_found { + break; + } + state.current_layer -= 1; + continue; + } + } + } + + Ok(()) + } + + /// Reads directory entries for the given inode by merging entries from all underlying layers. + /// + /// Unlike conventional filesystems that simply call readdir on a directory file descriptor, + /// OverlayFs must aggregate entries from multiple layers. The `offset` parameter specifies the starting + /// index in the merged list of directory entries. The provided `add_entry` callback is invoked for each + /// entry; a return value of 0 indicates that the directory buffer is full and reading should cease. + /// + /// NOTE: The current implementation of offset does not entirely follow FUSE expected behaviors. + /// Changes to entries in the write layer can affect the offset, potentially causing inconsistencies + /// in directory listing between calls. + /// + /// TODO: Implement a more robust offset handling mechanism that maintains consistency even when + /// the underlying directory structure changes. One way is making offset a composite value of + /// layer (1 MSB) + offset (7 LSB). This will also require having multiple open dirs from lower layers + /// in [HandleData]. 
+ pub(super) fn do_readdir( + &self, + inode: Inode, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + if size == 0 { + return Ok(()); + } + + let mut current_offset = 0u64; + self.process_dir_entries(inode, |entry| { + if current_offset < offset { + current_offset += 1; + return Ok(1); + } + + add_entry(entry) + }) + } + + /// Performs an open operation + fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option, OpenOptions)> { + // Parse and normalize the open flags + let flags = self.parse_open_flags(flags as i32); + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer + let inode_data = self.ensure_top_layer(inode_data)?; + + // Open the file with the appropriate flags and generate a new unique handle ID + let file = RwLock::new(self.open_inode(inode_data.inode, flags)?); + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + + // Create handle data structure with file and empty dirstream + let data = HandleData { inode, file }; + + // Store the handle data in the handles map + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + // Set up OpenOptions based on the cache policy configuration + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + // For CachePolicy::Never, set DIRECT_IO to bypass kernel caching for files (not directories) + CachePolicy::Never => opts.set(OpenOptions::DIRECT_IO, flags & libc::O_DIRECTORY == 0), + + // For CachePolicy::Always, set different caching options based on whether it's a file or directory + CachePolicy::Always => { + if flags & libc::O_DIRECTORY == 0 { + // For files: KEEP_CACHE maintains kernel cache between open/close operations + opts |= OpenOptions::KEEP_CACHE; + } else { + // For directories: CACHE_DIR enables caching of directory entries + opts |= OpenOptions::CACHE_DIR; + } + } + + // For CachePolicy::Auto, use default caching 
behavior + _ => {} + }; + + // Return the handle and options + Ok((Some(handle), opts)) + } + + /// Performs a release operation + fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> { + let mut handles = self.handles.write().unwrap(); + + if let btree_map::Entry::Occupied(e) = handles.entry(handle) { + if e.get().inode == inode { + // We don't need to close the file here because that will happen automatically when + // the last `Arc` is dropped. + e.remove(); + return Ok(()); + } + } + + Err(ebadf()) + } + + /// Performs a getattr operation + fn do_getattr(&self, inode: Inode) -> io::Result<(bindings::stat64, Duration)> { + let c_path = self.inode_number_to_vol_path(inode)?; + let st = Self::patched_stat(&FileId::Path(c_path))?; + + Ok((st, self.config.attr_timeout)) + } +
+    /// Performs a setattr operation, copying up the file if needed.
+    ///
+    /// The fields of `attr` selected by `valid` (ownership, mode, size, timestamps) are applied
+    /// to the file, which is addressed through `handle` when one is supplied and through its
+    /// volume path otherwise.
+    ///
+    /// ## Arguments
+    /// * `inode` - The inode whose attributes are changed.
+    /// * `attr` - The source of the new attribute values.
+    /// * `handle` - An optional open handle for the file.
+    /// * `valid` - Flags selecting which fields of `attr` to apply.
+    ///
+    /// ## Returns
+    /// * `Ok((stat, timeout))` - The result of `do_getattr` once the changes are applied.
+    /// * `Err(io::Error)` - If the copy-up, the handle lookup or any attribute update fails.
+    fn do_setattr(
+        &self,
+        inode: Inode,
+        attr: bindings::stat64,
+        handle: Option<Handle>,
+        valid: SetattrValid,
+    ) -> io::Result<(bindings::stat64, Duration)> {
+        // Ensure the file is in the top layer before modifying attributes
+        let inode_data = self.ensure_top_layer(self.get_inode_data(inode)?)?;
+
+        // Get the file identifier - either from handle or path
+        let file_id = if let Some(handle) = handle {
+            let handles = self.handles.read().unwrap();
+            let handle_data = handles.get(&handle).ok_or_else(ebadf)?;
+            let file = handle_data.file.read().unwrap();
+            FileId::Fd(file.as_raw_fd())
+        } else {
+            // Use path if no handle available
+            FileId::Path(self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?)
+        };
+
+        // Attributes as they are before any change; passed to `set_owner_perms_attr` below
+        let current_stat = Self::patched_stat(&file_id)?;
+
+        // Handle ownership changes, keeping the current value for whichever id is not being set
+        if valid.intersects(SetattrValid::UID | SetattrValid::GID) {
+            let uid = if valid.contains(SetattrValid::UID) {
+                attr.st_uid
+            } else {
+                current_stat.st_uid
+            };
+            let gid = if valid.contains(SetattrValid::GID) {
+                attr.st_gid
+            } else {
+                current_stat.st_gid
+            };
+
+            Self::set_owner_perms_attr(&file_id, &current_stat, Some((uid, gid)), None)?;
+        }
+
+        // Handle mode changes
+        if valid.contains(SetattrValid::MODE) {
+            let mode = attr.st_mode & 0o7777;
+            Self::set_owner_perms_attr(&file_id, &current_stat, None, Some(mode))?;
+        }
+
+        // Handle size changes
+        if valid.contains(SetattrValid::SIZE) {
+            let res = match file_id {
+                FileId::Fd(fd) => unsafe { libc::ftruncate(fd, attr.st_size) },
+                FileId::Path(ref c_path) => unsafe {
+                    libc::truncate(c_path.as_ptr(), attr.st_size)
+                },
+            };
+
+            if res < 0 {
+                return Err(io::Error::last_os_error());
+            }
+        }
+
+        // Handle timestamp changes
+        if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) {
+            // Both slots start as "leave unchanged"; index 0 is atime, index 1 is mtime
+            let mut tvs = [
+                libc::timespec {
+                    tv_sec: 0,
+                    tv_nsec: libc::UTIME_OMIT,
+                },
+                libc::timespec {
+                    tv_sec: 0,
+                    tv_nsec: libc::UTIME_OMIT,
+                },
+            ];
+
+            if valid.contains(SetattrValid::ATIME_NOW) {
+                tvs[0].tv_nsec = libc::UTIME_NOW;
+            } else if valid.contains(SetattrValid::ATIME) {
+                tvs[0].tv_sec = attr.st_atime;
+                tvs[0].tv_nsec = attr.st_atime_nsec;
+            }
+
+            if valid.contains(SetattrValid::MTIME_NOW) {
+                tvs[1].tv_nsec = libc::UTIME_NOW;
+            } else if valid.contains(SetattrValid::MTIME) {
+                tvs[1].tv_sec = attr.st_mtime;
+                tvs[1].tv_nsec = attr.st_mtime_nsec;
+            }
+
+            match file_id {
+                FileId::Fd(fd) => {
+                    // Safe because this doesn't modify any memory and we check the return value
+                    let res = unsafe { libc::futimens(fd, tvs.as_ptr()) };
+                    if res < 0 {
+                        return Err(io::Error::last_os_error());
+                    }
+                }
+                FileId::Path(ref c_path) => {
+                    // Safe because the path is a valid C string and we check the return value
+                    let fd = unsafe {
+                        libc::open(c_path.as_ptr(), libc::O_SYMLINK | libc::O_CLOEXEC)
+                    };
+                    if fd < 0 {
+                        return Err(io::Error::last_os_error());
+                    }
+
+                    // Safe because `fd` was just opened and `tvs` outlives the call
+                    let res = unsafe { libc::futimens(fd, tvs.as_ptr()) };
+
+                    // Capture errno before close() has a chance to overwrite it
+                    let err = io::Error::last_os_error();
+                    unsafe { libc::close(fd) };
+
+                    if res < 0 {
+                        return Err(err);
+                    }
+                }
+            }
+        }
+
+        // Return the updated attributes and timeout
+        self.do_getattr(inode)
+    }
+
+ fn do_mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. + } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + // Create the directory with initial permissions + let res = unsafe { libc::mkdir(c_path.as_ptr(), 0o700) }; + if res == 0 { + // Set security context if provided + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Path(c_path.clone()), secctx, false)?; + } + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + Self::set_owner_perms_attr( + &FileId::Path(c_path.clone()), + &stat, + Some((ctx.uid, ctx.gid)), + Some((mode & !umask) as u16), + )?; + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + return Ok(entry); + } + + // Return the error +
Err(linux_error(io::Error::last_os_error())) + } + + /// Performs an unlink operation + fn do_unlink(&self, parent: Inode, name: &CStr) -> io::Result<()> { + let top_layer_idx = self.get_top_layer_idx(); + let (entry, _) = self.do_lookup(parent, name)?; + + // If the inode is in the top layer, we need to unlink it. + let entry_data = self.get_inode_data(entry.inode)?; + if entry_data.layer_idx == top_layer_idx { + // Get the path for the inode + let c_path = self.inode_number_to_vol_path(entry.inode)?; + + // Remove the inode from the overlayfs + let res = unsafe { libc::unlink(c_path.as_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // If after an unlink, the entry still exists in a lower layer, we need to add a whiteout + self.create_whiteout_for_lower(parent, name)?; + + Ok(()) + } + + /// Performs an rmdir operation + fn do_rmdir(&self, parent: Inode, name: &CStr) -> io::Result<()> { + let top_layer_idx = self.get_top_layer_idx(); + let (entry, _) = self.do_lookup(parent, name)?; + + // If the inode is in the top layer, we need to unlink it. 
+ let entry_data = self.get_inode_data(entry.inode)?; + if entry_data.layer_idx == top_layer_idx { + // Get the path for the inode + let c_path = self.inode_number_to_vol_path(entry.inode)?; + + // Remove the inode from the overlayfs + let res = unsafe { libc::rmdir(c_path.as_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // If after an rmdir, the entry still exists in a lower layer, we need to add a whiteout + self.create_whiteout_for_lower(parent, name)?; + + Ok(()) + } + + /// Performs a symlink operation + fn do_symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + // Create the directory with initial permissions + let res = unsafe { libc::symlink(linkname.as_ptr(), c_path.as_ptr()) }; + if res == 0 { + // Set security context if provided + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Path(c_path.clone()), secctx, true)?; + } + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + let mode = libc::S_IFLNK | 0o777; + Self::set_owner_perms_attr( + &FileId::Path(c_path.clone()), + &stat, + Some((ctx.uid, ctx.gid)), + Some(mode), + )?; + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + return Ok(entry); + } + + // Return the error + Err(linux_error(io::Error::last_os_error())) + } +
+    /// Performs a rename operation, copying up the source and the new parent if needed.
+    ///
+    /// ## Arguments
+    /// * `old_parent` - The inode of the directory currently holding the entry.
+    /// * `old_name` - The current name of the entry.
+    /// * `new_parent` - The inode of the destination directory.
+    /// * `new_name` - The name the entry gets in the destination directory.
+    /// * `flags` - Linux rename flags (`LINUX_RENAME_NOREPLACE`, `LINUX_RENAME_EXCHANGE`,
+    ///   `LINUX_RENAME_WHITEOUT`).
+    ///
+    /// ## Returns
+    /// * `Ok(())` if the rename and any follow-up whiteout creation succeeded.
+    /// * `Err(io::Error)` if the flags are contradictory or any step fails.
+    fn do_rename(
+        &self,
+        old_parent: Inode,
+        old_name: &CStr,
+        new_parent: Inode,
+        new_name: &CStr,
+        flags: u32,
+    ) -> io::Result<()> {
+        let flags = flags as i32;
+        let no_replace = (flags & bindings::LINUX_RENAME_NOREPLACE) != 0;
+        let exchange = (flags & bindings::LINUX_RENAME_EXCHANGE) != 0;
+        let whiteout = (flags & bindings::LINUX_RENAME_WHITEOUT) != 0;
+
+        // Reject invalid flag combinations before anything is copied up or renamed
+        if whiteout && exchange {
+            return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL)));
+        }
+
+        // Copy up the old path to the top layer if not already in the top layer
+        let (_, old_path_inodes) = self.do_lookup(old_parent, old_name)?;
+        self.copy_up(&old_path_inodes)?;
+        let old_parent_data = self.get_inode_data(old_parent)?;
+
+        // Copy up the new parent to the top layer if not already in the top layer
+        let new_parent_data = self.ensure_top_layer(self.get_inode_data(new_parent)?)?;
+
+        // Get the paths for rename operation
+        let old_path =
+            self.dev_ino_and_name_to_vol_path(old_parent_data.dev, old_parent_data.ino, old_name)?;
+        let new_path =
+            self.dev_ino_and_name_to_vol_path(new_parent_data.dev, new_parent_data.ino, new_name)?;
+
+        // Set up rename flags
+        let mut mflags: u32 = 0;
+        if no_replace {
+            mflags |= libc::RENAME_EXCL;
+        }
+        if exchange {
+            mflags |= libc::RENAME_SWAP;
+        }
+
+        // Perform the rename
+        let res = unsafe { libc::renamex_np(old_path.as_ptr(), new_path.as_ptr(), mflags) };
+        if res < 0 {
+            return Err(io::Error::last_os_error());
+        }
+
+        // After successful rename, check if we need to add a whiteout for the old path
+        self.create_whiteout_for_lower(old_parent, old_name)?;
+
+        // If LINUX_RENAME_WHITEOUT is set, create a character device at the old path location
+        if whiteout {
+            let fd = unsafe {
+                libc::open(
+                    old_path.as_ptr(),
+                    libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW,
+                    0o600,
+                )
+            };
+
+            // The descriptor must be validated before it is handed to any other call
+            if fd < 0 {
+                return Err(io::Error::last_os_error());
+            }
+
+            // Safe because we just opened this fd. Owning it through `File` closes it on
+            // every exit path, including the `?` returns below.
+            let _owned_fd = unsafe { File::from_raw_fd(fd) };
+
+            let stat = Self::unpatched_stat(&FileId::Fd(fd))?;
+            Self::set_owner_perms_attr(&FileId::Fd(fd), &stat, None, Some(libc::S_IFCHR | 0o600))?;
+        }
+
+        Ok(())
+    }
+
+ fn do_link(&self, inode: Inode, new_parent: Inode, new_name: &CStr) -> io::Result { + // Get the inode data for the source file + let inode_data = self.get_inode_data(inode)?; + + // Copy up the source file to the top layer if needed + let inode_data = self.ensure_top_layer(inode_data)?; + + // Get
and ensure new parent is in top layer + let new_parent_data = self.ensure_top_layer(self.get_inode_data(new_parent)?)?; + + // Get source and destination paths + let src_path = self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?; + + let dst_path = + self.dev_ino_and_name_to_vol_path(new_parent_data.dev, new_parent_data.ino, new_name)?; + + // Create the hard link + let res = unsafe { libc::link(src_path.as_ptr(), dst_path.as_ptr()) }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Get the entry for the newly created link + let mut path = new_parent_data.path.clone(); + path.push(self.intern_name(new_name)?); + + // Get stats for the new link + let stat = Self::patched_stat(&FileId::Path(dst_path))?; + + // Create new inode for the link pointing to same dev/ino as source + let (inode, _) = self.create_inode( + stat.st_ino, + stat.st_dev as i32, + path, + new_parent_data.layer_idx, + ); + + Ok(self.create_entry(inode, stat)) + } + + /// Decrements the reference count for an inode and removes it if the count reaches zero + fn do_forget(&self, inode: Inode, count: u64) { + // Skip forgetting the root inode + if inode == self.init_inode { + return; + } + + let mut inodes = self.inodes.write().unwrap(); + if let Some(data) = inodes.get(&inode) { + // Acquiring the write lock on the inode map prevents new lookups from incrementing the + // refcount but there is the possibility that a previous lookup already acquired a + // reference to the inode data and is in the process of updating the refcount so we need + // to loop here until we can decrement successfully. + loop { + let refcount = data.refcount.load(Ordering::Relaxed); + + // Saturating sub because it doesn't make sense for a refcount to go below zero and + // we don't want misbehaving clients to cause integer overflow. 
+ let new_count = refcount.saturating_sub(count); + + if data + .refcount + .compare_exchange(refcount, new_count, Ordering::Release, Ordering::Relaxed) + .unwrap() + == refcount + { + if new_count == 0 { + // We just removed the last refcount for this inode. There's no need for an + // acquire fence here because we hold a write lock on the inode map and any + // thread that is waiting to do a forget on the same inode will have to wait + // until we release the lock. So there's is no other release store for us to + // synchronize with before deleting the entry. + inodes.remove(&inode); + } + break; + } + } + } + } + + fn do_readlink(&self, inode: Inode) -> io::Result> { + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode)?; + + // Allocate a buffer for the link target + let mut buf = vec![0; libc::PATH_MAX as usize]; + + // Call readlink to get the symlink target + let res = unsafe { + libc::readlink( + c_path.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + buf.len(), + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Resize the buffer to the actual length of the link target + buf.resize(res as usize, 0); + Ok(buf) + } + + fn do_setxattr(&self, inode: Inode, name: &CStr, value: &[u8], flags: u32) -> io::Result<()> { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + // Don't allow setting the owner/permissions attribute + if name.to_bytes() == OWNER_PERMS_XATTR_KEY { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // Convert flags to mflags + let mut mflags: i32 = 0; + if (flags as i32) & bindings::LINUX_XATTR_CREATE != 0 { + mflags |= libc::XATTR_CREATE; + } + + if (flags as 
i32) & bindings::LINUX_XATTR_REPLACE != 0 { + mflags |= libc::XATTR_REPLACE; + } + + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode_data.inode)?; + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { + libc::setxattr( + c_path.as_ptr(), + name.as_ptr(), + value.as_ptr() as *const libc::c_void, + value.len(), + 0, + mflags as libc::c_int, + ) + }; + + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn do_getxattr(&self, inode: Inode, name: &CStr, size: u32) -> io::Result { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + // Don't allow getting attributes for the root inode + if inode == self.init_inode { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENODATA))); + } + + // Don't allow getting the owner/permissions attribute + if name.to_bytes() == OWNER_PERMS_XATTR_KEY { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode)?; + + // Safe because this will only modify the contents of `buf` + let mut buf = vec![0; size as usize]; + let res = unsafe { + if size == 0 { + libc::getxattr( + c_path.as_ptr(), + name.as_ptr(), + std::ptr::null_mut(), + size as libc::size_t, + 0, + 0, + ) + } else { + libc::getxattr( + c_path.as_ptr(), + name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + size as libc::size_t, + 0, + 0, + ) + } + }; + + if res < 0 { + let last_error = io::Error::last_os_error(); + if last_error.raw_os_error() == Some(libc::ERANGE) { + return Err(io::Error::from_raw_os_error(LINUX_ERANGE)); + } + + return Err(linux_error(last_error)); + } + + if size == 0 { + Ok(GetxattrReply::Count(res as u32)) + } else { + // Truncate the buffer to the actual length of the value + buf.resize(res as usize, 0); + 
Ok(GetxattrReply::Value(buf)) + } + } +
+    /// Lists the extended attribute names of an inode, hiding the owner/permissions attribute.
+    ///
+    /// ## Arguments
+    /// * `inode` - The inode whose attribute names are listed.
+    /// * `size` - The size of the caller's buffer; `0` asks only for the size of the list.
+    ///
+    /// ## Returns
+    /// * `Ok(ListxattrReply::Count(n))` when `size` is `0`, where `n` is the byte length of the
+    ///   filtered, NUL-separated name list.
+    /// * `Ok(ListxattrReply::Names(buf))` with the filtered name list otherwise.
+    /// * `Err(io::Error)` if extended attributes are disabled, the host call fails, or the
+    ///   filtered list does not fit in `size` bytes.
+    fn do_listxattr(&self, inode: Inode, size: u32) -> io::Result<ListxattrReply> {
+        // Check if extended attributes are enabled
+        if !self.config.xattr {
+            return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS)));
+        }
+
+        // Get the path for this inode
+        let c_path = self.inode_number_to_vol_path(inode)?;
+
+        // Maps a failed host call to the error reported to the caller
+        let to_reply_error = |err: io::Error| {
+            if err.raw_os_error() == Some(libc::ERANGE) {
+                io::Error::from_raw_os_error(LINUX_ERANGE)
+            } else {
+                linux_error(err)
+            }
+        };
+
+        // Ask the host how large the list is instead of assuming it fits a fixed-size buffer.
+        // Safe because a null buffer of length 0 is not written to.
+        let needed = unsafe { libc::listxattr(c_path.as_ptr(), std::ptr::null_mut(), 0, 0) };
+        if needed < 0 {
+            return Err(to_reply_error(io::Error::last_os_error()));
+        }
+
+        let mut buf = vec![0u8; needed as usize];
+        if !buf.is_empty() {
+            // Safe because this will only modify the contents of `buf`.
+            let res = unsafe {
+                libc::listxattr(
+                    c_path.as_ptr(),
+                    buf.as_mut_ptr() as *mut libc::c_char,
+                    buf.len(),
+                    0,
+                )
+            };
+
+            // ERANGE here means the list grew between the two calls
+            if res < 0 {
+                return Err(to_reply_error(io::Error::last_os_error()));
+            }
+
+            // Truncate the buffer to the actual length of the list of attributes
+            buf.truncate(res as usize);
+        }
+
+        // Remove the owner/permissions attribute from the list of attributes. The same filtered
+        // list backs both replies so the reported count always matches the returned names.
+        let hidden_prefix = &OWNER_PERMS_XATTR_KEY[..OWNER_PERMS_XATTR_KEY.len() - 1];
+        let mut clean_buf = Vec::with_capacity(buf.len());
+        for attr in buf.split(|c| *c == 0) {
+            if attr.is_empty() || attr.starts_with(hidden_prefix) {
+                continue;
+            }
+
+            clean_buf.extend_from_slice(attr);
+            clean_buf.push(0);
+        }
+
+        if size == 0 {
+            return Ok(ListxattrReply::Count(clean_buf.len() as u32));
+        }
+
+        // Return an error if the buffer exceeds the requested size
+        if clean_buf.len() > size as usize {
+            return Err(io::Error::from_raw_os_error(LINUX_ERANGE));
+        }
+
+        Ok(ListxattrReply::Names(clean_buf))
+    }
+
+ fn do_removexattr(&self, inode: Inode, name: &CStr) -> io::Result<()> { + // Check if
extended attributes are enabled + if !self.config.xattr { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + // Don't allow setting the owner/permissions attribute + if name.to_bytes() == OWNER_PERMS_XATTR_KEY { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode_data.inode)?; + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::removexattr(c_path.as_ptr(), name.as_ptr(), 0) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn do_create( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. + } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + let flags = self.parse_open_flags(flags as i32); + let hostmode = if (flags & libc::O_DIRECTORY) != 0 { + 0o700 + } else { + 0o600 + }; + + // Safe because this doesn't modify any memory and we check the return value. 
We don't + // really check `flags` because if the kernel can't handle poorly specified flags then we + // have much bigger problems. + let fd = unsafe { + libc::open( + c_path.as_ptr(), + flags | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, + hostmode, + ) + }; + + if fd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + // Set security context + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Fd(fd), secctx, false)? + }; + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + if let Err(e) = Self::set_owner_perms_attr( + &FileId::Fd(fd), + &stat, + Some((ctx.uid, ctx.gid)), + Some((libc::S_IFREG as u32 | (mode & !(umask & 0o777))) as u16), + ) { + unsafe { libc::close(fd) }; + return Err(e); + } + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + // Safe because we just opened this fd. 
+ let file = RwLock::new(unsafe { File::from_raw_fd(fd) }); + + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + let data = HandleData { + inode: entry.inode, + file, + }; + + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + CachePolicy::Never => opts |= OpenOptions::DIRECT_IO, + CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE, + _ => {} + }; + + Ok((entry, Some(handle), opts)) + } + + fn do_mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. + } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + // NOTE: file nodes are created as regular file on macos following the passthroughfs + // behavior. + let fd = unsafe { + libc::open( + c_path.as_ptr(), + libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, + 0o600, + ) + }; + + if fd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + // Set security context + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Fd(fd), secctx, false)? 
+ }; + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + if let Err(e) = Self::set_owner_perms_attr( + &FileId::Fd(fd), + &stat, + Some((ctx.uid, ctx.gid)), + Some((mode & !umask) as u16), + ) { + unsafe { libc::close(fd) }; + return Err(e); + } + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + unsafe { libc::close(fd) }; + + Ok(entry) + } + + fn do_fallocate( + &self, + inode: Inode, + handle: Handle, + offset: u64, + length: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + let fd = data.file.write().unwrap().as_raw_fd(); + let proposed_length = (offset + length) as i64; + let mut fs = libc::fstore_t { + fst_flags: libc::F_ALLOCATECONTIG, + fst_posmode: libc::F_PEOFPOSMODE, + fst_offset: 0, + fst_length: proposed_length, + fst_bytesalloc: 0, + }; + + let res = unsafe { libc::fcntl(fd, libc::F_PREALLOCATE, &mut fs as *mut _) }; + if res < 0 { + fs.fst_flags = libc::F_ALLOCATEALL; + let res = unsafe { libc::fcntl(fd, libc::F_PREALLOCATE, &mut fs as &mut _) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + } + + let st = Self::unpatched_stat(&FileId::Fd(fd))?; + if st.st_size >= proposed_length { + // fallocate should not shrink the file. The file is already larger than needed. 
+ return Ok(()); + } + + let res = unsafe { libc::ftruncate(fd, proposed_length) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn do_lseek(&self, inode: Inode, handle: Handle, offset: u64, whence: u32) -> io::Result { + let data = self.get_inode_handle_data(inode, handle)?; + + // SEEK_DATA and SEEK_HOLE have slightly different semantics + // in Linux vs. macOS, which means we can't support them. + let mwhence = if whence == 3 { + // SEEK_DATA + return Ok(offset); + } else if whence == 4 { + // SEEK_HOLE + libc::SEEK_END + } else { + whence as i32 + }; + + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::lseek(fd, offset as bindings::off64_t, mwhence as libc::c_int) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(res as u64) + } + + fn do_setupmapping( + &self, + inode: Inode, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + if map_sender.is_none() { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + let prot_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { + libc::PROT_READ | libc::PROT_WRITE + } else { + libc::PROT_READ + }; + + if (moffset + len) > shm_size { + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + + let guest_addr = guest_shm_base + moffset; + + let file = self.open_inode(inode, libc::O_RDWR)?; + let fd = file.as_raw_fd(); + + let host_addr = unsafe { + libc::mmap( + null_mut(), + len as usize, + prot_flags, + libc::MAP_SHARED, + fd, + foffset as libc::off_t, + ) + }; + if host_addr == libc::MAP_FAILED { + return Err(linux_error(io::Error::last_os_error())); + } + + let ret = unsafe { libc::close(fd) }; + if ret == -1 { + return Err(linux_error(io::Error::last_os_error())); + } + + // We've 
checked that map_sender is something above. + let sender = map_sender.as_ref().unwrap(); + let (reply_sender, reply_receiver) = unbounded(); + sender + .send(MemoryMapping::AddMapping( + reply_sender, + host_addr as u64, + guest_addr, + len, + )) + .unwrap(); + if !reply_receiver.recv().unwrap() { + error!("Error requesting HVF the addition of a DAX window"); + unsafe { libc::munmap(host_addr, len as usize) }; + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + + self.map_windows + .lock() + .unwrap() + .insert(guest_addr, host_addr as u64); + + Ok(()) + } + + fn do_removemapping( + &self, + requests: Vec, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + if map_sender.is_none() { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + for req in requests { + let guest_addr = guest_shm_base + req.moffset; + if (req.moffset + req.len) > shm_size { + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + let host_addr = match self.map_windows.lock().unwrap().remove(&guest_addr) { + Some(a) => a, + None => return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))), + }; + debug!( + "removemapping: guest_addr={:x} len={:?}", + guest_addr, req.len + ); + + let sender = map_sender.as_ref().unwrap(); + let (reply_sender, reply_receiver) = unbounded(); + sender + .send(MemoryMapping::RemoveMapping( + reply_sender, + guest_addr, + req.len, + )) + .unwrap(); + if !reply_receiver.recv().unwrap() { + error!("Error requesting HVF the removal of a DAX window"); + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + + let ret = unsafe { libc::munmap(host_addr as *mut libc::c_void, req.len as usize) }; + if ret == -1 { + error!("Error unmapping DAX window"); + return Err(linux_error(io::Error::last_os_error())); + } + } + + Ok(()) + } +} + +//-------------------------------------------------------------------------------------------------- +// 
Functions +//-------------------------------------------------------------------------------------------------- + +/// Returns a "bad file descriptor" error +fn ebadf() -> io::Error { + io::Error::from_raw_os_error(libc::EBADF) +} + +/// Returns an "invalid argument" error +fn einval() -> io::Error { + io::Error::from_raw_os_error(libc::EINVAL) +} + +//-------------------------------------------------------------------------------------------------- +// Trait Implementations +//-------------------------------------------------------------------------------------------------- + +impl FileSystem for OverlayFs { + type Inode = u64; + type Handle = u64; + + fn init(&self, capable: FsOptions) -> io::Result { + // Set the umask to 0 to ensure that all file permissions are set correctly + unsafe { libc::umask(0o000) }; + + let mut opts = FsOptions::empty(); + + // Enable writeback caching if requested and supported + if self.config.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { + opts |= FsOptions::WRITEBACK_CACHE; + self.writeback.store(true, Ordering::SeqCst); + } + + // Enable posix ACLs if supported + if capable.contains(FsOptions::POSIX_ACL) { + opts |= FsOptions::POSIX_ACL; + } + + Ok(opts) + } + + fn destroy(&self) { + // Clear all handles + self.handles.write().unwrap().clear(); + + // Clear all inodes + self.inodes.write().unwrap().clear(); + + // Clear any memory-mapped windows + self.map_windows.lock().unwrap().clear(); + } + + fn statfs(&self, _ctx: Context, inode: Self::Inode) -> io::Result { + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode)?; + + // Call statvfs64 to get filesystem statistics + // Safe because this will only modify `out` and we check the return value. 
+ let mut out = MaybeUninit::::zeroed(); + let res = unsafe { bindings::statvfs64(c_path.as_ptr(), out.as_mut_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because statvfs64 initialized the struct + Ok(unsafe { out.assume_init() }) + } + + fn lookup(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result { + Self::validate_name(name)?; + let (entry, _) = self.do_lookup(parent, name)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn forget(&self, _ctx: Context, inode: Self::Inode, count: u64) { + self.do_forget(inode, count); + } + + fn getattr( + &self, + _ctx: Context, + inode: Self::Inode, + _handle: Option, + ) -> io::Result<(bindings::stat64, Duration)> { + self.do_getattr(inode) + } + + fn setattr( + &self, + _ctx: Context, + inode: Self::Inode, + attr: bindings::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(bindings::stat64, Duration)> { + self.do_setattr(inode, attr, handle, valid) + } + + fn readlink(&self, _ctx: Context, inode: Self::Inode) -> io::Result> { + self.do_readlink(inode) + } + + fn mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_mkdir(ctx, parent, name, mode, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { + Self::validate_name(name)?; + self.do_unlink(parent, name) + } + + fn rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { + Self::validate_name(name)?; + self.do_rmdir(parent, name) + } + + fn symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_symlink(ctx, linkname, parent, name, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn 
rename( + &self, + _ctx: Context, + old_parent: Self::Inode, + old_name: &CStr, + new_parent: Self::Inode, + new_name: &CStr, + flags: u32, + ) -> io::Result<()> { + Self::validate_name(old_name)?; + Self::validate_name(new_name)?; + self.do_rename(old_parent, old_name, new_parent, new_name, flags) + } + + fn link( + &self, + _ctx: Context, + inode: Self::Inode, + new_parent: Self::Inode, + new_name: &CStr, + ) -> io::Result { + Self::validate_name(new_name)?; + let entry = self.do_link(inode, new_parent, new_name)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn open( + &self, + _ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + if inode == self.init_inode { + Ok((Some(self.init_handle), OpenOptions::empty())) + } else { + self.do_open(inode, flags) + } + } + + fn read( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + mut w: W, + size: u32, + offset: u64, + _lock_owner: Option, + _flags: u32, + ) -> io::Result { + let data = self.get_inode_handle_data(inode, handle)?; + + #[cfg(not(feature = "efi"))] + if inode == self.init_inode { + println!("init inode"); + return w.write(&INIT_BINARY[offset as usize..(offset + (size as u64)) as usize]); + } + + let f = data.file.read().unwrap(); + w.write_from(&f, size as usize, offset) + } + + fn write( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + mut r: R, + size: u32, + offset: u64, + _lock_owner: Option, + _delayed_write: bool, + _kill_priv: bool, + _flags: u32, + ) -> io::Result { + let data = self.get_inode_handle_data(inode, handle)?; + let f = data.file.read().unwrap(); + r.read_to(&f, size as usize, offset) + } + + fn flush( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + _lock_owner: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + // Since this method is called whenever an fd is closed in the client, we can emulate that + // behavior by doing 
the same thing (dup-ing the fd and then immediately closing it). Safe + // because this doesn't modify any memory and we check the return values. + unsafe { + let newfd = libc::dup(data.file.write().unwrap().as_raw_fd()); + if newfd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + if libc::close(newfd) < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + } + + fn release( + &self, + _ctx: Context, + inode: Self::Inode, + _flags: u32, + handle: Self::Handle, + _flush: bool, + _flock_release: bool, + _lock_owner: Option, + ) -> io::Result<()> { + self.do_release(inode, handle) + } + + fn fsync( + &self, + _ctx: Context, + inode: Self::Inode, + _datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + // Safe because this doesn't modify any memory and we check the return values. + let res = unsafe { libc::fsync(data.file.write().unwrap().as_raw_fd()) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn opendir( + &self, + _ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + self.do_open(inode, flags | libc::O_DIRECTORY as u32) + } + + fn readdir( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_readdir(inode, size, offset, add_entry) + } + + fn readdirplus( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry, Entry) -> io::Result, + { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_readdir(inode, size, offset, |dir_entry| { + let (entry, _) = self.do_lookup(inode, &CString::new(dir_entry.name).unwrap())?; + add_entry(dir_entry, entry) + }) + } + + 
fn releasedir( + &self, + _ctx: Context, + inode: Self::Inode, + _flags: u32, + handle: Self::Handle, + ) -> io::Result<()> { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_release(inode, handle) + } + + fn fsyncdir( + &self, + ctx: Context, + inode: Self::Inode, + datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + self.fsync(ctx, inode, datasync, handle) + } + + fn setxattr( + &self, + _ctx: Context, + inode: Self::Inode, + name: &CStr, + value: &[u8], + flags: u32, + ) -> io::Result<()> { + self.do_setxattr(inode, name, value, flags) + } + + fn getxattr( + &self, + _ctx: Context, + inode: Self::Inode, + name: &CStr, + size: u32, + ) -> io::Result { + self.do_getxattr(inode, name, size) + } + + fn listxattr( + &self, + _ctx: Context, + inode: Self::Inode, + size: u32, + ) -> io::Result { + self.do_listxattr(inode, size) + } + + fn removexattr(&self, _ctx: Context, inode: Self::Inode, name: &CStr) -> io::Result<()> { + self.do_removexattr(inode, name) + } + + fn access(&self, ctx: Context, inode: Self::Inode, mask: u32) -> io::Result<()> { + let c_path = self.inode_number_to_vol_path(inode)?; + + let st = Self::patched_stat(&FileId::Path(c_path))?; + + println!("st: {:?}", st); + + let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); + + if mode == libc::F_OK { + // The file exists since we were able to call `stat(2)` on it. 
+ return Ok(()); + } + + if (mode & libc::R_OK) != 0 + && ctx.uid != 0 + && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0) + && st.st_mode & 0o004 == 0 + { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + if (mode & libc::W_OK) != 0 + && ctx.uid != 0 + && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0) + && st.st_mode & 0o002 == 0 + { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // root can only execute something if it is executable by one of the owner, the group, or + // everyone. + if (mode & libc::X_OK) != 0 + && (ctx.uid != 0 || st.st_mode & 0o111 == 0) + && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0) + && st.st_mode & 0o001 == 0 + { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + Ok(()) + } + + fn create( + &self, + ctx: Context, + parent: Self::Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + Self::validate_name(name)?; + self.do_create(ctx, parent, name, mode, flags, umask, extensions) + } + + fn mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + _rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + self.do_mknod(ctx, parent, name, mode, umask, extensions) + } + + fn fallocate( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + _mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + self.do_fallocate(inode, handle, offset, length) + } + + fn lseek( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + offset: u64, + whence: u32, + ) -> io::Result { + self.do_lseek(inode, handle, offset, whence) + } + + fn setupmapping( + &self, + _ctx: Context, + inode: Inode, + _handle: Handle, + foffset: u64, 
+ len: u64, + flags: u64, + moffset: u64, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + self.do_setupmapping( + inode, + foffset, + len, + flags, + moffset, + guest_shm_base, + shm_size, + map_sender, + ) + } + + fn removemapping( + &self, + _ctx: Context, + requests: Vec, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + self.do_removemapping(requests, guest_shm_base, shm_size, map_sender) + } +} + +impl Default for Config { + fn default() -> Self { + Self { + entry_timeout: Duration::from_secs(5), + attr_timeout: Duration::from_secs(5), + cache_policy: CachePolicy::default(), // Use the default cache policy (Auto) + writeback: false, + xattr: false, + proc_sfd_rawfd: None, + export_fsid: 0, + export_table: None, + } + } +} + +// Add Default implementation for Context +impl Default for Context { + fn default() -> Self { + Context { + uid: 0, + gid: 0, + pid: 0, + } + } +} + +//-------------------------------------------------------------------------------------------------- +// External Functions +//-------------------------------------------------------------------------------------------------- + +extern "C" { + /// macOS system call for cloning a file with COW semantics + /// + /// Creates a copy-on-write clone of a file. 
+ /// + /// ## Arguments + /// + /// * `src` - Path to the source file + /// * `dst` - Path to the destination file + /// * `flags` - Currently unused, must be 0 + /// + /// ## Returns + /// + /// * `0` on success + /// * `-1` on error with errno set + fn clonefile( + src: *const libc::c_char, + dst: *const libc::c_char, + flags: libc::c_int, + ) -> libc::c_int; +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/mod.rs b/src/devices/src/virtio/fs/macos/overlayfs/mod.rs new file mode 100644 index 000000000..074462f86 --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/mod.rs @@ -0,0 +1,5 @@ +mod fs; +#[cfg(test)] +mod tests; + +pub use fs::*; diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests.rs new file mode 100644 index 000000000..c89c77f7c --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests.rs @@ -0,0 +1,191 @@ +#[cfg(test)] +mod create; + +#[cfg(test)] +mod lookup; + +#[cfg(test)] +mod metadata; + +#[cfg(test)] +mod misc; + +#[cfg(test)] +mod open; + +#[cfg(test)] +mod read; + +#[cfg(test)] +mod remove; + +#[cfg(test)] +mod write; + +//-------------------------------------------------------------------------------------------------- +// Modules: Helper +//-------------------------------------------------------------------------------------------------- + +mod helper { + use std::{ + fs::{self, File}, + io, + os::unix::fs::PermissionsExt, + process::Command, + }; + + use crate::virtio::{ + fs::filesystem::{ZeroCopyReader, ZeroCopyWriter}, + macos::overlayfs::{Config, OverlayFs}, + }; + + use tempfile::TempDir; + + //-------------------------------------------------------------------------------------------------- + // Types + //-------------------------------------------------------------------------------------------------- + + pub(super) struct TestContainer(pub(super) Vec); + + 
//-------------------------------------------------------------------------------------------------- + // Trait Implementations + //-------------------------------------------------------------------------------------------------- + + impl io::Write for TestContainer { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.0.extend_from_slice(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + } + + impl ZeroCopyWriter for TestContainer { + fn write_from(&mut self, f: &File, count: usize, off: u64) -> io::Result { + use std::os::unix::fs::FileExt; + + // Pre-allocate space in our vector to avoid reallocations + let original_len = self.0.len(); + self.0.resize(original_len + count, 0); + + // Read directly into our vector's buffer + let bytes_read = f.read_at(&mut self.0[original_len..original_len + count], off)?; + + // Adjust the size to match what was actually read + self.0.truncate(original_len + bytes_read); + + if bytes_read == 0 && count > 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "unexpected EOF", + )); + } + + Ok(bytes_read) + } + } + + impl io::Read for TestContainer { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let available = self.0.len(); + if available == 0 { + return Ok(0); + } + + let amt = std::cmp::min(buf.len(), available); + buf[..amt].copy_from_slice(&self.0[..amt]); + Ok(amt) + } + } + + impl ZeroCopyReader for TestContainer { + fn read_to(&mut self, f: &File, count: usize, off: u64) -> io::Result { + use std::os::unix::fs::FileExt; + + let available = self.0.len(); + if available == 0 { + return Ok(0); + } + + let to_write = std::cmp::min(count, available); + let written = f.write_at(&self.0[..to_write], off)?; + Ok(written) + } + } + + //-------------------------------------------------------------------------------------------------- + // Functions + //-------------------------------------------------------------------------------------------------- + + // Helper function 
to create a temporary directory with specified files + pub(super) fn setup_test_layer(files: &[(&str, bool, u32)]) -> io::Result { + let dir = TempDir::new().unwrap(); + + for (path, is_dir, mode) in files { + let full_path = dir.path().join(path); + if let Some(parent) = full_path.parent() { + fs::create_dir_all(parent)?; + } + + if *is_dir { + fs::create_dir(&full_path)?; + } else { + File::create(&full_path)?; + } + + fs::set_permissions(&full_path, fs::Permissions::from_mode(*mode))?; + } + + Ok(dir) + } + + // Helper function to create an overlayfs with specified layers + pub(super) fn create_overlayfs( + layers: Vec>, + ) -> io::Result<(OverlayFs, Vec)> { + let mut temp_dirs = Vec::new(); + let mut layer_paths = Vec::new(); + + for layer in layers { + let temp_dir = setup_test_layer(&layer)?; + layer_paths.push(temp_dir.path().to_path_buf()); + temp_dirs.push(temp_dir); + } + + let cfg = Config::default(); + let overlayfs = OverlayFs::new(layer_paths, cfg)?; + Ok((overlayfs, temp_dirs)) + } + + // Debug utility to print the directory structure of each layer using tree command + pub(super) fn debug_print_layers(temp_dirs: &[TempDir], show_perms: bool) -> io::Result<()> { + println!("\n=== Layer Directory Structures ==="); + + for (i, dir) in temp_dirs.iter().enumerate() { + println!("\nLayer {}: {}", i, dir.path().display()); + + let path = dir.path(); + let mut tree_cmd = Command::new("tree"); + tree_cmd.arg("-a"); // show hidden files + if show_perms { + tree_cmd.arg("-p"); + } + let output = tree_cmd.arg(path).output()?; + + if output.status.success() { + println!("{}", String::from_utf8_lossy(&output.stdout)); + } else { + println!( + "Error running tree command: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + } + + println!("================================\n"); + + Ok(()) + } +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs new file mode 100644 index 
000000000..6079add5e --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs @@ -0,0 +1,1494 @@ +use std::{ + ffi::CString, + fs::{self, FileType}, + io, + os::unix::fs::FileTypeExt, + path::Path, +}; + +use crate::virtio::{ + bindings, + fs::filesystem::{Context, Extensions, FileSystem}, + fuse::FsOptions, +}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_mkdir_basic() -> io::Result<()> { + // Create test layers: + // Single layer with a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Create a new directory + let dir_name = CString::new("new_dir").unwrap(); + let ctx = Context::default(); + let entry = fs.mkdir(ctx, 1, &dir_name, 0o755, 0, Extensions::default())?; + + // Verify the directory was created with correct mode + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + assert_eq!(entry.attr.st_mode & 0o777, 0o755); + + // Verify we can look it up + let lookup_entry = fs.lookup(ctx, 1, &dir_name)?; + assert_eq!(lookup_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Verify the directory exists on disk in the top layer + let dir_path = temp_dirs.last().unwrap().path().join("new_dir"); + assert!(dir_path.exists()); + assert!(dir_path.is_dir()); + + Ok(()) +} + +#[test] +fn test_mkdir_nested() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/bottom_file + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + // - dir3/top_file + // - dir1/.wh.subdir (whiteout) + let layers = vec![ + 
vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![ + ("dir3", true, 0o755), + ("dir3/top_file", false, 0o644), + ("dir1/.wh.subdir", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create nested directory in dir1 (should trigger copy-up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let nested_name = CString::new("new_nested").unwrap(); + let nested_entry = fs.mkdir( + ctx, + dir1_entry.inode, + &nested_name, + 0o700, + 0, + Extensions::default(), + )?; + assert_eq!(nested_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Test 2: Create directory inside the newly created nested directory + let deep_name = CString::new("deep_dir").unwrap(); + let deep_entry = fs.mkdir( + ctx, + nested_entry.inode, + &deep_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(deep_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Test 3: Create directory in dir2 (middle layer, should trigger copy-up) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let middle_nested_name = CString::new("middle_nested").unwrap(); + let middle_nested_entry = fs.mkdir( + ctx, + dir2_entry.inode, + &middle_nested_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!( + middle_nested_entry.attr.st_mode & libc::S_IFMT, + libc::S_IFDIR + ); + + // Test 4: Create directory in dir3 (top layer, no copy-up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + let top_nested_name = CString::new("top_nested").unwrap(); + let top_nested_entry = fs.mkdir( + ctx, + 
dir3_entry.inode, + &top_nested_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(top_nested_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + helper::debug_print_layers(&temp_dirs, false)?; + + // Verify all directories exist in appropriate layers + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir1/new_nested").exists()); + assert!(top_layer.join("dir1/new_nested/deep_dir").exists()); + assert!(top_layer.join("dir2/middle_nested").exists()); + assert!(top_layer.join("dir3/top_nested").exists()); + + // Verify the original files are still accessible + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, dir1_entry.inode, &file1_name)?; + assert_eq!(file1_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_mkdir_with_umask() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/subdir/ (0o755) + // - dir1/subdir/file1 + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ (0o777) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file1", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![("dir3", true, 0o777)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create directory with different umasks in root + let dir_names = vec![ + ("dir_umask_022", 0o777, 0o022, 0o755), // Common umask + ("dir_umask_077", 0o777, 0o077, 0o700), // Strict umask + ("dir_umask_002", 0o777, 0o002, 0o775), // Group writable + ("dir_umask_000", 0o777, 0o000, 0o777), // No umask + ]; + + let test_cases = dir_names.clone(); + for (name, mode, umask, expected) in test_cases { + 
let dir_name = CString::new(name).unwrap(); + let entry = fs.mkdir(ctx, 1, &dir_name, mode, umask, Extensions::default())?; + assert_eq!( + entry.attr.st_mode & 0o777, + expected, + "Directory {} has wrong permissions", + name + ); + } + + // Test 2: Create nested directories with umask in different layers + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let nested_name = CString::new("nested_umask").unwrap(); + let nested_entry = fs.mkdir( + ctx, + dir1_entry.inode, + &nested_name, + 0o777, + 0o027, + Extensions::default(), + )?; + assert_eq!(nested_entry.attr.st_mode & 0o777, 0o750); + + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let middle_name = CString::new("middle_umask").unwrap(); + let middle_entry = fs.mkdir( + ctx, + dir2_entry.inode, + &middle_name, + 0o777, + 0o077, + Extensions::default(), + )?; + assert_eq!(middle_entry.attr.st_mode & 0o777, 0o700); + + Ok(()) +} + +#[test] +fn test_mkdir_existing_name() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/file2 + // Layer 1 (middle): + // - dir2/ + // - dir2/file3 + // - dir1/another_file + // Layer 2 (top): + // - dir3/ + // - dir3/file4 + // - dir1/.wh.subdir (whiteout of dir1/subdir) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/file3", false, 0o644), + ("dir1/another_file", false, 0o644), + ], + vec![ + ("dir3", true, 0o755), + ("dir3/file4", false, 0o644), + ("dir1/.wh.subdir", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Try to create
directory with name of existing file in bottom layer + let file1_name = CString::new("file1").unwrap(); + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + match fs.mkdir( + ctx, + dir1_entry.inode, + &file1_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => { + helper::debug_print_layers(&temp_dirs, false)?; + panic!("Expected mkdir with existing file name to fail"); + } + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + // Test 2: Try to create directory with name of existing file in middle layer + let file3_name = CString::new("file3").unwrap(); + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + match fs.mkdir( + ctx, + dir2_entry.inode, + &file3_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => panic!("Expected mkdir with existing file name to fail"), + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + // Test 3: Try to create directory with name of existing directory + let dir3_name = CString::new("dir3").unwrap(); + match fs.mkdir(ctx, 1, &dir3_name, 0o755, 0, Extensions::default()) { + Ok(_) => panic!("Expected mkdir with existing directory name to fail"), + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + // Test 4: Try to create directory with name that exists in lower layer but is whited out + let subdir_name = CString::new("subdir").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // This should succeed because the original subdir is whited out + let new_subdir = fs.mkdir( + ctx, + dir1_entry.inode, + &subdir_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(new_subdir.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + Ok(()) +} + +#[test] +fn test_mkdir_invalid_parent() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // Layer 1 (middle): + // - 
dir2/ + // - dir2/file2 + // - .wh.dir1 (whiteout entire dir1) + // Layer 2 (top): + // - dir3/ + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/file2", false, 0o644), + (".wh.dir1", false, 0o644), // Whiteout entire dir1 + ], + vec![("dir3", true, 0o755)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&_temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Try to create directory with non-existent parent inode + let dir_name = CString::new("new_dir").unwrap(); + let invalid_inode = 999999; + match fs.mkdir( + ctx, + invalid_inode, + &dir_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => panic!("Expected mkdir with invalid parent to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EBADF)), + } + + // Test 2: Lookup of the whited-out dir1 must fail with ENOENT + let dir1_name = CString::new("dir1").unwrap(); + match fs.lookup(ctx, 1, &dir1_name) { + Ok(_) => panic!("Expected lookup of whited-out directory to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Test 3: Try to create directory with file as parent + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(ctx, dir2_entry.inode, &file2_name)?; + + let nested_name = CString::new("nested").unwrap(); + match fs.mkdir( + ctx, + file2_entry.inode, + &nested_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => panic!("Expected mkdir with file as parent to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOTDIR)), + } + + Ok(()) +} + +#[test] +fn test_mkdir_invalid_name() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // -
dir1/.hidden_file + // - dir1/subdir/ + // Layer 1 (middle): + // - dir2/ + // - dir2/.wh..wh..opq (opaque directory) + // Layer 2 (top): + // - dir3/ + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/.hidden_file", false, 0o644), + ("dir1/subdir", true, 0o755), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/.wh..wh..opq", false, 0o644), // Opaque directory marker + ], + vec![("dir3", true, 0o755)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test various invalid names + let test_cases = vec![ + ("", io::ErrorKind::InvalidInput, "empty name"), + ( + "..", + io::ErrorKind::PermissionDenied, + "parent dir traversal", + ), + ("foo/bar", io::ErrorKind::PermissionDenied, "contains slash"), + ( + "foo\\bar", + io::ErrorKind::PermissionDenied, + "contains backslash", + ), + ( + "foo\0bar", + io::ErrorKind::InvalidInput, + "contains null byte", + ), + (".wh.foo", io::ErrorKind::InvalidInput, "whiteout prefix"), + (".wh..wh..opq", io::ErrorKind::InvalidInput, "opaque marker"), + ]; + + for (name, expected_kind, desc) in test_cases { + let name = CString::new(name.as_bytes().to_vec()).unwrap_or_default(); + match fs.mkdir(ctx, 1, &name, 0o755, 0, Extensions::default()) { + Ok(_) => panic!("Expected mkdir with {} to fail", desc), + Err(e) => assert_eq!( + e.kind(), + expected_kind, + "Wrong error kind for {}: expected {:?}, got {:?}", + desc, + expected_kind, + e.kind() + ), + } + } + + // Test invalid UTF-8 separately since it can't be represented as a string literal + let invalid_utf8 = vec![0x66, 0x6f, 0x6f, 0x80, 0x62, 0x61, 0x72]; // "foo", an invalid 0x80 byte, then "bar" + let name = CString::new(invalid_utf8).unwrap(); + match fs.mkdir(ctx, 1, &name, 0o755, 0, Extensions::default()) { + Ok(_) => panic!("Expected mkdir with invalid UTF-8 to fail"), + Err(e) => assert_eq!( + e.kind(), + io::ErrorKind::InvalidInput, + "Wrong error kind for invalid UTF-8: expected
{:?}, got {:?}", + io::ErrorKind::InvalidInput, + e.kind() + ), + } + + // Test with valid but unusual names + let valid_cases = vec![ + "very_long_name_that_is_valid_but_unusual_and_tests_length_limits", + " leading_space", + "trailing_space ", + "!@#$%^&*()_+-=", + ]; + + for name in valid_cases { + let name = CString::new(name).unwrap(); + // These should succeed + let entry = fs.mkdir(ctx, 1, &name, 0o755, 0, Extensions::default())?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + } + + Ok(()) +} + +#[test] +fn test_mkdir_multiple_layers() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/bottom_file + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + // - dir3/top_file + // - .wh.dir1 (whiteout) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![ + ("dir3", true, 0o755), + ("dir3/top_file", false, 0o644), + (".wh.dir1", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create directory in each layer and verify copy-up behavior + let dir_names = vec![("dir2", "new_dir2"), ("dir3", "new_dir3")]; + + for (parent, new_dir) in dir_names { + let parent_name = CString::new(parent).unwrap(); + let parent_entry = fs.lookup(ctx, 1, &parent_name)?; + + let new_name = CString::new(new_dir).unwrap(); + let entry = fs.mkdir( + ctx, + parent_entry.inode, + &new_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Create a nested directory inside + let nested_name = 
CString::new(format!("nested_in_{}", new_dir)).unwrap(); + let nested_entry = fs.mkdir( + ctx, + entry.inode, + &nested_name, + 0o700, + 0, + Extensions::default(), + )?; + assert_eq!(nested_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + } + + // Test 2: Verify all directories exist in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir2/new_dir2").exists()); + assert!(top_layer.join("dir2/new_dir2/nested_in_new_dir2").exists()); + assert!(top_layer.join("dir3/new_dir3").exists()); + assert!(top_layer.join("dir3/new_dir3/nested_in_new_dir3").exists()); + + // Test 3: Lookup of the whited-out dir1 must fail with ENOENT + let dir1_name = CString::new("dir1").unwrap(); + match fs.lookup(ctx, 1, &dir1_name) { + Ok(_) => panic!("Expected lookup of whited-out directory to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + Ok(()) +} + +#[test] +fn test_symlink_basic() -> io::Result<()> { + // Create test layers: + // Single layer with a file + let layers = vec![vec![("target_file", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Create a new symlink + let link_name = CString::new("link").unwrap(); + let target_name = CString::new("target_file").unwrap(); + let ctx = Context::default(); + let entry = fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default())?; + + // Verify the symlink was created with correct mode + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + assert_eq!(entry.attr.st_mode & 0o777, 0o777); // Symlinks are typically 0777 + + // Verify we can look it up + let lookup_entry = fs.lookup(ctx, 1, &link_name)?; + assert_eq!(lookup_entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Verify the symlink exists on disk in the top layer + let link_path = temp_dirs.last().unwrap().path().join("link"); +
assert!(link_path.exists()); + assert!(link_path.is_symlink()); + + // Verify the symlink points to the correct target + let target = fs.readlink(ctx, lookup_entry.inode)?; + assert_eq!(target, target_name.to_bytes()); + + Ok(()) +} + +#[test] +fn test_symlink_nested() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/bottom_file + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + // - dir3/top_file + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![("dir3", true, 0o755), ("dir3/top_file", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create symlink in dir1 (should trigger copy-up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let link_name = CString::new("link_to_file1").unwrap(); + let target_name = CString::new("file1").unwrap(); + let link_entry = fs.symlink( + ctx, + &target_name, + dir1_entry.inode, + &link_name, + Extensions::default(), + )?; + assert_eq!(link_entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Test 2: Create symlink in dir2 (middle layer, should trigger copy-up) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let middle_link_name = CString::new("link_to_file2").unwrap(); + let middle_target = CString::new("file2").unwrap(); + let middle_link_entry = fs.symlink( + ctx, + &middle_target, + dir2_entry.inode, + &middle_link_name, + Extensions::default(), + )?; + assert_eq!(middle_link_entry.attr.st_mode & 
libc::S_IFMT, libc::S_IFLNK); + + // Test 3: Create symlink in dir3 (top layer, no copy-up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + let top_link_name = CString::new("link_to_top_file").unwrap(); + let top_target = CString::new("top_file").unwrap(); + let top_link_entry = fs.symlink( + ctx, + &top_target, + dir3_entry.inode, + &top_link_name, + Extensions::default(), + )?; + assert_eq!(top_link_entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Verify all symlinks exist in appropriate layers + let top_layer = temp_dirs.last().unwrap().path(); + assert!(fs::symlink_metadata(top_layer.join("dir1/link_to_file1")).is_ok()); + assert!(fs::symlink_metadata(top_layer.join("dir2/link_to_file2")).is_ok()); + assert!(fs::symlink_metadata(top_layer.join("dir3/link_to_top_file")).is_ok()); + + // Verify symlink targets + let link1_target = fs.readlink(ctx, link_entry.inode)?; + assert_eq!(link1_target, target_name.to_bytes()); + + let link2_target = fs.readlink(ctx, middle_link_entry.inode)?; + assert_eq!(link2_target, middle_target.to_bytes()); + + let link3_target = fs.readlink(ctx, top_link_entry.inode)?; + assert_eq!(link3_target, top_target.to_bytes()); + + Ok(()) +} + +#[test] +fn test_symlink_existing_name() -> io::Result<()> { + // Create a single test layer with two regular files + let layers = vec![vec![ + ("target_file", false, 0o644), + ("existing_name", false, 0o644), + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + let link_name = CString::new("existing_name").unwrap(); + let target_name = CString::new("target_file").unwrap(); + + // Try to create a symlink with an existing name + match fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default()) { + Ok(_) => panic!("Expected error when creating symlink with existing name"),
+ Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + Ok(()) +} + +#[test] +fn test_symlink_multiple_layers() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): base files + // Layer 1 (middle): some files + // Layer 2 (top): more files + let layers = vec![ + vec![ + ("bottom_dir", true, 0o755), + ("bottom_dir/target1", false, 0o644), + ], + vec![ + ("middle_dir", true, 0o755), + ("middle_dir/target2", false, 0o644), + ], + vec![("top_dir", true, 0o755), ("top_dir/target3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Create symlinks to files in different layers + let test_cases = vec![ + ("link_to_bottom", "bottom_dir/target1"), + ("link_to_middle", "middle_dir/target2"), + ("link_to_top", "top_dir/target3"), + ]; + + for (link, target) in test_cases.clone() { + let link_name = CString::new(link).unwrap(); + let target_name = CString::new(target).unwrap(); + + let entry = fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default())?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Verify symlink target + let target_bytes = fs.readlink(ctx, entry.inode)?; + assert_eq!(target_bytes, target_name.to_bytes()); + } + + // Verify all symlinks exist in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + for (link, _) in test_cases { + assert!(fs::symlink_metadata(top_layer.join(link)).is_ok()); + } + + Ok(()) +} + +#[test] +fn test_symlink_invalid_name() -> io::Result<()> { + // Create a simple test layer + let layers = vec![vec![("target_file", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + let target_name = 
CString::new("target_file").unwrap(); + + // Test cases with invalid names + let invalid_names = vec![ + "..", // Path traversal attempt + "invalid/name", // Contains slash + ".wh.name", // Contains whiteout prefix + ".wh..wh..opq", // Opaque directory marker + ]; + + for name in invalid_names { + let link_name = CString::new(name).unwrap(); + match fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default()) { + Ok(_) => panic!("Expected error for invalid name: {}", name), + Err(e) => { + assert!( + e.kind() == io::ErrorKind::InvalidInput + || e.kind() == io::ErrorKind::PermissionDenied, + "Unexpected error kind for name {}: {:?}", + name, + e.kind() + ); + } + } + } + + Ok(()) +} + +#[test] +fn test_rename_basic() -> io::Result<()> { + // Create test layers + let files = vec![("file1.txt", false, 0o644), ("file2.txt", false, 0o644)]; + let layers = vec![files]; + let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?; + + // Lookup source and destination parents (root in this case) + let root = 1; + let old_name = CString::new("file1.txt")?; + let new_name = CString::new("renamed.txt")?; + + // Perform rename + overlayfs.rename(Context::default(), root, &old_name, root, &new_name, 0)?; + + // Verify old name doesn't exist + assert!(overlayfs + .lookup(Context::default(), root, &old_name) + .is_err()); + + // Verify new name exists + let entry = overlayfs.lookup(Context::default(), root, &new_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_rename_whiteout() -> io::Result<()> { + // Create test layers with file in lower layer + let lower_files = vec![("file1.txt", false, 0o644)]; + let upper_files = vec![]; + let layers = vec![lower_files, upper_files]; + let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?; + + let root = 1; + let old_name = CString::new("file1.txt")?; + let new_name = CString::new("renamed.txt")?; + + // Rename file from lower layer + 
overlayfs.rename(Context::default(), root, &old_name, root, &new_name, 0)?; + + // Verify old name is whited out + assert!(overlayfs + .lookup(Context::default(), root, &old_name) + .is_err()); + + // Verify new name exists in upper layer + let entry = overlayfs.lookup(Context::default(), root, &new_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_rename_multiple_layers() -> io::Result<()> { + // Create test layers + let lower_files = vec![("file1.txt", false, 0o644), ("file2.txt", false, 0o644)]; + let middle_files = vec![("file3.txt", false, 0o644)]; + let upper_files = vec![("file4.txt", false, 0o644)]; + let layers = vec![lower_files, middle_files, upper_files]; + let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?; + + let root = 1; + let old_name = CString::new("file1.txt")?; + let new_name = CString::new("renamed.txt")?; + + // Rename file from lowest layer + overlayfs.rename(Context::default(), root, &old_name, root, &new_name, 0)?; + + // Verify old name is whited out + assert!(overlayfs + .lookup(Context::default(), root, &old_name) + .is_err()); + + // Verify new name exists in upper layer + let entry = overlayfs.lookup(Context::default(), root, &new_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_rename_errors() -> io::Result<()> { + // Create test layers + let files = vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("file2.txt", false, 0o644), + ]; + let layers = vec![files]; + let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?; + + let root = 1; + let dir1_name = CString::new("dir1")?; + let _ = overlayfs.lookup(Context::default(), root, &dir1_name)?; + + // Test renaming non-existent file + let nonexistent = CString::new("nonexistent.txt")?; + let new_name = CString::new("renamed.txt")?; + assert!(overlayfs + .rename(Context::default(), root, &nonexistent, root, &new_name, 0,) + .is_err()); + 
+ // Test renaming to invalid parent + let file2_name = CString::new("file2.txt")?; + let invalid_parent = 99999; + assert!(overlayfs + .rename( + Context::default(), + root, + &file2_name, + invalid_parent, + &new_name, + 0, + ) + .is_err()); + + // Test renaming a directory onto an existing regular file + let _ = CString::new("dir1_new")?; + assert!(overlayfs + .rename(Context::default(), root, &dir1_name, root, &file2_name, 0,) + .is_err()); + + Ok(()) +} + +#[test] +fn test_rename_whiteout_flag() -> io::Result<()> { + // Create test layers with file in lower layer + let lower_files = vec![("file1.txt", false, 0o644)]; + let upper_files = vec![]; + let layers = vec![lower_files, upper_files]; + let (overlayfs, temp_dirs) = helper::create_overlayfs(layers)?; + + let root = 1; + let old_name = CString::new("file1.txt")?; + let new_name = CString::new("renamed.txt")?; + + // Use the whiteout flag + let flags = bindings::LINUX_RENAME_WHITEOUT; + overlayfs.rename( + Context::default(), + root, + &old_name, + root, + &new_name, + flags as u32, + )?; + + // Verify that lookup for the old name fails + assert!(overlayfs + .lookup(Context::default(), root, &old_name) + .is_err()); + + // Verify new name exists + let entry = overlayfs.lookup(Context::default(), root, &new_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Check that a whiteout file is created in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + // For root parent, the whiteout should be at the top layer root with prefix '.wh.'
+ let whiteout_path = top_layer.join(".wh.file1.txt"); + let meta = fs::metadata(&whiteout_path)?; + // Expect the whiteout to be a regular file (its mode is not checked here) + assert!( + meta.file_type().is_file(), + "Expected whiteout to be a regular file" + ); + + Ok(()) +} + +#[test] +fn test_rename_nested_files() -> io::Result<()> { + // Create test layers with nested structure + let files = vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir2", true, 0o755), + ]; + let (overlayfs, _temp_dirs) = helper::create_overlayfs(vec![files])?; + + let root = 1; + let dir1_name = CString::new("dir1")?; + let dir2_name = CString::new("dir2")?; + + // Lookup directory inodes + let dir1_entry = overlayfs.lookup(Context::default(), root, &dir1_name)?; + let dir2_entry = overlayfs.lookup(Context::default(), root, &dir2_name)?; + + let old_name = CString::new("file1.txt")?; + let new_name = CString::new("renamed.txt")?; + + // Rename file between directories + overlayfs.rename( + Context::default(), + dir1_entry.inode, + &old_name, + dir2_entry.inode, + &new_name, + 0, + )?; + + // Verify old location is empty + assert!(overlayfs + .lookup(Context::default(), dir1_entry.inode, &old_name) + .is_err()); + + // Verify new location has the file + let entry = overlayfs.lookup(Context::default(), dir2_entry.inode, &new_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_rename_complex_layers() -> io::Result<()> { + // Create test layers with complex structure + let lower_files = vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file2.txt", false, 0o644), + ]; + let middle_files = vec![("dir3", true, 0o755), ("dir3/file3.txt", false, 0o644)]; + let upper_files = vec![("dir4", true, 0o755), ("dir4/file4.txt", false, 0o644)]; + let layers = vec![lower_files, middle_files, upper_files]; + let (overlayfs, temp_dirs) = helper::create_overlayfs(layers)?; + + let root = 1; + + //
Test renaming between different layer directories + let dir1_name = CString::new("dir1")?; + let dir4_name = CString::new("dir4")?; + let dir1_entry = overlayfs.lookup(Context::default(), root, &dir1_name)?; + let dir4_entry = overlayfs.lookup(Context::default(), root, &dir4_name)?; + + let old_name = CString::new("file1.txt")?; + let new_name = CString::new("renamed.txt")?; + + // Rename from lower to upper layer directory + overlayfs.rename( + Context::default(), + dir1_entry.inode, + &old_name, + dir4_entry.inode, + &new_name, + 0, + )?; + + // Verify file moved correctly + assert!(overlayfs + .lookup(Context::default(), dir1_entry.inode, &old_name) + .is_err()); + let entry = overlayfs.lookup(Context::default(), dir4_entry.inode, &new_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Check whiteout file in the old parent's directory (dir1) in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + let whiteout_path = top_layer.join("dir1").join(".wh.file1.txt"); + assert!( + fs::metadata(&whiteout_path).is_ok(), + "Expected whiteout file at {:?}", + whiteout_path + ); + + Ok(()) +} + +#[test] +fn test_create_basic() -> io::Result<()> { + // Create test layers: + // Single layer with a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Create a new file in root + let file_name = CString::new("new_file.txt").unwrap(); + let ctx = Context::default(); + let (entry, handle, _) = + fs.create(ctx, 1, &file_name, 0o644, 0, 0o022, Extensions::default())?; + + // Verify the file was created with correct mode + let entry_mode = entry.attr.st_mode as u32; + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + assert_eq!(entry_mode & 0o777, 0o644 & !0o022); + + // Verify we can look it up + let lookup_entry = fs.lookup(ctx, 1, 
&file_name)?; + let lookup_mode = lookup_entry.attr.st_mode as u32; + assert_eq!(lookup_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Verify the file exists on disk in the top layer + let file_path = temp_dirs.last().unwrap().path().join("new_file.txt"); + assert!(file_path.exists()); + assert!(file_path.is_file()); + + // If we got a handle, release it + if let Some(h) = handle { + fs.release(ctx, entry.inode, 0, h, false, false, None)?; + } + + Ok(()) +} + +#[test] +fn test_create_nested() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![("dir3", true, 0o755)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create file in dir1 (should trigger copy-up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let file_name = CString::new("new_file.txt").unwrap(); + let (entry, handle, _) = fs.create( + ctx, + dir1_entry.inode, + &file_name, + 0o644, + 0, + 0o022, + Extensions::default(), + )?; + let entry_mode = entry.attr.st_mode as u32; + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Test 2: Create file in dir2 (middle layer, should trigger copy-up) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let middle_file_name = CString::new("middle_file.txt").unwrap(); + let (middle_entry, middle_handle, _) = fs.create( + ctx, + dir2_entry.inode, + &middle_file_name, + 0o644, + 0, + 0o022, + 
Extensions::default(), + )?; + let middle_mode = middle_entry.attr.st_mode as u32; + assert_eq!(middle_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Test 3: Create file in dir3 (top layer, no copy-up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + let top_file_name = CString::new("top_file.txt").unwrap(); + let (top_entry, top_handle, _) = fs.create( + ctx, + dir3_entry.inode, + &top_file_name, + 0o644, + 0, + 0o022, + Extensions::default(), + )?; + let top_mode = top_entry.attr.st_mode as u32; + assert_eq!(top_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Verify all files exist in appropriate layers + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir1/new_file.txt").exists()); + assert!(top_layer.join("dir2/middle_file.txt").exists()); + assert!(top_layer.join("dir3/top_file.txt").exists()); + + // Release handles + if let Some(h) = handle { + fs.release(ctx, entry.inode, 0, h, false, false, None)?; + } + if let Some(h) = middle_handle { + fs.release(ctx, middle_entry.inode, 0, h, false, false, None)?; + } + if let Some(h) = top_handle { + fs.release(ctx, top_entry.inode, 0, h, false, false, None)?; + } + + Ok(()) +} + +#[test] +fn test_create_with_flags() -> io::Result<()> { + // Create test layers with a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test different flag combinations + let test_cases = vec![ + ("file_rdonly.txt", libc::O_RDONLY, 0o644), + ("file_wronly.txt", libc::O_WRONLY, 0o644), + ("file_rdwr.txt", libc::O_RDWR, 0o644), + ("file_append.txt", libc::O_WRONLY | libc::O_APPEND, 0o644), + ("file_trunc.txt", libc::O_WRONLY | libc::O_TRUNC, 0o644), + ("file_excl.txt", libc::O_WRONLY | libc::O_EXCL, 0o644), + ]; + + 
for (name, flags, mode) in test_cases { + let file_name = CString::new(name).unwrap(); + let (entry, handle, _) = fs.create( + ctx, + 1, + &file_name, + mode, + flags as u32, + 0o022, + Extensions::default(), + )?; + + // Verify file creation + let entry_mode = entry.attr.st_mode as u32; + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + assert_eq!(entry_mode & 0o777, mode & !0o022); + + // Verify file exists + let file_path = temp_dirs.last().unwrap().path().join(name); + assert!(file_path.exists()); + assert!(file_path.is_file()); + + // Release handle if we got one + if let Some(h) = handle { + fs.release(ctx, entry.inode, 0, h, false, false, None)?; + } + } + + Ok(()) +} + +#[test] +fn test_create_existing_name() -> io::Result<()> { + // Create test layers with existing files + let layers = vec![vec![ + ("dir1", true, 0o755), + ("existing_file.txt", false, 0o644), + ]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + let file_name = CString::new("existing_file.txt").unwrap(); + + // Try to create a file with existing name without O_EXCL + match fs.create( + ctx, + 1, + &file_name, + 0o644, + libc::O_WRONLY as u32, + 0o022, + Extensions::default(), + ) { + Ok(_) => panic!("Expected create with existing name to fail"), + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + Ok(()) +} + +#[test] +fn test_create_invalid_parent() -> io::Result<()> { + // Create test layers + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test with invalid parent inode + let file_name = CString::new("test.txt").unwrap(); + let invalid_inode = 999999; + match fs.create( + ctx, + invalid_inode, + &file_name, + 0o644, + 0, + 0o022, + Extensions::default(), + ) { + Ok(_) => 
/// `mknod` in the root directory creates special nodes (FIFO, socket) in the
/// top layer with the umask applied to the permission bits.
///
/// The expected file type is platform dependent: on Linux the entry must keep
/// the requested node type, while on macOS the test expects the entry to be
/// reported as a regular file.
#[test]
fn test_mknod_basic() -> io::Result<()> {
    // Single layer containing just a directory.
    let layers = vec![vec![("dir1", true, 0o755)]];

    let (fs, temp_dirs) = helper::create_overlayfs(layers)?;
    helper::debug_print_layers(&temp_dirs, false)?;

    // Initialize filesystem
    fs.init(FsOptions::empty())?;

    let ctx = Context::default();

    // (node name, requested mode, human-readable description used in failure messages)
    let test_cases: Vec<(&str, u32, &str)> = vec![
        ("fifo1", libc::S_IFIFO as u32 | 0o644, "named pipe"),
        ("sock1", libc::S_IFSOCK as u32 | 0o644, "unix domain socket"),
    ];

    for (name, mode, node_type) in test_cases {
        let node_name = CString::new(name).unwrap();
        let entry = fs.mknod(ctx, 1, &node_name, mode, 0, 0o022, Extensions::default())?;

        // Verify the file type reported for the new node.
        let entry_mode = entry.attr.st_mode as u32;
        #[cfg(target_os = "linux")]
        assert_eq!(
            entry_mode & libc::S_IFMT as u32,
            mode & libc::S_IFMT as u32,
            "{} `{}` should keep its requested node type",
            node_type,
            name
        );
        #[cfg(target_os = "macos")]
        assert_eq!(
            entry_mode & libc::S_IFMT as u32,
            libc::S_IFREG as u32,
            "{} `{}` should be reported as a regular file",
            node_type,
            name
        );

        // Permission bits are the requested ones with the 0o022 umask removed.
        assert_eq!(
            entry_mode & 0o777,
            mode & 0o777 & !0o022,
            "{} `{}` has unexpected permission bits",
            node_type,
            name
        );

        // Verify the node was materialised in the top (writable) layer. Only
        // existence is checked here; the on-disk type is not inspected.
        let node_path = temp_dirs.last().unwrap().path().join(name);
        assert!(
            node_path.exists(),
            "{} `{}` is missing from the top layer at {:?}",
            node_type,
            name,
            node_path
        );
    }

    Ok(())
}
0o644), + ("dir2", "sock1", libc::S_IFSOCK as u32 | 0o644), + ("dir3", "fifo2", libc::S_IFIFO as u32 | 0o644), + ]; + + for (dir, name, mode) in test_cases { + let dir_name = CString::new(dir).unwrap(); + let dir_entry = fs.lookup(ctx, 1, &dir_name)?; + let node_name = CString::new(name).unwrap(); + + let entry = fs.mknod( + ctx, + dir_entry.inode, + &node_name, + mode, + 0, + 0o022, + Extensions::default(), + )?; + + // Verify node creation + let entry_mode = entry.attr.st_mode as u32; + #[cfg(target_os = "linux")] + assert_eq!(entry_mode & libc::S_IFMT as u32, mode & libc::S_IFMT as u32); + #[cfg(target_os = "macos")] + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + assert_eq!(entry_mode & 0o777, (0o644 & !0o022) as u32); + + // Verify node exists in the top layer + let node_path = temp_dirs.last().unwrap().path().join(dir).join(name); + assert!(node_path.exists()); + } + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/lookup.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/lookup.rs new file mode 100644 index 000000000..3cc80511b --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/lookup.rs @@ -0,0 +1,455 @@ +use std::{ffi::CString, io}; + +use crate::virtio::{fs::filesystem::{Context, FileSystem}, fuse::FsOptions}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_lookup_basic() -> io::Result<()> { + // Create test layers: + // Lower layer: file1, dir1/file2 + // Upper layer: file3 + let layers = vec![ + vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("dir1/file2", false, 0o644), + ], + vec![("file3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + 
/// A `.wh.<name>` entry in the upper layer must make `<name>` from the lower
/// layer fail to resolve, while sibling files stay visible.
#[test]
fn test_lookup_whiteout() -> io::Result<()> {
    // Lower layer: file1, file2
    let lower = vec![("file1", false, 0o644), ("file2", false, 0o644)];
    // Upper layer: .wh.file1 (whiteout for file1)
    let upper = vec![(".wh.file1", false, 0o644)];

    let (fs, temp_dirs) = helper::create_overlayfs(vec![lower, upper])?;
    helper::debug_print_layers(&temp_dirs, false)?;

    // The filesystem has to be initialised before any lookup is issued.
    fs.init(FsOptions::empty())?;

    // The whited-out name must not resolve from the root inode (1).
    let hidden = CString::new("file1").unwrap();
    let hidden_lookup = fs.lookup(Context::default(), 1, &hidden);
    assert!(hidden_lookup.is_err());

    // The untouched sibling is still a regular file.
    let visible = CString::new("file2").unwrap();
    let visible_entry = fs.lookup(Context::default(), 1, &visible)?;
    assert_eq!(visible_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    Ok(())
}
/// Files that live in three different layers must all resolve from the root
/// of the merged view as regular files.
#[test]
fn test_lookup_multiple_layers() -> io::Result<()> {
    // One file per layer, listed bottom to top:
    //   lower layer 1: file1
    //   lower layer 2: file2
    //   upper layer:   file3
    let layers = vec![
        vec![("file1", false, 0o644)],
        vec![("file2", false, 0o644)],
        vec![("file3", false, 0o644)],
    ];

    let (fs, temp_dirs) = helper::create_overlayfs(layers)?;
    helper::debug_print_layers(&temp_dirs, false)?;

    // The filesystem has to be initialised before any lookup is issued.
    fs.init(FsOptions::empty())?;

    // Look the names up in the same bottom-to-top order as the layers above.
    for name in &["file1", "file2", "file3"] {
        let c_name = CString::new(*name).unwrap();
        let entry = fs.lookup(Context::default(), 1, &c_name)?;
        assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);
    }

    Ok(())
}
/// Directories that appear in several layers must be merged: every file below
/// `bar/`, `bar/hi/` and `foo/` has to resolve regardless of which layer
/// contributed it.
#[test]
fn test_lookup_complex_layers() -> io::Result<()> {
    // Layer 0 (bottom): bar, bar/hi, bar/hi/txt
    let layer0 = vec![
        ("bar", true, 0o755),
        ("bar/hi", true, 0o755),
        ("bar/hi/txt", false, 0o644),
    ];
    // Layer 1: foo, foo/hello, bar
    let layer1 = vec![
        ("foo", true, 0o755),
        ("foo/hello", false, 0o644),
        ("bar", true, 0o755),
    ];
    // Layer 2: bar, bar/hi, bar/hi/xml
    let layer2 = vec![
        ("bar", true, 0o755),
        ("bar/hi", true, 0o755),
        ("bar/hi/xml", false, 0o644),
    ];
    // Layer 3 (top): bar, bar/hello, bar/hi, bar/hi/json
    let layer3 = vec![
        ("bar", true, 0o755),
        ("bar/hello", false, 0o644),
        ("bar/hi", true, 0o755),
        ("bar/hi/json", false, 0o644),
    ];

    let (fs, temp_dirs) = helper::create_overlayfs(vec![layer0, layer1, layer2, layer3])?;
    helper::debug_print_layers(&temp_dirs, false)?;

    // The filesystem has to be initialised before any lookup is issued.
    fs.init(FsOptions::empty())?;

    // Walk down: root -> bar -> hi, both of which must be directories.
    let bar_name = CString::new("bar").unwrap();
    let bar = fs.lookup(Context::default(), 1, &bar_name)?;
    assert_eq!(bar.attr.st_mode & libc::S_IFMT, libc::S_IFDIR);

    let hi_name = CString::new("hi").unwrap();
    let hi = fs.lookup(Context::default(), bar.inode, &hi_name)?;
    assert_eq!(hi.attr.st_mode & libc::S_IFMT, libc::S_IFDIR);

    // bar/hi is assembled from three layers: txt (layer 0), json (layer 3)
    // and xml (layer 2). Each one must be a regular file.
    for leaf in &["txt", "json", "xml"] {
        let leaf_name = CString::new(*leaf).unwrap();
        let leaf_entry = fs.lookup(Context::default(), hi.inode, &leaf_name)?;
        assert_eq!(leaf_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);
    }

    // bar/hello comes from layer 3.
    let hello_name = CString::new("hello").unwrap();
    let bar_hello = fs.lookup(Context::default(), bar.inode, &hello_name)?;
    assert_eq!(bar_hello.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    // foo/ exists only in layer 1 and must still show up in the root.
    let foo_name = CString::new("foo").unwrap();
    let foo = fs.lookup(Context::default(), 1, &foo_name)?;
    assert_eq!(foo.attr.st_mode & libc::S_IFMT, libc::S_IFDIR);

    // foo/hello comes from layer 1 as well.
    let foo_hello_name = CString::new("hello").unwrap();
    let foo_hello = fs.lookup(Context::default(), foo.inode, &foo_hello_name)?;
    assert_eq!(foo_hello.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    Ok(())
}
file1_result.is_err(), + "file1 should be hidden by opaque directory" + ); + + // Lookup 'file2' in bar - should be found in layer 1 + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(Context::default(), bar_entry.inode, &file2_name)?; + assert_eq!(file2_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'file3' in bar - should be found in layer 2 + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(Context::default(), bar_entry.inode, &file3_name)?; + assert_eq!(file3_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'subdir' in bar - should be found in layer 2, not layer 0 + // because of the opaque marker in layer 1 + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(Context::default(), bar_entry.inode, &subdir_name)?; + assert_eq!(subdir_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Lookup 'bottom_file' in bar/subdir - should NOT be found due to opaque marker + let bottom_file_name = CString::new("bottom_file").unwrap(); + let bottom_file_result = fs.lookup(Context::default(), subdir_entry.inode, &bottom_file_name); + assert!( + bottom_file_result.is_err(), + "bottom_file should be hidden by opaque directory" + ); + + // Lookup 'top_file' in bar/subdir - should be found in layer 2 + let top_file_name = CString::new("top_file").unwrap(); + let top_file_entry = fs.lookup(Context::default(), subdir_entry.inode, &top_file_name)?; + assert_eq!(top_file_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'other' in root - should be found + let other_name = CString::new("other").unwrap(); + let other_entry = fs.lookup(Context::default(), 1, &other_name)?; + assert_eq!(other_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Lookup 'file' in other - should be found in layer 0 + // (other directory is not affected by the opaque marker in bar) + let other_file_name = CString::new("file").unwrap(); + let other_file_entry = 
/// An opaque marker in `bar/` must also hide the lower-layer contents of a
/// subdirectory that the upper layer re-creates empty.
#[test]
fn test_lookup_opaque_with_empty_subdir() -> io::Result<()> {
    // Lower layer:
    //   - bar/
    //   - bar/hello/
    //   - bar/hello/txt
    let lower = vec![
        ("bar", true, 0o755),
        ("bar/hello", true, 0o755),
        ("bar/hello/txt", false, 0o644),
    ];
    // Upper layer:
    //   - bar/
    //   - bar/.wh..wh..opq
    //   - bar/hello/ (empty directory)
    let upper = vec![
        ("bar", true, 0o755),
        ("bar/.wh..wh..opq", false, 0o644),
        ("bar/hello", true, 0o755),
    ];

    let (fs, temp_dirs) = helper::create_overlayfs(vec![lower, upper])?;
    helper::debug_print_layers(&temp_dirs, false)?;

    // The filesystem has to be initialised before any lookup is issued.
    fs.init(FsOptions::empty())?;

    // root -> bar: must be a directory.
    let bar_name = CString::new("bar").unwrap();
    let bar = fs.lookup(Context::default(), 1, &bar_name)?;
    assert_eq!(bar.attr.st_mode & libc::S_IFMT, libc::S_IFDIR);

    // bar -> hello: must be a directory as well.
    let hello_name = CString::new("hello").unwrap();
    let hello = fs.lookup(Context::default(), bar.inode, &hello_name)?;
    assert_eq!(hello.attr.st_mode & libc::S_IFMT, libc::S_IFDIR);

    // hello -> txt: exists only in the lower layer, below the opaque marker,
    // so the lookup is expected to fail.
    let txt_name = CString::new("txt").unwrap();
    let txt_lookup = fs.lookup(Context::default(), hello.inode, &txt_name);
    assert!(
        txt_lookup.is_err(),
        "txt should be hidden by opaque directory marker in bar/"
    );

    Ok(())
}
b/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs new file mode 100644 index 000000000..355fc7d33 --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs @@ -0,0 +1,904 @@ +use std::{collections::HashSet, ffi::CString, fs, io}; + +use crate::virtio::{bindings::{self, LINUX_ENODATA, LINUX_ENOSYS}, fs::filesystem::{Context, FileSystem, GetxattrReply, ListxattrReply}, fuse::{FsOptions, SetattrValid}, linux_errno::LINUX_ERANGE, macos::overlayfs::{Config, OverlayFs}}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_getattr_basic() -> io::Result<()> { + // Create test layers: + // Lower layer: file1 (mode 0644), dir1 (mode 0755), shadowed (mode 0644) + // Upper layer: file2 (mode 0600), shadowed (mode 0600) - shadows lower layer's shadowed + let layers = vec![ + vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("shadowed", false, 0o644), + ], + vec![ + ("file2", false, 0o600), + ("shadowed", false, 0o600), // This shadows the lower layer's shadowed file + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test getattr on file in lower layer + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + let (file1_attr, _) = fs.getattr(Context::default(), file1_entry.inode, None)?; + assert_eq!(file1_attr.st_mode & 0o777, 0o644); + assert_eq!(file1_attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Test getattr on directory + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let (dir1_attr, _) = fs.getattr(Context::default(), dir1_entry.inode, None)?; + 
/// `getattr` on an inode number the filesystem never handed out must fail
/// with `EBADF`.
#[test]
fn test_getattr_invalid_inode() -> io::Result<()> {
    // A minimal single-layer filesystem is enough for this check.
    let only_layer = vec![("file1", false, 0o644)];

    let (fs, temp_dirs) = helper::create_overlayfs(vec![only_layer])?;
    helper::debug_print_layers(&temp_dirs, false)?;

    // The filesystem has to be initialised before any request is issued.
    fs.init(FsOptions::empty())?;

    // No lookup has been performed, so this inode was never returned to us.
    let bogus_inode = 999999;
    let outcome = fs.getattr(Context::default(), bogus_inode, None);
    assert!(outcome.is_err());

    let os_code = outcome.unwrap_err().raw_os_error();
    assert_eq!(os_code, Some(libc::EBADF));

    Ok(())
}
/// `getattr` must report non-zero access/modify/change times and return the
/// attribute timeout taken from the filesystem configuration.
#[test]
fn test_getattr_timestamps() -> io::Result<()> {
    // Single layer holding a single file.
    let only_layer = vec![("file1", false, 0o644)];

    let (fs, temp_dirs) = helper::create_overlayfs(vec![only_layer])?;
    helper::debug_print_layers(&temp_dirs, false)?;

    // The filesystem has to be initialised before any request is issued.
    fs.init(FsOptions::empty())?;

    // Resolve the file from the root inode (1), then fetch its attributes.
    let name = CString::new("file1").unwrap();
    let entry = fs.lookup(Context::default(), 1, &name)?;
    let (attr, timeout) = fs.getattr(Context::default(), entry.inode, None)?;

    // All three timestamps have to be populated.
    assert!(attr.st_atime > 0);
    assert!(attr.st_mtime > 0);
    assert!(attr.st_ctime > 0);

    // The returned cache timeout is the one configured on the filesystem.
    let configured_timeout = fs.get_config().attr_timeout;
    assert_eq!(timeout, configured_timeout);

    Ok(())
}
Shadows but visible due to opaque + ("dir1/middle_file", false, 0o600), + ("dir2", true, 0o755), + ("dir2/file2", false, 0o600), // Shadows bottom layer + ], + vec![ + ("dir1", true, 0o755), + ("dir1/top_file", false, 0o666), + ("dir2", true, 0o755), + ("dir2/.wh..wh..opq", false, 0o644), // Makes dir2 opaque + ("dir2/new_file", false, 0o666), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test 1: Files in dir1 (with opaque marker in middle layer) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + + // 1a. file1 should have mode 0600 from middle layer (due to opaque marker), not 0644 from bottom + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), dir1_entry.inode, &file1_name)?; + let (file1_attr, _) = fs.getattr(Context::default(), file1_entry.inode, None)?; + assert_eq!( + file1_attr.st_mode & 0o777, + 0o600, + "file1 should have mode from middle layer due to opaque marker" + ); + + // 1b. bottom_file should not be visible due to opaque marker in middle layer + let bottom_file_name = CString::new("bottom_file").unwrap(); + assert!( + fs.lookup(Context::default(), dir1_entry.inode, &bottom_file_name) + .is_err(), + "bottom_file should be hidden by opaque marker" + ); + + // 1c. middle_file should be visible with mode 0600 + let middle_file_name = CString::new("middle_file").unwrap(); + let middle_file_entry = fs.lookup(Context::default(), dir1_entry.inode, &middle_file_name)?; + let (middle_file_attr, _) = fs.getattr(Context::default(), middle_file_entry.inode, None)?; + assert_eq!(middle_file_attr.st_mode & 0o777, 0o600); + + // 1d. 
top_file should be visible with mode 0666 + let top_file_name = CString::new("top_file").unwrap(); + let top_file_entry = fs.lookup(Context::default(), dir1_entry.inode, &top_file_name)?; + let (top_file_attr, _) = fs.getattr(Context::default(), top_file_entry.inode, None)?; + assert_eq!(top_file_attr.st_mode & 0o777, 0o666); + + // Test 2: Files in dir2 (with opaque marker in top layer) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + + // 2a. file2 from bottom and middle layers should not be visible due to opaque marker in top + let file2_name = CString::new("file2").unwrap(); + assert!( + fs.lookup(Context::default(), dir2_entry.inode, &file2_name) + .is_err(), + "file2 should be hidden by opaque marker in top layer" + ); + + // 2b. new_file should be visible with mode 0666 + let new_file_name = CString::new("new_file").unwrap(); + let new_file_entry = fs.lookup(Context::default(), dir2_entry.inode, &new_file_name)?; + let (new_file_attr, _) = fs.getattr(Context::default(), new_file_entry.inode, None)?; + assert_eq!(new_file_attr.st_mode & 0o777, 0o666); + + // Test 3: Directory attributes + // 3a. dir1 should exist and be a directory + let (dir1_attr, _) = fs.getattr(Context::default(), dir1_entry.inode, None)?; + assert_eq!(dir1_attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + assert_eq!(dir1_attr.st_mode & 0o777, 0o755); + + // 3b. 
/// Changing the mode of a file that already lives in the upper layer must be
/// reflected both in the `setattr` reply and in a subsequent `getattr`.
#[test]
fn test_setattr_basic() -> io::Result<()> {
    // Lower layer: file1 (mode 0644)
    let lower = vec![("file1", false, 0o644)];
    // Upper layer: file2 (mode 0600)
    let upper = vec![("file2", false, 0o600)];

    let (fs, temp_dirs) = helper::create_overlayfs(vec![lower, upper])?;
    helper::debug_print_layers(&temp_dirs, true)?;

    // The filesystem has to be initialised before any request is issued.
    fs.init(FsOptions::empty())?;

    // Resolve the upper-layer file from the root inode (1).
    let name = CString::new("file2").unwrap();
    let entry = fs.lookup(Context::default(), 1, &name)?;

    // Start from the current attributes and swap the permission bits for 0640,
    // leaving the file-type bits untouched.
    let mut requested = entry.attr;
    requested.st_mode &= !0o777;
    requested.st_mode |= 0o640;

    let (reply_attr, _) = fs.setattr(
        Context::default(),
        entry.inode,
        requested,
        None,
        SetattrValid::MODE,
    )?;
    assert_eq!(reply_attr.st_mode & 0o777, 0o640);

    // A fresh getattr must observe the same permission bits.
    let (fresh_attr, _) = fs.getattr(Context::default(), entry.inode, None)?;
    assert_eq!(fresh_attr.st_mode & 0o777, 0o640);

    Ok(())
}
let valid = SetattrValid::MODE; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + assert_eq!(new_attr.st_mode & 0o777, 0o640); + + Ok(()) +} + +#[test] +fn test_setattr_timestamps() -> io::Result<()> { + // Create test layers with a single file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Get the file's entry + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + + // Set specific timestamps + let mut attr = file1_entry.attr; + attr.st_atime = 12345; + attr.st_atime_nsec = 67890; + attr.st_mtime = 98765; + attr.st_mtime_nsec = 43210; + + let valid = SetattrValid::ATIME | SetattrValid::MTIME; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + + // Verify timestamps were set + assert_eq!(new_attr.st_atime, 12345); + assert_eq!(new_attr.st_atime_nsec, 67890); + assert_eq!(new_attr.st_mtime, 98765); + assert_eq!(new_attr.st_mtime_nsec, 43210); + + Ok(()) +} + +#[test] +fn test_setattr_size() -> io::Result<()> { + // Create test layers with a single file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Get the file's entry + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + + // Set file size to 1000 bytes + let mut attr = file1_entry.attr; + attr.st_size = 1000; + let valid = SetattrValid::SIZE; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + + // Verify size was set + assert_eq!(new_attr.st_size, 1000); + + // Verify the actual file size on 
disk + let file_path = temp_dirs[0].path().join("file1"); + let metadata = fs::metadata(file_path)?; + assert_eq!(metadata.len(), 1000); + + Ok(()) +} + +#[test] +fn test_setattr_complex() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (mode 0644) + // - dir1/subdir/ + // - dir1/subdir/bottom_file (mode 0644) + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 (mode 0600) + // Layer 2 (top): + // - dir3/ + // - dir3/file3 (mode 0666) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o600)], + vec![("dir3", true, 0o755), ("dir3/file3", false, 0o666)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test 1: Modify file in bottom layer (should trigger copy_up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), dir1_entry.inode, &file1_name)?; + + // Change mode and size + let mut attr = file1_entry.attr; + attr.st_mode = (attr.st_mode & !0o777) | 0o640; + attr.st_size = 2000; + let valid = SetattrValid::MODE | SetattrValid::SIZE; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + + // Verify changes + assert_eq!(new_attr.st_mode & 0o777, 0o640); + assert_eq!(new_attr.st_size, 2000); + + // Test 2: Modify file in middle layer (should trigger copy_up) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(Context::default(), dir2_entry.inode, 
&file2_name)?; + + // Change timestamps + let mut attr = file2_entry.attr; + attr.st_atime = 12345; + attr.st_mtime = 67890; + let valid = SetattrValid::ATIME | SetattrValid::MTIME; + let (new_attr, _) = fs.setattr(Context::default(), file2_entry.inode, attr, None, valid)?; + + // Verify changes + assert_eq!(new_attr.st_atime, 12345); + assert_eq!(new_attr.st_mtime, 67890); + + // Verify file was copied up + let top_file2_path = temp_dirs[2].path().join("dir2").join("file2"); + assert!(top_file2_path.exists()); + + // Test 3: Modify file in top layer (no copy_up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(Context::default(), 1, &dir3_name)?; + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(Context::default(), dir3_entry.inode, &file3_name)?; + + // Change mode + let mut attr = file3_entry.attr; + attr.st_mode = (attr.st_mode & !0o777) | 0o644; + let valid = SetattrValid::MODE; + let (new_attr, _) = fs.setattr(Context::default(), file3_entry.inode, attr, None, valid)?; + + // Verify changes + assert_eq!(new_attr.st_mode & 0o777, 0o644); + + Ok(()) +} + +#[test] +fn test_xattrs() -> io::Result<()> { + // Create test layers with nested structure: + // Layer 0 (bottom): dir1/file1.txt, dir2/file2.txt + // Layer 1 (middle): dir1/file3.txt, dir3/file4.txt + // Layer 2 (top): dir1/file5.txt, dir2/dir4/file6.txt + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file2.txt", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file3.txt", false, 0o644), + ("dir3", true, 0o755), + ("dir3/file4.txt", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file5.txt", false, 0o644), + ("dir2", true, 0o755), + ("dir2/dir4", true, 0o755), + ("dir2/dir4/file6.txt", false, 0o644), + ], + ]; + + // Enable xattr in config + let mut cfg = Config::default(); + cfg.xattr = true; + + // Create overlay filesystem with the 
specified layers + let temp_dirs = layers + .iter() + .map(|layer| helper::setup_test_layer(layer).unwrap()) + .collect::<Vec<_>>(); + + let layer_paths = temp_dirs + .iter() + .map(|dir| dir.path().to_path_buf()) + .collect::<Vec<_>>(); + + let overlayfs = OverlayFs::new(layer_paths, cfg)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + overlayfs.init(FsOptions::empty())?; + let ctx = Context::default(); + + // ---------- Test setting, getting, listing, and removing xattrs on files in different layers ---------- + + // Look up dir1 + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = overlayfs.lookup(ctx, 1, &dir1_name)?; + + // Test file in top layer (dir1/file5.txt) + let file5_name = CString::new("file5.txt").unwrap(); + let file5_entry = overlayfs.lookup(ctx, dir1_entry.inode, &file5_name)?; + + // Test setxattr on top layer file + let xattr_name = CString::new("user.test_attr").unwrap(); + let xattr_value = b"test_value_123"; + overlayfs.setxattr(ctx, file5_entry.inode, &xattr_name, xattr_value, 0)?; + + // Test getxattr + let result = overlayfs.getxattr(ctx, file5_entry.inode, &xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!(value, xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // Test listxattr + let result = overlayfs.listxattr(ctx, file5_entry.inode, 100); + match result { + Ok(ListxattrReply::Names(names)) => { + let mut found = false; + let mut start = 0; + while start < names.len() { + let end = names[start..] 
+ .iter() + .position(|&b| b == 0) + .map(|pos| start + pos) + .unwrap_or(names.len()); + + let attr_name = &names[start..end]; + if attr_name == xattr_name.to_bytes() { + found = true; + break; + } + start = end + 1; + } + assert!(found, "Attribute name not found in listxattr result"); + } + Err(e) => panic!("Expected ListxattrReply::Names, got error: {:?}", e), + _ => panic!("Unexpected result from listxattr"), + } + + // Test setting another attribute + let xattr_name2 = CString::new("user.another_attr").unwrap(); + let xattr_value2 = b"another_value_456"; + overlayfs.setxattr(ctx, file5_entry.inode, &xattr_name2, xattr_value2, 0)?; + + // Verify both attributes are listed + let result = overlayfs.listxattr(ctx, file5_entry.inode, 200); + match result { + Ok(ListxattrReply::Names(names)) => { + let mut attrs = HashSet::new(); + let mut start = 0; + while start < names.len() { + let end = names[start..] + .iter() + .position(|&b| b == 0) + .map(|pos| start + pos) + .unwrap_or(names.len()); + + let attr_name = &names[start..end]; + attrs.insert(attr_name.to_vec()); + start = end + 1; + } + assert!( + attrs.contains(&xattr_name.to_bytes().to_vec()), + "First attribute not found" + ); + assert!( + attrs.contains(&xattr_name2.to_bytes().to_vec()), + "Second attribute not found" + ); + } + Err(e) => panic!("Expected ListxattrReply::Names, got error: {:?}", e), + _ => panic!("Unexpected result from listxattr"), + } + + // Test removexattr + overlayfs.removexattr(ctx, file5_entry.inode, &xattr_name)?; + + // Verify the attribute was removed + let result = overlayfs.listxattr(ctx, file5_entry.inode, 100); + match result { + Ok(ListxattrReply::Names(names)) => { + let mut found = false; + let mut start = 0; + while start < names.len() { + let end = names[start..] 
+ .iter() + .position(|&b| b == 0) + .map(|pos| start + pos) + .unwrap_or(names.len()); + + let attr_name = &names[start..end]; + if attr_name == xattr_name.to_bytes() { + found = true; + break; + } + start = end + 1; + } + assert!(!found, "Attribute should have been removed"); + } + Err(e) => panic!("Expected ListxattrReply::Names, got error: {:?}", e), + _ => panic!("Unexpected result from listxattr"), + } + + // ---------- Test xattrs on files in middle layer (should trigger copy-up) ---------- + + // Look up dir3 + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = overlayfs.lookup(ctx, 1, &dir3_name)?; + + // Test file in middle layer (dir3/file4.txt) + let file4_name = CString::new("file4.txt").unwrap(); + let file4_entry = overlayfs.lookup(ctx, dir3_entry.inode, &file4_name)?; + + // Verify file exists in middle layer before copy-up + let middle_layer_file = temp_dirs[1].path().join("dir3").join("file4.txt"); + assert!( + middle_layer_file.exists(), + "File should exist in middle layer before copy-up" + ); + assert!( + !temp_dirs[2].path().join("dir3").join("file4.txt").exists(), + "File should not exist in top layer before copy-up" + ); + + // This should cause a copy-up operation since the file is in a lower layer + let middle_xattr_name = CString::new("user.middle_attr").unwrap(); + let middle_xattr_value = b"middle_layer_value"; + overlayfs.setxattr( + ctx, + file4_entry.inode, + &middle_xattr_name, + middle_xattr_value, + 0, + )?; + + // Verify file was copied up to top layer + let top_layer_file = temp_dirs[2].path().join("dir3").join("file4.txt"); + assert!( + top_layer_file.exists(), + "File should be copied up to top layer" + ); + + // Verify the attribute was set on the top layer file + let result = overlayfs.getxattr(ctx, file4_entry.inode, &middle_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!(value, middle_xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", 
e), + _ => panic!("Unexpected result from getxattr"), + } + + // Verify the middle layer file still exists and is unchanged (no xattr) + assert!( + middle_layer_file.exists(), + "Original file should still exist in middle layer" + ); + let result = overlayfs.getxattr(ctx, file4_entry.inode, &middle_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!( + value, middle_xattr_value, + "Xattr should be accessible through overlay" + ); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // Try to read the xattr directly from the middle layer file (should not exist) + let middle_layer_path = CString::new(middle_layer_file.to_str().unwrap()).unwrap(); + let mut buf = vec![0; 100]; + let res = unsafe { + libc::getxattr( + middle_layer_path.as_ptr(), + middle_xattr_name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + 0, + 0, + ) + }; + assert!(res < 0, "Xattr should not exist on middle layer file"); + let err = io::Error::last_os_error(); + assert!( + err.raw_os_error().unwrap() == libc::ENOATTR + || err.raw_os_error().unwrap() == libc::ENODATA, + "Expected ENOATTR or ENODATA when reading xattr from middle layer file" + ); + + // ---------- Test xattrs on nested directories ---------- + + // Look up dir2/dir4 + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = overlayfs.lookup(ctx, 1, &dir2_name)?; + + let dir4_name = CString::new("dir4").unwrap(); + let dir4_entry = overlayfs.lookup(ctx, dir2_entry.inode, &dir4_name)?; + + // Set xattr on a nested directory + let dir_xattr_name = CString::new("user.dir_attr").unwrap(); + let dir_xattr_value = b"directory_attribute"; + overlayfs.setxattr(ctx, dir4_entry.inode, &dir_xattr_name, dir_xattr_value, 0)?; + + // Verify the attribute was set + let result = overlayfs.getxattr(ctx, dir4_entry.inode, &dir_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + 
assert_eq!(value, dir_xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // ---------- Test xattrs on file in deeply nested directory ---------- + + // Get file in nested directory (dir2/dir4/file6.txt) + let file6_name = CString::new("file6.txt").unwrap(); + let file6_entry = overlayfs.lookup(ctx, dir4_entry.inode, &file6_name)?; + + // Set xattr on the nested file + let nested_xattr_name = CString::new("user.nested_attr").unwrap(); + let nested_xattr_value = b"nested_file_value"; + overlayfs.setxattr( + ctx, + file6_entry.inode, + &nested_xattr_name, + nested_xattr_value, + 0, + )?; + + // Verify the attribute was set + let result = overlayfs.getxattr(ctx, file6_entry.inode, &nested_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!(value, nested_xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // ---------- Test error cases ---------- + + // Test getxattr on non-existent attribute + let nonexistent_attr = CString::new("user.nonexistent").unwrap(); + let result = overlayfs.getxattr(ctx, file6_entry.inode, &nonexistent_attr, 100); + match result { + Err(e) => { + let err_code = e.raw_os_error().unwrap(); + assert!( + err_code == LINUX_ENODATA, + "Expected ENODATA, got: {}", + err_code + ); + } + Ok(_) => panic!("Expected error for non-existent attribute"), + } + + // Test getxattr with buffer too small + let result = overlayfs.getxattr(ctx, file6_entry.inode, &nested_xattr_name, 5); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ERANGE, + "Expected ERANGE error" + ); + } + Ok(_) => panic!("Expected ERANGE error for small buffer"), + } + + // Test removexattr on non-existent attribute + let result = overlayfs.removexattr(ctx, file6_entry.inode, &nonexistent_attr); + match result { + Err(e) => { + let 
err_code = e.raw_os_error().unwrap(); + assert!( + err_code == LINUX_ENODATA, + "Expected ENODATA, got: {}", + err_code + ); + } + Ok(_) => panic!("Expected error for non-existent attribute"), + } + + // Test setxattr with XATTR_CREATE on an attribute that already exists (must fail with EEXIST) + let result = overlayfs.setxattr( + ctx, + file6_entry.inode, + &nested_xattr_name, + nested_xattr_value, + bindings::LINUX_XATTR_CREATE as u32, // XATTR_CREATE - should fail on existing attr + ); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + libc::EEXIST, + "Expected EEXIST error" + ); + } + Ok(_) => panic!("Expected EEXIST error for XATTR_CREATE on existing attribute"), + } + + // ---------- Test disabling xattr functionality ---------- + + // Create a new overlayfs with xattr disabled + let mut cfg_no_xattr = Config::default(); + cfg_no_xattr.xattr = false; + + let overlayfs_no_xattr = OverlayFs::new( + temp_dirs + .iter() + .map(|dir| dir.path().to_path_buf()) + .collect(), + cfg_no_xattr, + )?; + + overlayfs_no_xattr.init(FsOptions::empty())?; + + // Look up a file again + let dir1_entry = overlayfs_no_xattr.lookup(ctx, 1, &dir1_name)?; + let file5_entry = overlayfs_no_xattr.lookup(ctx, dir1_entry.inode, &file5_name)?; + + // All xattr operations should return ENOSYS + let result = overlayfs_no_xattr.setxattr(ctx, file5_entry.inode, &xattr_name, b"test", 0); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + let result = overlayfs_no_xattr.getxattr(ctx, file5_entry.inode, &xattr_name, 100); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + let result = overlayfs_no_xattr.listxattr(ctx, file5_entry.inode, 100); + match result { + Err(e) 
=> { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + let result = overlayfs_no_xattr.removexattr(ctx, file5_entry.inode, &xattr_name); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs new file mode 100644 index 000000000..34797bd20 --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs @@ -0,0 +1,555 @@ +use std::{ffi::CString, fs, io, os::unix::fs::PermissionsExt, path::PathBuf}; + +use tempfile::TempDir; + +use crate::virtio::{fs::filesystem::{Context, FileSystem}, fuse::FsOptions, macos::overlayfs::{Config, OverlayFs}}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_copy_up_complex() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (mode 0644) + // - dir1/subdir/ + // - dir1/subdir/bottom_file (mode 0644) + // - dir1/symlink -> file1 + // - dir2/ + // - dir2/file2 (mode 0600) + // Layer 1 (middle): + // - dir3/ + // - dir3/middle_file (mode 0666) + // - dir3/nested/ + // - dir3/nested/data (mode 0644) + // Layer 2 (top - initially empty): + // (empty - will be populated by copy_up operations) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file2", false, 0o600), + ], + vec![ + ("dir3", true, 0o755), + 
("dir3/middle_file", false, 0o666), + ("dir3/nested", true, 0o755), + ("dir3/nested/data", false, 0o644), + ], + vec![], // Empty top layer + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Create symlink in bottom layer + let symlink_path = temp_dirs[0].path().join("dir1").join("symlink"); + std::os::unix::fs::symlink("file1", &symlink_path)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test 1: Copy up a regular file from bottom layer + // First lookup dir1/file1 to get its path_inodes + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let file1_name = CString::new("file1").unwrap(); + let (_, path_inodes) = fs.do_lookup(dir1_entry.inode, &file1_name)?; + + // Perform copy_up + fs.copy_up(&path_inodes)?; + + // Verify the file was copied up correctly + let top_file1_path = temp_dirs[2].path().join("dir1").join("file1"); + let metadata = fs::metadata(&top_file1_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o644); + assert!(top_file1_path.exists()); + + // Test 2: Copy up a directory with nested content + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(Context::default(), 1, &dir3_name)?; + let nested_name = CString::new("nested").unwrap(); + let (nested_entry, nested_path_inodes) = fs.do_lookup(dir3_entry.inode, &nested_name)?; + + // Copy up the nested directory + fs.copy_up(&nested_path_inodes)?; + + // Verify the directory structure was copied + let top_nested_path = temp_dirs[2].path().join("dir3").join("nested"); + assert!(top_nested_path.exists()); + assert!(top_nested_path.is_dir()); + let metadata = fs::metadata(&top_nested_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o755); + + // Test 3: Copy up a file from the middle layer + let middle_file_name = CString::new("middle_file").unwrap(); + let (_, middle_file_path_inodes) = 
fs.do_lookup(dir3_entry.inode, &middle_file_name)?; + + // Perform copy_up + fs.copy_up(&middle_file_path_inodes)?; + + // Verify the file was copied up correctly + let top_middle_file_path = temp_dirs[2].path().join("dir3").join("middle_file"); + let metadata = fs::metadata(&top_middle_file_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o666); + assert!(top_middle_file_path.exists()); + + // Test 4: Copy up a nested file + let data_name = CString::new("data").unwrap(); + let (_, data_path_inodes) = fs.do_lookup(nested_entry.inode, &data_name)?; + + // Perform copy_up + fs.copy_up(&data_path_inodes)?; + + // Verify the nested file was copied up correctly + let top_data_path = temp_dirs[2].path().join("dir3").join("nested").join("data"); + let metadata = fs::metadata(&top_data_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o644); + assert!(top_data_path.exists()); + + // Test 5: Verify parent directories are created as needed + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + let file2_name = CString::new("file2").unwrap(); + let (_, file2_path_inodes) = fs.do_lookup(dir2_entry.inode, &file2_name)?; + + // Perform copy_up + fs.copy_up(&file2_path_inodes)?; + + // Verify the directory structure + let top_dir2_path = temp_dirs[2].path().join("dir2"); + assert!(top_dir2_path.exists()); + assert!(top_dir2_path.is_dir()); + let top_file2_path = top_dir2_path.join("file2"); + let metadata = fs::metadata(&top_file2_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o600); + assert!(top_file2_path.exists()); + + // Test 6: Copy up a symbolic link + let symlink_name = CString::new("symlink").unwrap(); + let (_, symlink_path_inodes) = fs.do_lookup(dir1_entry.inode, &symlink_name)?; + + // Perform copy_up + fs.copy_up(&symlink_path_inodes)?; + + // Verify the symlink was copied up correctly + let top_symlink_path = temp_dirs[2].path().join("dir1").join("symlink"); + 
assert!(top_symlink_path.exists()); + assert!(fs::symlink_metadata(&top_symlink_path)? + .file_type() + .is_symlink()); + + // Read the symlink target + let target = fs::read_link(&top_symlink_path)?; + assert_eq!(target.to_str().unwrap(), "file1"); + + Ok(()) +} + +#[test] +fn test_copy_up_with_content() -> io::Result<()> { + // Create test layers with files containing specific content: + // Layer 0 (bottom): + // - file1 (contains "bottom layer content") + // - dir1/nested_file1 (contains "nested bottom content") + // Layer 1 (middle): + // - file2 (contains "middle layer content") + // - dir1/nested_file2 (contains "nested middle content") + // Layer 2 (top): + // - file3 (contains "top layer content") + // - dir1/nested_file3 (contains "nested top content") + + // Create temporary directories for each layer + let temp_dirs: Vec<TempDir> = vec![ + TempDir::new().unwrap(), + TempDir::new().unwrap(), + TempDir::new().unwrap(), + ]; + + // Create directory structure in each layer + for dir in &temp_dirs { + fs::create_dir_all(dir.path().join("dir1"))?; + } + + // Create files with content in bottom layer + fs::write(temp_dirs[0].path().join("file1"), "bottom layer content")?; + fs::write( + temp_dirs[0].path().join("dir1").join("nested_file1"), + "nested bottom content", + )?; + + // Create files with content in middle layer + fs::write(temp_dirs[1].path().join("file2"), "middle layer content")?; + fs::write( + temp_dirs[1].path().join("dir1").join("nested_file2"), + "nested middle content", + )?; + + // Create files with content in top layer + fs::write(temp_dirs[2].path().join("file3"), "top layer content")?; + fs::write( + temp_dirs[2].path().join("dir1").join("nested_file3"), + "nested top content", + )?; + + // Set permissions + for dir in &temp_dirs { + fs::set_permissions(dir.path().join("dir1"), fs::Permissions::from_mode(0o755)).ok(); + } + fs::set_permissions( + temp_dirs[0].path().join("file1"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + 
fs::set_permissions( + temp_dirs[0].path().join("dir1").join("nested_file1"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[1].path().join("file2"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[1].path().join("dir1").join("nested_file2"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[2].path().join("file3"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[2].path().join("dir1").join("nested_file3"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + + // Create layer paths + let layer_paths: Vec<PathBuf> = temp_dirs.iter().map(|d| d.path().to_path_buf()).collect(); + + // Create the overlayfs + let cfg = Config::default(); + let fs = OverlayFs::new(layer_paths, cfg)?; + let ctx = Context::default(); + + // Test 1: Open file1 from bottom layer with write access (should trigger copy-up) + let file1_name = CString::new("file1").unwrap(); + let (_, path_inodes) = fs.do_lookup(1, &file1_name)?; + fs.copy_up(&path_inodes)?; + + // Verify file1 was copied up to the top layer with correct content + let top_file1 = temp_dirs[2].path().join("file1"); + assert!(top_file1.exists()); + let content = fs::read_to_string(&top_file1)?; + assert_eq!(content, "bottom layer content"); + + // Test 2: Open nested_file1 from bottom layer with write access + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let nested_file1_name = CString::new("nested_file1").unwrap(); + let (_, path_inodes) = fs.do_lookup(dir1_entry.inode, &nested_file1_name)?; + fs.copy_up(&path_inodes)?; + + // Verify nested_file1 was copied up to the top layer with correct content + let top_nested_file1 = temp_dirs[2].path().join("dir1").join("nested_file1"); + assert!(top_nested_file1.exists()); + let content = fs::read_to_string(&top_nested_file1)?; + assert_eq!(content, "nested bottom content"); + + // Test 3: Open file2 
from middle layer with write access + let file2_name = CString::new("file2").unwrap(); + let (_, path_inodes) = fs.do_lookup(1, &file2_name)?; + fs.copy_up(&path_inodes)?; + + // Verify file2 was copied up to the top layer with correct content + let top_file2 = temp_dirs[2].path().join("file2"); + assert!(top_file2.exists()); + let content = fs::read_to_string(&top_file2)?; + assert_eq!(content, "middle layer content"); + + // Test 4: Open file3 from top layer (no copy-up needed) + let file3_name = CString::new("file3").unwrap(); + let (_, path_inodes) = fs.do_lookup(1, &file3_name)?; + fs.copy_up(&path_inodes)?; + + // Verify file3 content is unchanged + let content = fs::read_to_string(temp_dirs[2].path().join("file3"))?; + assert_eq!(content, "top layer content"); + + // Clean up + fs.destroy(); + + Ok(()) +} + +#[test] +fn test_link_basic() -> io::Result<()> { + // Create test layers with simple structure: + // Layer 0 (bottom): + // - file1 + // Layer 1 (top): + // - dir1/ + let layers = vec![vec![("file1", false, 0o644)], vec![("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Create hard link from file1 to dir1/link1 + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let link1_name = CString::new("link1").unwrap(); + let link1_entry = fs.link(ctx, file1_entry.inode, dir1_entry.inode, &link1_name)?; + + // Verify the link was created + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir1/link1").exists()); + + // Verify the link has the same inode number as the original file + let updated_file1_entry = fs.lookup(ctx, 1, &file1_name)?; + assert_eq!(link1_entry.attr.st_ino, updated_file1_entry.attr.st_ino); + assert_eq!(link1_entry.attr.st_nlink, updated_file1_entry.attr.st_nlink); + + Ok(()) +} + 
+#[test] +fn test_link_multiple_layers() -> io::Result<()> { + // Create test layers with multiple files: + // Layer 0 (bottom): + // - file1 + // - dir1/ + // - dir1/file2 + // Layer 1 (middle): + // - file3 + // Layer 2 (top): + // - dir2/ + let layers = vec![ + vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("dir1/file2", false, 0o644), + ], + vec![("file3", false, 0o644)], + vec![("dir2", true, 0o755)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Create links to files from different layers + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(ctx, 1, &file3_name)?; + + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + + // Create links in top layer + let link1_name = CString::new("link1").unwrap(); + let link2_name = CString::new("link2").unwrap(); + + let link1_entry = fs.link(ctx, file1_entry.inode, dir2_entry.inode, &link1_name)?; + let link2_entry = fs.link(ctx, file3_entry.inode, dir2_entry.inode, &link2_name)?; + + // Verify the links were created in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir2/link1").exists()); + assert!(top_layer.join("dir2/link2").exists()); + + // Verify source files were copied up + assert!(top_layer.join("file1").exists()); + assert!(top_layer.join("file3").exists()); + + // Verify link attributes + let updated_file1_entry = fs.lookup(ctx, 1, &file1_name)?; + let updated_file3_entry = fs.lookup(ctx, 1, &file3_name)?; + assert_eq!(link1_entry.attr.st_ino, updated_file1_entry.attr.st_ino); + assert_eq!(link2_entry.attr.st_ino, updated_file3_entry.attr.st_ino); + + Ok(()) +} + +#[test] +fn test_link_errors() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): + // - file1 + // - dir1/ + let layers = 
vec![vec![("file1", false, 0o644), ("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Test linking to invalid parent + let invalid_name = CString::new("link1").unwrap(); + assert!(fs + .link(ctx, file1_entry.inode, 999999, &invalid_name) + .is_err()); + + // Test linking with invalid source inode + assert!(fs + .link(ctx, 999999, dir1_entry.inode, &invalid_name) + .is_err()); + + // Test linking with invalid name + let invalid_name = CString::new("../link1").unwrap(); + assert!(fs + .link(ctx, file1_entry.inode, dir1_entry.inode, &invalid_name) + .is_err()); + + Ok(()) +} + +#[test] +fn test_link_nested() -> io::Result<()> { + // Create test layers with nested structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/file2 + // Layer 1 (top): + // - dir2/ + // - dir2/subdir/ + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/subdir", true, 0o755)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Create links to nested files + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, dir1_entry.inode, &file1_name)?; + + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(ctx, dir1_entry.inode, &subdir_name)?; + + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(ctx, subdir_entry.inode, &file2_name)?; + + let dir2_name = CString::new("dir2").unwrap(); + let 
dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + + let dir2_subdir_entry = fs.lookup(ctx, dir2_entry.inode, &subdir_name)?; + + // Create links in different locations + let link1_name = CString::new("link1").unwrap(); + let link2_name = CString::new("link2").unwrap(); + + let link1_entry = fs.link(ctx, file1_entry.inode, dir2_entry.inode, &link1_name)?; + let link2_entry = fs.link(ctx, file2_entry.inode, dir2_subdir_entry.inode, &link2_name)?; + + // Verify the links were created + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir2/link1").exists()); + assert!(top_layer.join("dir2/subdir/link2").exists()); + + // Verify source files were copied up + assert!(top_layer.join("dir1/file1").exists()); + assert!(top_layer.join("dir1/subdir/file2").exists()); + + // Verify link attributes + let updated_file1_entry = fs.lookup(ctx, dir1_entry.inode, &file1_name)?; + let updated_file2_entry = fs.lookup(ctx, subdir_entry.inode, &file2_name)?; + assert_eq!(link1_entry.attr.st_ino, updated_file1_entry.attr.st_ino); + assert_eq!(link2_entry.attr.st_ino, updated_file2_entry.attr.st_ino); + + Ok(()) +} + +#[test] +fn test_link_existing_name() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): + // - file1 + // - dir1/ + // - dir1/existing + let layers = vec![vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("dir1/existing", false, 0o644), + ]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Try to create a link with an existing name + let existing_name = CString::new("existing").unwrap(); + assert!(fs + .link(ctx, file1_entry.inode, dir1_entry.inode, &existing_name) + .is_err()); + + Ok(()) +} + +#[test] +fn test_readlink_whiteout() -> io::Result<()> { + // Create 
test layers: + // Lower layer: target1, link1 -> target1 + // Upper layer: .wh.link1 (whiteout for link1) + let layers = vec![ + vec![("target1", false, 0o644)], + vec![(".wh.link1", false, 0o644)], // Whiteout file + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Create symlink in bottom layer + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link1"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Try to lookup whited-out symlink (should fail) + let link_name = CString::new("link1").unwrap(); + match fs.lookup(Context::default(), 1, &link_name) { + Ok(_) => panic!("Expected lookup of whited-out symlink to fail"), + Err(e) => { + assert_eq!( + e.raw_os_error(), + Some(libc::ENOENT), + "Looking up whited-out symlink should return ENOENT" + ); + } + } + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs new file mode 100644 index 000000000..c09d6fb83 --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs @@ -0,0 +1,380 @@ +use std::{ffi::CString, io}; + +use crate::virtio::fs::filesystem::{Context, Extensions, FileSystem}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_open_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open the file with read-only flags + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid 
handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_open_directory() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Open the directory + let (handle, _opts) = fs.open( + ctx, + entry.inode, + (libc::O_RDONLY | libc::O_DIRECTORY) as u32, + )?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_open_nonexistent() -> io::Result<()> { + // Create a simple overlayfs with a single layer + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to open a non-existent inode + let result = fs.open(ctx, 999, libc::O_RDONLY as u32); + + // Verify it fails with EBADF + assert!(result.is_err()); + assert_eq!(result.unwrap_err().raw_os_error(), Some(libc::EBADF)); + + Ok(()) +} +#[test] +fn test_open_with_copy_up() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 + // Layer 1 (top): empty + let layers = vec![vec![("file1", false, 0o644)], vec![]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open the file with write flags, which should trigger copy-up + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDWR as u32)?; + + // Verify we got a valid handle + 
assert!(handle.is_some()); + + // Verify the file was copied up to the top layer + let top_layer_file = temp_dirs[1].path().join("file1"); + assert!(top_layer_file.exists()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_open_whiteout() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 + // Layer 1 (top): .wh.file1 (whiteout for file1) + let layers = vec![ + vec![("file1", false, 0o644)], + vec![(".wh.file1", false, 0o000)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to lookup the file (should fail because it's whited out) + let file_name = CString::new("file1").unwrap(); + let result = fs.lookup(ctx, 1, &file_name); + + // Verify lookup fails with ENOENT. Match on the result instead of calling + // unwrap_err(), which needs the Ok type to implement Debug. + match result { + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::ENOENT)); + } + Ok(_) => panic!("Expected error for whited-out file"), + } + + Ok(()) +} + +#[test] +fn test_open_and_release_multiple_times() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open and close the file multiple times + for _ in 0..5 { + // Open the file + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + } + + // 
Verify we can still open the file after multiple open/release cycles + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_open_with_different_flags() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Test different open flags + let flags = [ + libc::O_RDONLY, + libc::O_WRONLY, + libc::O_RDWR, + libc::O_RDONLY | libc::O_NONBLOCK, + libc::O_WRONLY | libc::O_APPEND, + ]; + + for flag in flags.iter() { + // Open the file with the current flag + let (handle, _opts) = fs.open(ctx, entry.inode, *flag as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + } + + Ok(()) +} + +#[test] +fn test_opendir_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Open the directory + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_opendir_nonexistent() -> io::Result<()> { + // Create a simple overlayfs with a single layer + let layers = 
vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to open a non-existent inode + let result = fs.opendir(ctx, 999, libc::O_RDONLY as u32); + + // Verify it fails with EBADF + match result { + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::EBADF)); + } + Ok(_) => panic!("Expected error for non-existent inode"), + } + + Ok(()) +} + +#[test] +fn test_opendir_whiteout() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): dir1/ + // Layer 1 (top): .wh.dir1 (whiteout for dir1) + let layers = vec![ + vec![("dir1", true, 0o755)], + vec![(".wh.dir1", false, 0o000)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to lookup the directory (should fail because it's whited out) + let dir_name = CString::new("dir1").unwrap(); + let result = fs.lookup(ctx, 1, &dir_name); + + // Verify lookup fails with ENOENT + match result { + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::ENOENT)); + } + Ok(_) => panic!("Expected error for whited-out directory"), + } + + Ok(()) +} + +#[test] +fn test_opendir_with_copy_up() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): dir1/ + // Layer 1 (top): empty + let layers = vec![vec![("dir1", true, 0o755)], vec![]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // First open the directory normally + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + // Trigger copy-up by creating a new entry in the directory (mkdir, so "newfile" is a subdirectory) + let new_file = CString::new("newfile").unwrap(); + fs.mkdir(ctx, entry.inode, &new_file, 0o755, 0, Extensions::default())?; + + // 
Verify the directory was copied up to the top layer + let top_layer_dir = temp_dirs[1].path().join("dir1"); + assert!(top_layer_dir.exists()); + assert!(top_layer_dir.is_dir()); + + // Verify we can still open the directory after copy-up + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_opendir_and_release_multiple_times() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Open and close the directory multiple times + for _ in 0..5 { + // Open the directory + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + } + + // Verify we can still open the directory after multiple open/release cycles + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_opendir_with_different_flags() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Test different open flags - only use read-only flags since directories can't be opened 
for writing + let flags = [ + libc::O_RDONLY | libc::O_DIRECTORY, + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_NONBLOCK, + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_CLOEXEC, + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_NONBLOCK | libc::O_CLOEXEC, + ]; + + for flag in flags.iter() { + // Open the directory with the current flag + let (handle, _opts) = fs.opendir(ctx, entry.inode, *flag as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + } + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/read.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/read.rs new file mode 100644 index 000000000..6d997e1dd --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/read.rs @@ -0,0 +1,1266 @@ +use std::{ffi::CString, fs, io}; + +use crate::virtio::{ + fs::filesystem::{Context, FileSystem}, fuse::FsOptions, macos::overlayfs::tests::helper::TestContainer, +}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_readlink_basic() -> io::Result<()> { + // Create test layers: + // Lower layer: target_file, link -> target_file + let layers = vec![vec![ + ("target_file", false, 0o644), + // Note: symlinks will be created separately below + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Create symlink in bottom layer + let symlink_path = temp_dirs[0].path().join("link"); + std::os::unix::fs::symlink("target_file", &symlink_path)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink + let link_name = CString::new("link").unwrap(); + let link_entry = fs.lookup(Context::default(), 1, &link_name)?; + let target = 
fs.readlink(Context::default(), link_entry.inode)?; + + assert_eq!(target, b"target_file"); + + Ok(()) +} + +#[test] +fn test_readlink_multiple_layers() -> io::Result<()> { + // Create test layers: + // Lower layer: target1, link1 -> target1 + // Middle layer: target2, link2 -> target2 + // Upper layer: target3, link3 -> target3 + let layers = vec![ + vec![("target1", false, 0o644)], + vec![("target2", false, 0o644)], + vec![("target3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + // Create symlinks in each layer + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link1"))?; + std::os::unix::fs::symlink("target2", temp_dirs[1].path().join("link2"))?; + std::os::unix::fs::symlink("target3", temp_dirs[2].path().join("link3"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink for symlink in bottom layer + let link1_name = CString::new("link1").unwrap(); + let link1_entry = fs.lookup(Context::default(), 1, &link1_name)?; + let target1 = fs.readlink(Context::default(), link1_entry.inode)?; + assert_eq!(target1, b"target1"); + + // Test readlink for symlink in middle layer + let link2_name = CString::new("link2").unwrap(); + let link2_entry = fs.lookup(Context::default(), 1, &link2_name)?; + let target2 = fs.readlink(Context::default(), link2_entry.inode)?; + assert_eq!(target2, b"target2"); + + // Test readlink for symlink in top layer + let link3_name = CString::new("link3").unwrap(); + let link3_entry = fs.lookup(Context::default(), 1, &link3_name)?; + let target3 = fs.readlink(Context::default(), link3_entry.inode)?; + assert_eq!(target3, b"target3"); + + Ok(()) +} + +#[test] +fn test_readlink_shadowed() -> io::Result<()> { + // Create test layers: + // Lower layer: target1, link -> target1 + // Upper layer: link -> target2 (shadows lower layer's link) + let layers = vec![ + vec![("target1", false, 0o644)], + vec![("target2", false, 
0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Create symlinks + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link"))?; + std::os::unix::fs::symlink("target2", temp_dirs[1].path().join("link"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink - should get the symlink from upper layer + let link_name = CString::new("link").unwrap(); + let link_entry = fs.lookup(Context::default(), 1, &link_name)?; + let target = fs.readlink(Context::default(), link_entry.inode)?; + + assert_eq!(target, b"target2", "Should read symlink from upper layer"); + + Ok(()) +} + +#[test] +fn test_readlink_nested() -> io::Result<()> { + // Create test layers with nested directory structure: + // Lower layer: + // - dir1/target1 + // - dir1/link1 -> target1 + // - dir2/target2 + // - dir2/subdir/link2 -> ../target2 + let layers = vec![vec![ + ("dir1", true, 0o755), + ("dir1/target1", false, 0o644), + ("dir2", true, 0o755), + ("dir2/target2", false, 0o644), + ("dir2/subdir", true, 0o755), + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + // Create symlinks + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("dir1/link1"))?; + std::os::unix::fs::symlink("../target2", temp_dirs[0].path().join("dir2/subdir/link2"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink for simple symlink in directory + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let link1_name = CString::new("link1").unwrap(); + let link1_entry = fs.lookup(Context::default(), dir1_entry.inode, &link1_name)?; + let target1 = fs.readlink(Context::default(), link1_entry.inode)?; + assert_eq!(target1, b"target1"); + + // Test readlink for symlink with relative path + let dir2_name = CString::new("dir2").unwrap(); + let 
dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(Context::default(), dir2_entry.inode, &subdir_name)?; + let link2_name = CString::new("link2").unwrap(); + let link2_entry = fs.lookup(Context::default(), subdir_entry.inode, &link2_name)?; + let target2 = fs.readlink(Context::default(), link2_entry.inode)?; + assert_eq!(target2, b"../target2"); + + Ok(()) +} + +#[test] +fn test_readlink_errors() -> io::Result<()> { + // Create test layers: + // Lower layer: regular_file, directory + let layers = vec![vec![ + ("regular_file", false, 0o644), + ("directory", true, 0o755), + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink on regular file (should fail) + let file_name = CString::new("regular_file").unwrap(); + let file_entry = fs.lookup(Context::default(), 1, &file_name)?; + let result = fs.readlink(Context::default(), file_entry.inode); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error(), + Some(libc::EINVAL), + "Reading link of regular file should return EINVAL" + ); + } + Ok(_) => panic!("Expected error for regular file"), + } + + // Test readlink on directory (should fail) + let dir_name = CString::new("directory").unwrap(); + let dir_entry = fs.lookup(Context::default(), 1, &dir_name)?; + let result = fs.readlink(Context::default(), dir_entry.inode); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error(), + Some(libc::EINVAL), + "Reading link of directory should return EINVAL" + ); + } + Ok(_) => panic!("Expected error for directory"), + } + + // Test readlink with invalid inode + let result = fs.readlink(Context::default(), 999999); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error(), + Some(libc::EBADF), + "Reading link with invalid inode should return EBADF" + ); + } + Ok(_) => 
panic!("Expected error for invalid inode"), + } + + Ok(()) +} + +#[test] +fn test_readlink_whiteout() -> io::Result<()> { + // Create test layers: + // Lower layer: target1, link1 -> target1 + // Upper layer: .wh.link1 (whiteout for link1) + let layers = vec![ + vec![("target1", false, 0o644)], + vec![(".wh.link1", false, 0o644)], // Whiteout file + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Create symlink in bottom layer + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link1"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Try to lookup whited-out symlink (should fail) + let link_name = CString::new("link1").unwrap(); + match fs.lookup(Context::default(), 1, &link_name) { + Ok(_) => panic!("Expected lookup of whited-out symlink to fail"), + Err(e) => { + assert_eq!( + e.raw_os_error(), + Some(libc::ENOENT), + "Looking up whited-out symlink should return ENOENT" + ); + } + } + + Ok(()) +} + +#[test] +fn test_read_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the entire content + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, entry.inode, handle, &mut writer, 100, 0, None, 0)?; + + assert_eq!(bytes_read, 13); // Length of "Hello, World!" 
+ assert_eq!(&writer.0, b"Hello, World!"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_with_offset() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read with offset + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read( + ctx, + entry.inode, + handle, + &mut writer, + 100, + 7, // Start at offset 7 (after "Hello, ") + None, + 0, + )?; + + assert_eq!(bytes_read, 6); // Length of "World!" 
+ assert_eq!(&writer.0, b"World!"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_partial() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read only first 5 bytes + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read( + ctx, + entry.inode, + handle, + &mut writer, + 5, // Only read 5 bytes + 0, + None, + 0, + )?; + + assert_eq!(bytes_read, 5); + assert_eq!(&writer.0, b"Hello"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_whiteout() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 with content + // Layer 1 (top): .wh.file1 (whiteout for file1) + let layers = vec![ + vec![("file1", false, 0o644)], + vec![(".wh.file1", false, 0o000)], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file in bottom layer + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Try to lookup the file (should fail because it's whited out) + let file_name = CString::new("file1").unwrap(); + assert!(fs.lookup(ctx, 1, &file_name).is_err()); + + Ok(()) +} + +#[test] +fn test_read_after_copy_up() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 with content + // Layer 1 (top): empty + let layers = vec![vec![("file1", 
false, 0o644)], vec![]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file in bottom layer + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open with write flag to trigger copy-up + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDWR as u32)?; + let handle = handle.unwrap(); + + // Verify the file was copied up + assert!(temp_dirs[1].path().join("file1").exists()); + + // Read the content after copy-up + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, entry.inode, handle, &mut writer, 100, 0, None, 0)?; + + assert_eq!(bytes_read, 13); + assert_eq!(&writer.0, b"Hello, World!"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_invalid_handle() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, _) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to read with an invalid handle + let mut writer = TestContainer(Vec::new()); + let result = fs.read( + ctx, + 1, + 999, // Invalid handle + &mut writer, + 100, + 0, + None, + 0, + ); + + assert!(result.is_err()); + assert_eq!(result.unwrap_err().raw_os_error(), Some(libc::EBADF)); + + Ok(()) +} + +#[test] +fn test_read_multiple_times() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); + 
let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the file multiple times with different offsets + let test_cases: Vec<(u64, u32, &[u8])> = + vec![(0, 5, b"Hello"), (7, 5, b"World"), (12, 1, b"!")]; + + for (offset, size, expected) in test_cases { + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, entry.inode, handle, &mut writer, size, offset, None, 0)?; + + assert_eq!(bytes_read, expected.len()); + assert_eq!(&writer.0, expected); + } + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_nested_directories() -> io::Result<()> { + // Create test layers with nested structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (content: "bottom file1") + // - dir1/subdir/ + // - dir1/subdir/file2 (content: "bottom file2") + // Layer 1 (middle): + // - dir1/file3 (content: "middle file3") + // - dir1/subdir/file4 (content: "middle file4") + // Layer 2 (top): + // - dir1/file1 (content: "top file1") + // - dir1/subdir/file5 (content: "top file5") + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file3", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file4", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file5", false, 0o644), + ], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write content to files in different layers + std::fs::write(temp_dirs[0].path().join("dir1/file1"), b"bottom file1")?; + std::fs::write( + temp_dirs[0].path().join("dir1/subdir/file2"), + b"bottom file2", + )?; + std::fs::write(temp_dirs[1].path().join("dir1/file3"), b"middle file3")?; + std::fs::write( + 
temp_dirs[1].path().join("dir1/subdir/file4"), + b"middle file4", + )?; + std::fs::write(temp_dirs[2].path().join("dir1/file1"), b"top file1")?; + std::fs::write(temp_dirs[2].path().join("dir1/subdir/file5"), b"top file5")?; + + let ctx = Context::default(); + + // First lookup dir1 + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Test 1: Read file1 (should get content from top layer) + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, dir1_entry.inode, &file1_name)?; + let (handle, _) = fs.open(ctx, file1_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file1_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 9); + assert_eq!(&writer.0, b"top file1"); + fs.release(ctx, file1_entry.inode, 0, handle, false, false, None)?; + + // Test 2: Read file3 (from middle layer) + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(ctx, dir1_entry.inode, &file3_name)?; + let (handle, _) = fs.open(ctx, file3_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file3_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 12); + assert_eq!(&writer.0, b"middle file3"); + fs.release(ctx, file3_entry.inode, 0, handle, false, false, None)?; + + // Lookup subdir + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(ctx, dir1_entry.inode, &subdir_name)?; + + // Test 3: Read file2 (from bottom layer) + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(ctx, subdir_entry.inode, &file2_name)?; + let (handle, _) = fs.open(ctx, file2_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = 
fs.read(ctx, file2_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 12); + assert_eq!(&writer.0, b"bottom file2"); + fs.release(ctx, file2_entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_with_whiteouts_and_opaque_dirs() -> io::Result<()> { + // Create test layers with whiteouts and opaque directories: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (content: "file1") + // - dir1/subdir/ + // - dir1/subdir/file2 (content: "file2") + // Layer 1 (middle): + // - dir1/ + // - dir1/.wh.file1 (whiteout file1) + // - dir1/subdir/ + // - dir1/subdir/.wh..wh..opq (opaque dir) + // - dir1/subdir/file3 (content: "file3") + // Layer 2 (top): + // - dir1/ + // - dir1/file4 (content: "file4") + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh.file1", false, 0o000), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/.wh..wh..opq", false, 0o000), + ("dir1/subdir/file3", false, 0o644), + ], + vec![("dir1", true, 0o755), ("dir1/file4", false, 0o644)], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write content to files + std::fs::write(temp_dirs[0].path().join("dir1/file1"), b"file1")?; + std::fs::write(temp_dirs[0].path().join("dir1/subdir/file2"), b"file2")?; + std::fs::write(temp_dirs[1].path().join("dir1/subdir/file3"), b"file3")?; + std::fs::write(temp_dirs[2].path().join("dir1/file4"), b"file4")?; + + let ctx = Context::default(); + + // First lookup dir1 + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Test 1: Try to read whited-out file1 (should fail) + let file1_name = CString::new("file1").unwrap(); + assert!(fs.lookup(ctx, dir1_entry.inode, &file1_name).is_err()); + + // Test 2: Read file4 from top layer + let file4_name = CString::new("file4").unwrap(); + let 
file4_entry = fs.lookup(ctx, dir1_entry.inode, &file4_name)?; + let (handle, _) = fs.open(ctx, file4_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file4_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 5); + assert_eq!(&writer.0, b"file4"); + fs.release(ctx, file4_entry.inode, 0, handle, false, false, None)?; + + // Lookup subdir + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(ctx, dir1_entry.inode, &subdir_name)?; + + // Test 3: Try to read file2 through opaque directory (should fail) + let file2_name = CString::new("file2").unwrap(); + assert!(fs.lookup(ctx, subdir_entry.inode, &file2_name).is_err()); + + // Test 4: Read file3 through opaque directory (should succeed) + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(ctx, subdir_entry.inode, &file3_name)?; + let (handle, _) = fs.open(ctx, file3_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file3_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 5); + assert_eq!(&writer.0, b"file3"); + fs.release(ctx, file3_entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_readdir_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory with files + let layers = vec![vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ]]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let 
mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries + assert!(entries.contains(&"file1".to_string())); + assert!(entries.contains(&"file2".to_string())); + assert_eq!(entries.len(), 2); + + // Release the directory handle so it is not leaked + fs.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +/// Lists `dir1`, which is merged from three layers, with two `readdir` calls: the second call +/// resumes from the last offset reported by the first. Together the two calls must return +/// each of the 8 unique names exactly once. +#[test] +fn test_readdir_with_offset() -> io::Result<()> { + // Create an overlayfs with multiple layers containing overlapping directories and files + // Layer 0 (lowest): Some initial files + // Layer 1 (middle): Some additional files and modifications + // Layer 2 (top): More files + let layers = vec![ + // Layer 0 (lowest) + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("dir1/common", false, 0o644), + ], + // Layer 1 (middle) + vec![ + ("dir1", true, 0o755), + ("dir1/file3", false, 0o644), + ("dir1/file4", false, 0o644), + ("dir1/common", false, 0o644), // This overlays the one in layer 0 + ], + // Layer 2 (top) + vec![ + ("dir1", true, 0o755), + ("dir1/file5", false, 0o644), + ("dir1/file6", false, 0o644), + ("dir1/file7", false, 0o644), + ], + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the first batch of directory entries and save the offset + let mut entries = Vec::new(); + let mut last_offset = 0; + fs.readdir( + ctx, + entry.inode, + handle, + 1024, // Small buffer to force multiple reads + 0, + |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + last_offset = dir_entry.offset; + Ok(0) // NOTE(review): presumably 0 means "no space left", ending this batch early - confirm against FileSystem::readdir + }, + )?; + + // Read the second batch of
directory entries starting from the last offset + let mut more_entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, last_offset, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + more_entries.push(name); + Ok(1) + })?; + + // Verify that we got all entries between the two reads + let all_entries: Vec<_> = entries.into_iter().chain(more_entries).collect(); + + assert!(all_entries.contains(&"file1".to_string())); + assert!(all_entries.contains(&"file2".to_string())); + assert!(all_entries.contains(&"file3".to_string())); + assert!(all_entries.contains(&"file4".to_string())); + assert!(all_entries.contains(&"file5".to_string())); + assert!(all_entries.contains(&"file6".to_string())); + assert!(all_entries.contains(&"file7".to_string())); + assert!(all_entries.contains(&"common".to_string())); + + // Verify we have the right number of entries + assert_eq!(all_entries.len(), 8); + + // Release the directory handle now that both reads are done + fs.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +/// Listing an empty directory must yield no entries. +#[test] +fn test_readdir_empty_directory() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty directory + let layers = vec![vec![("empty_dir", true, 0o755)]]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("empty_dir").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(0) + })?; + + // Verify the entries (should be empty since "." and ".."
are handled by the kernel) + assert_eq!(entries.len(), 0); + + // Release the directory handle + fs.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +/// A `.wh.file2` whiteout in the top layer must hide `file2` from the merged listing of `dir1`, +/// while `file1` and `file3` from the bottom layer stay visible. +#[test] +fn test_readdir_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers: + // Layer 0 (bottom): dir1 with file1, file2, file3 + // Layer 1 (top): dir1 with file2 whited out + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("dir1/file3", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh.file2", false, 0o644), // Whiteout for file2 + ], + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries (should include "file1" and "file3", but not "file2") + assert!(entries.contains(&"file1".to_string())); + assert!(entries.contains(&"file3".to_string())); + assert!(!entries.contains(&"file2".to_string())); // Should be whited out + assert_eq!(entries.len(), 2); + + // Release the directory handle + fs.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +/// Directories present in several layers must be listed as the union of their entries: +/// `dir1` spans all three layers, `dir2` the upper two, and `dir3` only the top one. +#[test] +fn test_readdir_multiple_layers() -> io::Result<()> { + let layers = vec![ + vec![("dir1", true, 0o755), ("dir1/file1", false, 0o644)], + vec![ + ("dir1", true, 0o755), + ("dir1/file2", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file1", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file3", false, 0o644), + ("dir2/file2", false, 0o644), + ("dir3", true, 0o755), + ("dir3/file1", false, 0o644), + ], + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the dir1 + let entry = fs.lookup(ctx, 1,
&CString::new("dir1").unwrap())?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries (should include "file1", "file2", and "file3") + assert!(entries.contains(&"file1".to_string())); + assert!(entries.contains(&"file2".to_string())); + assert!(entries.contains(&"file3".to_string())); + assert_eq!(entries.len(), 3); + + // Release the dir1 handle before `entry` and `handle` are shadowed below + fs.releasedir(ctx, entry.inode, 0, handle)?; + + // Lookup and open the dir2 + let entry = fs.lookup(ctx, 1, &CString::new("dir2").unwrap())?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries (should include "file1", and "file2") + assert!(entries.contains(&"file1".to_string())); + assert!(entries.contains(&"file2".to_string())); + assert_eq!(entries.len(), 2); + + // Release the dir2 handle before `entry` and `handle` are shadowed below + fs.releasedir(ctx, entry.inode, 0, handle)?; + + // Lookup and open the dir3 + let entry = fs.lookup(ctx, 1, &CString::new("dir3").unwrap())?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries (should include "file1") + assert!(entries.contains(&"file1".to_string())); + assert_eq!(entries.len(), 1); + + // Release the dir3 handle + fs.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +#[test] +fn test_readdir_opaque_marker() -> io::Result<()> { + // Create an overlayfs with three layers: + // Layer 0 (bottom): dir1 with
file1, file2, file3 + // Layer 1 (middle): dir1 with opaque marker, file4, file5 + // Layer 2 (top): dir1 with file5 (shadows middle), file6, file7 + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("dir1/file3", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh..wh..opq", false, 0o644), // Opaque marker for dir1 + ("dir1/file4", false, 0o644), + ("dir1/file5", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file5", false, 0o644), // Shadows file5 from layer 1 + ("dir1/file6", false, 0o644), + ("dir1/file7", false, 0o644), + ], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Sort entries for consistent comparison + entries.sort(); + + // Due to the opaque marker in the middle layer, we should only see: + // - files from the top layer (file5, file6, file7) + // - files from the middle layer that aren't shadowed by the top (file4) + // - NO files from the bottom layer (file1, file2, file3 should be hidden) + let expected_entries = vec![ + "file4".to_string(), + "file5".to_string(), + "file6".to_string(), + "file7".to_string(), + ]; + + assert_eq!(entries, expected_entries, "Unexpected directory entries"); + + // Release the directory handle + fs.releasedir(ctx, entry.inode, 0, handle)?; + + // Additional test: Create a second directory with opaque marker in top layer + let layers2 = vec![ + vec![ + ("dir2", true, 0o755), + ("dir2/bottom1", false, 
0o644), + ("dir2/bottom2", false, 0o644), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/middle1", false, 0o644), + ("dir2/middle2", false, 0o644), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/.wh..wh..opq", false, 0o644), // Opaque marker in top layer + ("dir2/top1", false, 0o644), + ], + ]; + + let (fs2, _temp_dirs2) = helper::create_overlayfs(layers2)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir2").unwrap(); + let entry = fs2.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs2.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs2.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Sort entries for consistent comparison + entries.sort(); + + // With opaque marker in the top layer, we should only see: + // - files from the top layer (top1) + // - NO files from middle or bottom layers + assert_eq!( + entries, + vec!["top1".to_string()], + "Unexpected entries in dir2" + ); + + // Release the directory handle + fs2.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +#[test] +fn test_readdir_shadow() -> io::Result<()> { + // Create an overlayfs with three layers with shadowing: + // Layer 0 (bottom): dir1 with common, only_bottom, shadowed1, shadowed2 + // Layer 1 (middle): dir1 with common, only_middle, shadowed1 + // Layer 2 (top): dir1 with common, only_top, shadowed2 + // + // Each file has different content to verify proper shadowing + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/common", false, 0o644), + ("dir1/only_bottom", false, 0o644), + ("dir1/shadowed1", false, 0o644), + ("dir1/shadowed2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/common", false, 0o644), + ("dir1/only_middle", false, 0o644), + ("dir1/shadowed1", false, 0o644), + ], + 
vec![ + ("dir1", true, 0o755), + ("dir1/common", false, 0o644), + ("dir1/only_top", false, 0o644), + ("dir1/shadowed2", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write different content to each layer's files + // Bottom layer + fs::write( + temp_dirs[0].path().join("dir1/common"), + "bottom layer common content", + )?; + fs::write( + temp_dirs[0].path().join("dir1/only_bottom"), + "only in bottom layer", + )?; + fs::write( + temp_dirs[0].path().join("dir1/shadowed1"), + "shadowed1 bottom content", + )?; + fs::write( + temp_dirs[0].path().join("dir1/shadowed2"), + "shadowed2 bottom content", + )?; + + // Middle layer + fs::write( + temp_dirs[1].path().join("dir1/common"), + "middle layer common content", + )?; + fs::write( + temp_dirs[1].path().join("dir1/only_middle"), + "only in middle layer", + )?; + fs::write( + temp_dirs[1].path().join("dir1/shadowed1"), + "shadowed1 middle content", + )?; + + // Top layer + fs::write( + temp_dirs[2].path().join("dir1/common"), + "top layer common content", + )?; + fs::write( + temp_dirs[2].path().join("dir1/only_top"), + "only in top layer", + )?; + fs::write( + temp_dirs[2].path().join("dir1/shadowed2"), + "shadowed2 top content", + )?; + + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Sort entries for consistent comparison + entries.sort(); + + // Release the directory handle + fs.releasedir(ctx, entry.inode, 0, handle)?; + + // We should see all unique filenames across layers + // Each file should appear exactly once + 
let expected_entries = vec![ + "common".to_string(), + "only_bottom".to_string(), + "only_middle".to_string(), + "only_top".to_string(), + "shadowed1".to_string(), + "shadowed2".to_string(), + ]; + + assert_eq!(entries, expected_entries, "Unexpected directory entries"); + + // Now verify the content of each file to check shadowing + + // 1. common file - should have top layer content + let common_entry = fs.lookup(ctx, entry.inode, &CString::new("common").unwrap())?; + let (handle, _) = fs.open(ctx, common_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + common_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "top layer common content", + "common file should have top layer content" + ); + fs.release(ctx, common_entry.inode, 0, handle, false, false, None)?; + + // 2. shadowed1 file - should have middle layer content (shadowed by middle over bottom) + let shadowed1_entry = fs.lookup(ctx, entry.inode, &CString::new("shadowed1").unwrap())?; + let (handle, _) = fs.open(ctx, shadowed1_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + shadowed1_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "shadowed1 middle content", + "shadowed1 file should have middle layer content" + ); + fs.release(ctx, shadowed1_entry.inode, 0, handle, false, false, None)?; + + // 3. 
shadowed2 file - should have top layer content (shadowed by top over bottom) + let shadowed2_entry = fs.lookup(ctx, entry.inode, &CString::new("shadowed2").unwrap())?; + let (handle, _) = fs.open(ctx, shadowed2_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + shadowed2_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "shadowed2 top content", + "shadowed2 file should have top layer content" + ); + fs.release(ctx, shadowed2_entry.inode, 0, handle, false, false, None)?; + + // 4. only_bottom file - should exist and have bottom layer content + let only_bottom_entry = fs.lookup(ctx, entry.inode, &CString::new("only_bottom").unwrap())?; + let (handle, _) = fs.open(ctx, only_bottom_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + only_bottom_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "only in bottom layer", + "only_bottom file should have bottom layer content" + ); + fs.release(ctx, only_bottom_entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/remove.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/remove.rs new file mode 100644 index 000000000..38fd3087d --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/remove.rs @@ -0,0 +1,508 @@ +use std::{ffi::CString, io}; + +use crate::virtio::fs::filesystem::{Context, FileSystem}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_unlink_basic() -> io::Result<()> { + // Create a simple 
overlayfs with a single layer containing a file + let (fs, temp_dirs) = helper::create_overlayfs(vec![vec![("file1.txt", false, 0o644)]])?; + let ctx = Context::default(); + + // Lookup the file to get its parent inode (root) and verify it exists + let file_name = CString::new("file1.txt").unwrap(); + let _ = fs.lookup(ctx, 1, &file_name)?; + + // Unlink the file + fs.unlink(ctx, 1, &file_name)?; + + // Verify the file is gone + match fs.lookup(ctx, 1, &file_name) { + Ok(_) => panic!("File still exists after unlink"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify the file is physically removed from the filesystem + assert!(!temp_dirs[0].path().join("file1.txt").exists()); + + Ok(()) +} + +#[test] +fn test_unlink_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers: + // - Lower layer: contains file1.txt + // - Upper layer: empty + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![("file1.txt", false, 0o644)], // lower layer + vec![], // upper layer + ])?; + let ctx = Context::default(); + + // Lookup the file to verify it exists + let file_name = CString::new("file1.txt").unwrap(); + let _ = fs.lookup(ctx, 1, &file_name)?; + + // Unlink the file - this should create a whiteout in the upper layer + fs.unlink(ctx, 1, &file_name)?; + + // Verify the file appears to be gone through the overlayfs + match fs.lookup(ctx, 1, &file_name) { + Ok(_) => panic!("File still exists after unlink"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify the original file still exists in the lower layer + assert!(temp_dirs[0].path().join("file1.txt").exists()); + + // Verify a whiteout was created in the upper layer + assert!(temp_dirs[1].path().join(".wh.file1.txt").exists()); + + Ok(()) +} + +#[test] +fn test_unlink_multiple_layers() -> io::Result<()> { + // Create an overlayfs with three layers, each containing different files + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + 
vec![("lower.txt", false, 0o644)], // lowest layer + vec![("middle.txt", false, 0o644)], // middle layer + vec![("upper.txt", false, 0o644)], // upper layer + ])?; + let ctx = Context::default(); + + // Test unlinking a file from each layer + for file in &["lower.txt", "middle.txt", "upper.txt"] { + let file_name = CString::new(*file).unwrap(); + + // Verify file exists before unlink + fs.lookup(ctx, 1, &file_name)?; + + // Unlink the file + fs.unlink(ctx, 1, &file_name)?; + + // Verify file appears gone through overlayfs + match fs.lookup(ctx, 1, &file_name) { + Ok(_) => panic!("File {} still exists after unlink", file), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + } + + // Verify physical state of layers: + // - Files in lower layers should still exist + // - File in top layer should be gone + // - Whiteouts should exist in top layer for lower files + assert!(temp_dirs[0].path().join("lower.txt").exists()); + assert!(temp_dirs[1].path().join("middle.txt").exists()); + assert!(!temp_dirs[2].path().join("upper.txt").exists()); + assert!(temp_dirs[2].path().join(".wh.lower.txt").exists()); + assert!(temp_dirs[2].path().join(".wh.middle.txt").exists()); + + Ok(()) +} + +#[test] +fn test_unlink_nested_files() -> io::Result<()> { + // Create an overlayfs with nested directory structure + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2.txt", false, 0o644), + ], + vec![], // empty upper layer + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Lookup and unlink nested files + let dir1_name = CString::new("dir1").unwrap(); + let subdir_name = CString::new("subdir").unwrap(); + let file1_name = CString::new("file1.txt").unwrap(); + let file2_name = CString::new("file2.txt").unwrap(); + + // Get directory inodes + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let subdir_entry = 
fs.lookup(ctx, dir1_entry.inode, &subdir_name)?; + + // Unlink file2.txt from subdir + fs.unlink(ctx, subdir_entry.inode, &file2_name)?; + + // Verify file2.txt is gone but file1.txt still exists + match fs.lookup(ctx, subdir_entry.inode, &file2_name) { + Ok(_) => panic!("file2.txt still exists after unlink"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + fs.lookup(ctx, dir1_entry.inode, &file1_name)?; // should succeed + + helper::debug_print_layers(&temp_dirs, false)?; + + // Verify whiteout was created in correct location + assert!(temp_dirs[1] + .path() + .join("dir1/subdir/.wh.file2.txt") + .exists()); + + Ok(()) +} + +#[test] +fn test_unlink_errors() -> io::Result<()> { + // Create a basic overlayfs + let (fs, _) = helper::create_overlayfs(vec![vec![("file1.txt", false, 0o644)]])?; + let ctx = Context::default(); + + // Test: Try to unlink non-existent file + let nonexistent = CString::new("nonexistent.txt").unwrap(); + match fs.unlink(ctx, 1, &nonexistent) { + Ok(_) => panic!("Unlink succeeded on non-existent file"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Test: Try to unlink with invalid parent inode + let file_name = CString::new("file1.txt").unwrap(); + match fs.unlink(ctx, 999999, &file_name) { + Ok(_) => panic!("Unlink succeeded with invalid parent inode"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EBADF)), + } + + // Test: Try to unlink with invalid name (containing path traversal) + let invalid_name = CString::new("../file1.txt").unwrap(); + match fs.unlink(ctx, 1, &invalid_name) { + Ok(_) => panic!("Unlink succeeded with invalid name"), + Err(e) => { + assert_eq!( + e.kind(), + io::ErrorKind::PermissionDenied, + "Expected PermissionDenied error, got {:?}", + e.kind() + ); + } + } + + Ok(()) +} + +#[test] +fn test_unlink_complex_layers() -> io::Result<()> { + // Create an overlayfs with complex layer structure: + // - Lower layer: base files + // - Middle layer: some files deleted, some 
added + // - Upper layer: more modifications + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + // lower layer + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir1/file2.txt", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file3.txt", false, 0o644), + ], + vec![ + // middle layer + ("dir1/new_file.txt", false, 0o644), + ("dir2/file4.txt", false, 0o644), + // Whiteout in middle layer for file3.txt in dir2 - placed in dir2 directory + ("dir2/.wh.file3.txt", false, 0o000), + ], + vec![ + // upper layer + ("dir3", true, 0o755), + ("dir3/file5.txt", false, 0o644), + ], + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Test 1: Unlink a file that exists in the top layer + let dir3_name = CString::new("dir3").unwrap(); + let file5_name = CString::new("file5.txt").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + fs.unlink(ctx, dir3_entry.inode, &file5_name)?; + assert!(!temp_dirs[2].path().join("dir3/file5.txt").exists()); + + // Test 2: Unlink a file from middle layer + let dir1_name = CString::new("dir1").unwrap(); + let new_file_name = CString::new("new_file.txt").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + fs.unlink(ctx, dir1_entry.inode, &new_file_name)?; + // Expect a whiteout created in the top layer for new_file.txt + assert!(temp_dirs[2].path().join("dir1/.wh.new_file.txt").exists()); + + // Test 3: Unlink a file from lowest layer + let file1_name = CString::new("file1.txt").unwrap(); + fs.unlink(ctx, dir1_entry.inode, &file1_name)?; + // NOTE(review): everything below (the assertions for Test 3 and all of Test 4) is + // commented out, so Test 3 currently only verifies that `unlink` returns Ok. + // Confirm why these checks were disabled, then either re-enable or delete them. + // // Expect a whiteout in the top layer but the original file remains in lower layer + // assert!(temp_dirs[2].path().join("dir1/.wh.file1.txt").exists()); + // assert!(temp_dirs[0].path().join("dir1/file1.txt").exists()); + + // // Test 4: Unlink a file from lowest layer that is already whiteouted + // let file2_name = CString::new("file2.txt").unwrap(); + // // First unlink to create the whiteout + // fs.unlink(ctx,
dir1_entry.inode, &file2_name)?; + // assert!(temp_dirs[2].path().join("dir1/.wh.file2.txt").exists()); + // // Second attempt should fail with ENOENT + // match fs.unlink(ctx, dir1_entry.inode, &file2_name) { + // Ok(_) => panic!("Unlink succeeded on already whiteouted file"), + // Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + // } + + Ok(()) +} + +/// Removing an empty directory from a single-layer overlay must make later lookups fail with +/// `ENOENT` and must delete the directory from the layer's backing path on disk. +#[test] +fn test_rmdir_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty directory + let (fs, temp_dirs) = helper::create_overlayfs(vec![vec![("empty_dir", true, 0o755)]])?; + let ctx = Context::default(); + + // Lookup the directory to verify it exists + let dir_name = CString::new("empty_dir").unwrap(); + let _ = fs.lookup(ctx, 1, &dir_name)?; + + // Remove the directory + fs.rmdir(ctx, 1, &dir_name)?; + + // Verify the directory is gone + match fs.lookup(ctx, 1, &dir_name) { + Ok(_) => panic!("Directory still exists after rmdir"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify the directory is physically removed from the filesystem + assert!(!temp_dirs[0].path().join("empty_dir").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers: + // - Lower layer: contains empty_dir + // - Upper layer: empty + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![("empty_dir", true, 0o755)], // lower layer + vec![], // upper layer + ])?; + let ctx = Context::default(); + + // Lookup the directory to verify it exists + let dir_name = CString::new("empty_dir").unwrap(); + let _ = fs.lookup(ctx, 1, &dir_name)?; + + // Remove the directory - this should create a whiteout in the upper layer + fs.rmdir(ctx, 1, &dir_name)?; + + // Verify the directory appears to be gone through the overlayfs + match fs.lookup(ctx, 1, &dir_name) { + Ok(_) => panic!("Directory still exists after rmdir"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify
the original directory still exists in the lower layer + assert!(temp_dirs[0].path().join("empty_dir").exists()); + + // Verify a whiteout was created in the upper layer + assert!(temp_dirs[1].path().join(".wh.empty_dir").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_multiple_layers() -> io::Result<()> { + // Create an overlayfs with three layers, each containing different directories + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![("lower_dir", true, 0o755)], // lowest layer + vec![("middle_dir", true, 0o755)], // middle layer + vec![("upper_dir", true, 0o755)], // upper layer + ])?; + let ctx = Context::default(); + + // Test removing a directory from each layer + for dir in &["lower_dir", "middle_dir", "upper_dir"] { + let dir_name = CString::new(*dir).unwrap(); + + // Verify directory exists before removal + fs.lookup(ctx, 1, &dir_name)?; + + // Remove the directory + fs.rmdir(ctx, 1, &dir_name)?; + + // Verify directory appears gone through overlayfs + match fs.lookup(ctx, 1, &dir_name) { + Ok(_) => panic!("Directory {} still exists after rmdir", dir), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + } + + // Verify physical state of layers: + // - Directories in lower layers should still exist + // - Directory in top layer should be gone + // - Whiteouts should exist in top layer for lower directories + assert!(temp_dirs[0].path().join("lower_dir").exists()); + assert!(temp_dirs[1].path().join("middle_dir").exists()); + assert!(!temp_dirs[2].path().join("upper_dir").exists()); + assert!(temp_dirs[2].path().join(".wh.lower_dir").exists()); + assert!(temp_dirs[2].path().join(".wh.middle_dir").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_nested_dirs() -> io::Result<()> { + // Create an overlayfs with nested directory structure + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/subdir1", true, 0o755), + ("dir1/subdir2", true, 0o755), + ("dir1/subdir2/nested", true, 0o755), + 
], + vec![], // empty upper layer + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Lookup and remove nested directories + let dir1_name = CString::new("dir1").unwrap(); + let subdir2_name = CString::new("subdir2").unwrap(); + let nested_name = CString::new("nested").unwrap(); + + // Get directory inodes + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let subdir2_entry = fs.lookup(ctx, dir1_entry.inode, &subdir2_name)?; + + // Remove nested directory + fs.rmdir(ctx, subdir2_entry.inode, &nested_name)?; + + // Verify nested is gone but subdir1 still exists + match fs.lookup(ctx, subdir2_entry.inode, &nested_name) { + Ok(_) => panic!("nested directory still exists after rmdir"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + let subdir1_name = CString::new("subdir1").unwrap(); + fs.lookup(ctx, dir1_entry.inode, &subdir1_name)?; // should succeed + + // Verify whiteout was created in correct location + assert!(temp_dirs[1].path().join("dir1/subdir2/.wh.nested").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_errors() -> io::Result<()> { + // Create an overlayfs with a directory containing a file + let (fs, _temp_dirs) = helper::create_overlayfs(vec![vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ]])?; + let ctx = Context::default(); + + // Test: Try to remove non-existent directory + let nonexistent = CString::new("nonexistent").unwrap(); + match fs.rmdir(ctx, 1, &nonexistent) { + Ok(_) => panic!("rmdir succeeded on non-existent directory"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Test: Try to remove with invalid parent inode + let dir_name = CString::new("dir1").unwrap(); + match fs.rmdir(ctx, 999999, &dir_name) { + Ok(_) => panic!("rmdir succeeded with invalid parent inode"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EBADF)), + } + + // Test: Try to remove non-empty directory + match fs.rmdir(ctx, 1, &dir_name) { + Ok(_) => 
panic!("rmdir succeeded on non-empty directory"), + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::ENOTEMPTY)); + } + } + + // Test: Try to remove with invalid name (containing path traversal) + let invalid_name = CString::new("../dir1").unwrap(); + match fs.rmdir(ctx, 1, &invalid_name) { + Ok(_) => panic!("rmdir succeeded with invalid name"), + Err(e) => { + assert_eq!( + e.kind(), + io::ErrorKind::PermissionDenied, + "Expected PermissionDenied error, got {:?}", + e.kind() + ); + } + } + + // Test: Try to remove a file using rmdir + let file_name = CString::new("file1.txt").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir_name)?; + match fs.rmdir(ctx, dir1_entry.inode, &file_name) { + Ok(_) => panic!("rmdir succeeded on a file"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOTDIR)), + } + + Ok(()) +} + +#[test] +fn test_rmdir_complex_layers() -> io::Result<()> { + // Create an overlayfs with complex layer structure: + // - Lower layer: base directories + // - Middle layer: some directories deleted, some added + // - Upper layer: more modifications + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + // lower layer + ("dir1", true, 0o755), + ("dir1/subdir1", true, 0o755), + ("dir2", true, 0o755), + ("dir2/subdir2", true, 0o755), + ], + vec![ + // middle layer + ("dir1/new_dir", true, 0o755), + ("dir2/subdir3", true, 0o755), + // Whiteout in middle layer for subdir2 in dir2 + ("dir2/.wh.subdir2", false, 0o000), + ], + vec![ + // upper layer + ("dir3", true, 0o755), + ("dir3/subdir4", true, 0o755), + ], + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Test 1: Remove a directory that exists in the top layer + let dir3_name = CString::new("dir3").unwrap(); + let subdir4_name = CString::new("subdir4").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + fs.rmdir(ctx, dir3_entry.inode, &subdir4_name)?; + assert!(!temp_dirs[2].path().join("dir3/subdir4").exists()); + + // Test 2: 
Remove a directory from middle layer + let dir1_name = CString::new("dir1").unwrap(); + let new_dir_name = CString::new("new_dir").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + fs.rmdir(ctx, dir1_entry.inode, &new_dir_name)?; + // Expect a whiteout created in the top layer for new_dir + assert!(temp_dirs[2].path().join("dir1/.wh.new_dir").exists()); + + // Test 3: Remove a directory from lowest layer + let subdir1_name = CString::new("subdir1").unwrap(); + fs.rmdir(ctx, dir1_entry.inode, &subdir1_name)?; + // Expect a whiteout in the top layer but the original directory remains in lower layer + assert!(temp_dirs[2].path().join("dir1/.wh.subdir1").exists()); + assert!(temp_dirs[0].path().join("dir1/subdir1").exists()); + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/write.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/write.rs new file mode 100644 index 000000000..86e8bfb49 --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/write.rs @@ -0,0 +1,428 @@ +use std::{ffi::CString, io}; + +use crate::virtio::{fs::filesystem::{Context, FileSystem}, macos::overlayfs::tests::helper::TestContainer}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_write_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, (libc::O_WRONLY | libc::O_TRUNC) as u32)?; + let handle = handle.unwrap(); + + // Write content to the file + let 
content = b"Hello, World!"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(file_content, content); + + Ok(()) +} + +#[test] +fn test_write_with_offset() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with initial content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some initial content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_WRONLY as u32)?; + let handle = handle.unwrap(); + + // Write content at an offset + let content = b"Rusty"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + content.len() as u32, + 7, + None, + false, + false, + 0, + )?; + + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(&file_content, b"Hello, Rusty!"); + + Ok(()) +} + +#[test] +fn test_write_partial() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = 
helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, (libc::O_WRONLY | libc::O_TRUNC) as u32)?; + let handle = handle.unwrap(); + + // Write content to the file, but request to write more than we have + let content = b"Hello, World!"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + 100, + 0, + None, + false, + false, + 0, + )?; + + // Should only write what's available + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(file_content, content); + + Ok(()) +} + +#[test] +fn test_write_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers, where the top layer has a whiteout for file1 + let layers = vec![ + vec![("file1", false, 0o644)], + vec![(".wh.file1", false, 0o644)], // Whiteout for file1 + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file (should fail because it's whited out) + let file_name = CString::new("file1").unwrap(); + let lookup_result = fs.lookup(ctx, 1, &file_name); + assert!(lookup_result.is_err()); + + Ok(()) +} + +#[test] +fn test_write_after_copy_up() -> io::Result<()> { + // Create an overlayfs with two layers, where file1 exists in the lower layer + let layers = vec![vec![("file1", false, 0o644)], vec![]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some initial content to the file in the lower layer + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // 
Lookup and open the file with write permissions (should trigger copy-up) + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_WRONLY as u32)?; + let handle = handle.unwrap(); + + // Write new content to the file + let content = b"Hello, Rusty!"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly to the upper layer + let file_content = std::fs::read(temp_dirs[1].path().join("file1"))?; + assert_eq!(file_content, content); + + // The lower layer should remain unchanged + let lower_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(lower_content, b"Hello, World!"); + + Ok(()) +} + +#[test] +fn test_write_invalid_handle() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Try to write with an invalid handle + let invalid_handle = 12345; + let mut reader = TestContainer(b"Hello".to_vec()); + let result = fs.write( + ctx, + entry.inode, + invalid_handle, + &mut reader, + 5, + 0, + None, + false, + false, + 0, + ); + + // Should fail with EBADF + match result { + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::EBADF)); + } + Ok(_) => panic!("Expected error for invalid handle"), + } + + Ok(()) +} + +#[test] +fn test_write_multiple_times() -> io::Result<()> { + // Create a simple overlayfs with a single layer 
containing an empty file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, (libc::O_WRONLY | libc::O_TRUNC) as u32)?; + let handle = handle.unwrap(); + + // Write content to the file in multiple operations + let content1 = b"Hello, "; + let mut reader1 = TestContainer(content1.to_vec()); + let bytes_written1 = fs.write( + ctx, + entry.inode, + handle, + &mut reader1, + content1.len() as u32, + 0, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written1, content1.len()); + + let content2 = b"World!"; + let mut reader2 = TestContainer(content2.to_vec()); + let bytes_written2 = fs.write( + ctx, + entry.inode, + handle, + &mut reader2, + content2.len() as u32, + bytes_written1 as u64, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written2, content2.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(file_content, b"Hello, World!"); + + Ok(()) +} + +#[test] +fn test_write_nested_directories() -> io::Result<()> { + // Create an overlayfs with nested directories + let layers = vec![vec![ + ("dir1", true, 0o755), + ("dir1/dir2", true, 0o755), + ("dir1/dir2/file1", false, 0o644), + ]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup the nested directories and file + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, dir1_entry.inode, &dir2_name)?; + + let file_name = CString::new("file1").unwrap(); + 
let file_entry = fs.lookup(ctx, dir2_entry.inode, &file_name)?; + + // Open the file with write permissions + let (handle, _opts) = fs.open( + ctx, + file_entry.inode, + (libc::O_WRONLY | libc::O_TRUNC) as u32, + )?; + let handle = handle.unwrap(); + + // Write content to the file + let content = b"Nested file content"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + file_entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, file_entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_path = temp_dirs[0].path().join("dir1").join("dir2").join("file1"); + let file_content = std::fs::read(file_path)?; + assert_eq!(file_content, content); + + Ok(()) +} + +#[test] +fn test_write_with_whiteouts_and_opaque_dirs() -> io::Result<()> { + // Create an overlayfs with multiple layers, whiteouts, and opaque directories + let layers = vec![ + // Lower layer + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("file3", false, 0o644), + ], + // Upper layer with whiteout for file2 and opaque dir1 + vec![ + ("dir1", true, 0o755), + ("dir1/.wh..wh..opq", false, 0o644), // Opaque dir marker + ("dir1/file4", false, 0o644), // New file in opaque dir + (".wh.file3", false, 0o644), // Whiteout for file3 + ], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Test 1: Write to file4 in opaque directory + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let file4_name = CString::new("file4").unwrap(); + let file4_entry = fs.lookup(ctx, dir1_entry.inode, &file4_name)?; + + let (handle, _opts) = fs.open(ctx, file4_entry.inode, libc::O_WRONLY as u32)?; + let handle = handle.unwrap(); + + let content = b"File in 
opaque dir"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + file4_entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written, content.len()); + + fs.release(ctx, file4_entry.inode, 0, handle, false, false, None)?; + + // Verify content + let file_path = temp_dirs[1].path().join("dir1").join("file4"); + let file_content = std::fs::read(file_path)?; + assert_eq!(file_content, content); + + // Test 2: Try to access file1 through opaque directory (should fail) + let file1_name = CString::new("file1").unwrap(); + assert!(fs.lookup(ctx, dir1_entry.inode, &file1_name).is_err()); + + // Test 3: Try to access file3 (should fail due to whiteout) + let file3_name = CString::new("file3").unwrap(); + assert!(fs.lookup(ctx, 1, &file3_name).is_err()); + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/multikey.rs b/src/devices/src/virtio/fs/multikey.rs index 8dc35a447..27f7816c9 100644 --- a/src/devices/src/virtio/fs/multikey.rs +++ b/src/devices/src/virtio/fs/multikey.rs @@ -9,7 +9,7 @@ use std::collections::BTreeMap; /// `std::collections::BTreeMap` also apply to this struct. Additionally, there is a 1:1 /// relationship between the 2 key types. In other words, for each `K1` in the map, there is exactly /// one `K2` in the map and vice versa. -#[derive(Default)] +#[derive(Default, Debug)] pub struct MultikeyBTreeMap where K1: Ord, From 5d1bcd01ce14318828228a7d699313266ad524ca Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Mon, 24 Mar 2025 11:48:53 +0100 Subject: [PATCH 04/14] feat(fs): expose overlayfs API for macOS (#8) * feat(fs): expose overlayfs API for macOS Add public API for using OverlayFS functionality in libkrun on macOS by exposing the implementation through a new FsImplConfig enum. This allows clients to configure either passthrough or overlayfs filesystem modes. 
Key changes: - Add FsImplConfig enum to select between Passthrough and Overlayfs modes - Add FsImplShare enum to handle different sharing configurations - Refactor Fs implementation to delegate operations to selected backend - Update Config struct to include layers configuration - Clean up and reorganize filesystem server code - Add comprehensive test coverage for overlayfs operations The implementation maintains the existing passthrough functionality while adding the ability to configure overlayfs mode with multiple read-only layers and a writable top layer. * feat(overlayfs): add special handling for init.krun lookup Add special case handling in the lookup method to return a predefined entry when looking up "init.krun" file. This allows exposing the init binary with specific permissions (755) and size based on the included binary data. * fix(overlayfs): improve init.krun inode handling and documentation - Set init.krun inode ID dynamically using next_inode counter - Reorder init.krun inode check before handle data lookup in read() - Add documentation about default values for config options - Clarify proc_sfd_rawfd usage in sandboxing scenarios - Update comments to specifically reference init.krun * refactor(fs): remove debug println statements from overlayfs --- include/libkrun.h | 25 + src/devices/src/virtio/fs/device.rs | 45 +- src/devices/src/virtio/fs/kinds.rs | 647 ++++++++++++++++++ .../src/virtio/fs/macos/overlayfs/fs.rs | 101 +-- .../src/virtio/fs/macos/overlayfs/tests.rs | 8 +- .../virtio/fs/macos/overlayfs/tests/create.rs | 16 +- .../fs/macos/overlayfs/tests/metadata.rs | 24 +- .../virtio/fs/macos/overlayfs/tests/misc.rs | 41 +- .../virtio/fs/macos/overlayfs/tests/open.rs | 2 - src/devices/src/virtio/fs/mod.rs | 6 +- src/devices/src/virtio/fs/server.rs | 114 +-- src/devices/src/virtio/fs/worker.rs | 22 +- src/libkrun/src/lib.rs | 93 ++- src/vmm/src/builder.rs | 2 +- src/vmm/src/vmm_config/fs.rs | 4 +- 15 files changed, 972 insertions(+), 178 
deletions(-) create mode 100644 src/devices/src/virtio/fs/kinds.rs diff --git a/include/libkrun.h b/include/libkrun.h index efd3127a6..ab9cfa7c5 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -59,9 +59,34 @@ int32_t krun_set_vm_config(uint32_t ctx_id, uint8_t num_vcpus, uint32_t ram_mib) * * Returns: * Zero on success or a negative error number on failure. + * Documented errors: + * -EEXIST when a root device is already set + * + * Notes: + * This function is mutually exclusive with krun_set_overlayfs_root. */ int32_t krun_set_root(uint32_t ctx_id, const char *root_path); +/** + * Sets up an OverlayFS to be used as root for the microVM. Not available in libkrun-SEV. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "root_layers" - an array of string pointers to filesystem paths representing + * the layers to be used for the OverlayFS. The array must be + * NULL-terminated and contain at least one layer. + * + * Returns: + * Zero on success or a negative error number on failure. + * Documented errors: + * -EINVAL when no layers are provided + * -EEXIST when a root device is already set + * + * Notes: + * This function is mutually exclusive with krun_set_root. + */ +int32_t krun_set_overlayfs_root(uint32_t ctx_id, const char *const root_layers[]); + /** * DEPRECATED. Use krun_add_disk instead. 
* diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index 9d7a21e04..faa4b5b19 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -15,6 +15,8 @@ use vm_memory::{ByteValued, GuestMemoryMmap}; use super::super::{ ActivateResult, DeviceState, FsError, Queue as VirtQueue, VirtioDevice, VirtioShmRegion, }; +use super::kinds::{FsImplConfig, FsImplShare}; +use super::macos::overlayfs; use super::passthrough; use super::worker::FsWorker; use super::ExportTable; @@ -51,7 +53,7 @@ pub struct Fs { device_state: DeviceState, config: VirtioFsConfig, shm_region: Option, - passthrough_cfg: passthrough::Config, + fs_config: FsImplConfig, worker_thread: Option>, worker_stopfd: EventFd, #[cfg(target_os = "macos")] @@ -61,7 +63,7 @@ pub struct Fs { impl Fs { pub(crate) fn with_queues( fs_id: String, - shared_dir: String, + fs_share: FsImplShare, queues: Vec, ) -> super::Result { let mut queue_events = Vec::new(); @@ -76,10 +78,15 @@ impl Fs { let mut config = VirtioFsConfig::default(); config.tag[..tag.len()].copy_from_slice(tag.as_slice()); config.num_request_queues = 1; - - let fs_cfg = passthrough::Config { - root_dir: shared_dir, - ..Default::default() + let fs_config = match fs_share { + FsImplShare::Passthrough(root_dir) => FsImplConfig::Passthrough(passthrough::Config { + root_dir, + ..Default::default() + }), + FsImplShare::Overlayfs(layers) => FsImplConfig::Overlayfs(overlayfs::Config { + layers, + ..Default::default() + }), }; Ok(Fs { @@ -94,7 +101,7 @@ impl Fs { device_state: DeviceState::Inactive, config, shm_region: None, - passthrough_cfg: fs_cfg, + fs_config, worker_thread: None, worker_stopfd: EventFd::new(EFD_NONBLOCK).map_err(FsError::EventFd)?, #[cfg(target_os = "macos")] @@ -102,12 +109,12 @@ impl Fs { }) } - pub fn new(fs_id: String, shared_dir: String) -> super::Result { + pub fn new(fs_id: String, fs_share: FsImplShare) -> super::Result { let queues: Vec = defs::QUEUE_SIZES .iter() 
.map(|&max_size| VirtQueue::new(max_size)) .collect(); - Self::with_queues(fs_id, shared_dir, queues) + Self::with_queues(fs_id, fs_share, queues) } pub fn id(&self) -> &str { @@ -124,11 +131,20 @@ impl Fs { pub fn set_export_table(&mut self, export_table: ExportTable) -> u64 { static FS_UNIQUE_ID: AtomicU64 = AtomicU64::new(0); + let fsid = FS_UNIQUE_ID.fetch_add(1, Ordering::Relaxed); - self.passthrough_cfg.export_fsid = FS_UNIQUE_ID.fetch_add(1, Ordering::Relaxed); - self.passthrough_cfg.export_table = Some(export_table); + match &mut self.fs_config { + FsImplConfig::Passthrough(cfg) => { + cfg.export_fsid = fsid; + cfg.export_table = Some(export_table); + } + FsImplConfig::Overlayfs(cfg) => { + cfg.export_fsid = fsid; + cfg.export_table = Some(export_table); + } + } - self.passthrough_cfg.export_fsid + fsid } #[cfg(target_os = "macos")] @@ -215,6 +231,7 @@ impl VirtioDevice for Fs { .iter() .map(|e| e.try_clone().unwrap()) .collect(); + let worker = FsWorker::new( self.queues.clone(), queue_evts, @@ -224,13 +241,13 @@ impl VirtioDevice for Fs { self.irq_line, mem.clone(), self.shm_region.clone(), - self.passthrough_cfg.clone(), + self.fs_config.clone(), self.worker_stopfd.try_clone().unwrap(), #[cfg(target_os = "macos")] self.map_sender.clone(), ); - self.worker_thread = Some(worker.run()); + self.worker_thread = Some(worker.run()); self.device_state = DeviceState::Activated(mem); Ok(()) } diff --git a/src/devices/src/virtio/fs/kinds.rs b/src/devices/src/virtio/fs/kinds.rs new file mode 100644 index 000000000..901cd721d --- /dev/null +++ b/src/devices/src/virtio/fs/kinds.rs @@ -0,0 +1,647 @@ +use std::{ffi::CStr, io, path::PathBuf, time::Duration}; + +use crossbeam_channel::Sender; +use hvf::MemoryMapping; + +use crate::virtio::bindings; + +use super::{ + filesystem::{ + Context, DirEntry, Entry, Extensions, FileSystem, GetxattrReply, ListxattrReply, + ZeroCopyReader, ZeroCopyWriter, + }, + fuse::{FsOptions, OpenOptions, RemovemappingOne, SetattrValid}, + 
overlayfs::{self, OverlayFs}, + passthrough::{self, PassthroughFs}, +}; + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +#[derive(Clone, Debug)] +pub enum FsImplConfig { + Passthrough(passthrough::Config), + Overlayfs(overlayfs::Config), +} + +pub enum FsImpl { + Passthrough(PassthroughFs), + Overlayfs(OverlayFs), +} + +#[derive(Clone, Debug)] +pub enum FsImplShare { + Passthrough(String), + Overlayfs(Vec), +} + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +impl FileSystem for FsImpl { + type Inode = u64; + type Handle = u64; + + fn init(&self, capable: FsOptions) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.init(capable), + FsImpl::Overlayfs(fs) => fs.init(capable), + } + } + + fn destroy(&self) { + match self { + FsImpl::Passthrough(fs) => fs.destroy(), + FsImpl::Overlayfs(fs) => fs.destroy(), + } + } + + fn lookup(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.lookup(ctx, parent, name), + FsImpl::Overlayfs(fs) => fs.lookup(ctx, parent, name), + } + } + + fn forget(&self, ctx: Context, inode: Self::Inode, count: u64) { + match self { + FsImpl::Passthrough(fs) => fs.forget(ctx, inode, count), + FsImpl::Overlayfs(fs) => fs.forget(ctx, inode, count), + } + } + + fn batch_forget(&self, ctx: Context, requests: Vec<(Self::Inode, u64)>) { + match self { + FsImpl::Passthrough(fs) => fs.batch_forget(ctx, requests), + FsImpl::Overlayfs(fs) => fs.batch_forget(ctx, requests), + } + } + + fn getattr( + &self, + ctx: Context, + inode: Self::Inode, + handle: Option, + ) -> io::Result<(bindings::stat64, Duration)> { + match self { + FsImpl::Passthrough(fs) 
=> fs.getattr(ctx, inode, handle), + FsImpl::Overlayfs(fs) => fs.getattr(ctx, inode, handle), + } + } + + fn setattr( + &self, + ctx: Context, + inode: Self::Inode, + attr: bindings::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(bindings::stat64, Duration)> { + match self { + FsImpl::Passthrough(fs) => fs.setattr(ctx, inode, attr, handle, valid), + FsImpl::Overlayfs(fs) => fs.setattr(ctx, inode, attr, handle, valid), + } + } + + fn readlink(&self, ctx: Context, inode: Self::Inode) -> io::Result> { + match self { + FsImpl::Passthrough(fs) => fs.readlink(ctx, inode), + FsImpl::Overlayfs(fs) => fs.readlink(ctx, inode), + } + } + + fn symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Self::Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.symlink(ctx, linkname, parent, name, extensions), + FsImpl::Overlayfs(fs) => fs.symlink(ctx, linkname, parent, name, extensions), + } + } + + #[allow(clippy::too_many_arguments)] + fn mknod( + &self, + ctx: Context, + inode: Self::Inode, + name: &CStr, + mode: u32, + rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.mknod(ctx, inode, name, mode, rdev, umask, extensions), + FsImpl::Overlayfs(fs) => fs.mknod(ctx, inode, name, mode, rdev, umask, extensions), + } + } + + fn mkdir( + &self, + ctx: Context, + parent: Self::Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.mkdir(ctx, parent, name, mode, umask, extensions), + FsImpl::Overlayfs(fs) => fs.mkdir(ctx, parent, name, mode, umask, extensions), + } + } + + fn unlink(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.unlink(ctx, parent, name), + FsImpl::Overlayfs(fs) => fs.unlink(ctx, parent, name), + } + } + + fn rmdir(&self, ctx: Context, parent: 
Self::Inode, name: &CStr) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.rmdir(ctx, parent, name), + FsImpl::Overlayfs(fs) => fs.rmdir(ctx, parent, name), + } + } + + fn rename( + &self, + ctx: Context, + olddir: Self::Inode, + oldname: &CStr, + newdir: Self::Inode, + newname: &CStr, + flags: u32, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.rename(ctx, olddir, oldname, newdir, newname, flags), + FsImpl::Overlayfs(fs) => fs.rename(ctx, olddir, oldname, newdir, newname, flags), + } + } + + fn link( + &self, + ctx: Context, + inode: Self::Inode, + newparent: Self::Inode, + newname: &CStr, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.link(ctx, inode, newparent, newname), + FsImpl::Overlayfs(fs) => fs.link(ctx, inode, newparent, newname), + } + } + + fn open( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + match self { + FsImpl::Passthrough(fs) => fs.open(ctx, inode, flags), + FsImpl::Overlayfs(fs) => fs.open(ctx, inode, flags), + } + } + + #[allow(clippy::too_many_arguments)] + fn create( + &self, + ctx: Context, + parent: Self::Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + match self { + FsImpl::Passthrough(fs) => fs.create(ctx, parent, name, mode, flags, umask, extensions), + FsImpl::Overlayfs(fs) => fs.create(ctx, parent, name, mode, flags, umask, extensions), + } + } + + #[allow(clippy::too_many_arguments)] + fn read( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + w: W, + size: u32, + offset: u64, + lock_owner: Option, + flags: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => { + fs.read(ctx, inode, handle, w, size, offset, lock_owner, flags) + } + FsImpl::Overlayfs(fs) => { + fs.read(ctx, inode, handle, w, size, offset, lock_owner, flags) + } + } + } + + #[allow(clippy::too_many_arguments)] + fn 
write( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + r: R, + size: u32, + offset: u64, + lock_owner: Option, + delayed_write: bool, + kill_priv: bool, + flags: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.write( + ctx, + inode, + handle, + r, + size, + offset, + lock_owner, + delayed_write, + kill_priv, + flags, + ), + FsImpl::Overlayfs(fs) => fs.write( + ctx, + inode, + handle, + r, + size, + offset, + lock_owner, + delayed_write, + kill_priv, + flags, + ), + } + } + + fn flush( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + lock_owner: u64, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.flush(ctx, inode, handle, lock_owner), + FsImpl::Overlayfs(fs) => fs.flush(ctx, inode, handle, lock_owner), + } + } + + fn fsync( + &self, + ctx: Context, + inode: Self::Inode, + datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.fsync(ctx, inode, datasync, handle), + FsImpl::Overlayfs(fs) => fs.fsync(ctx, inode, datasync, handle), + } + } + + fn fallocate( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.fallocate(ctx, inode, handle, mode, offset, length), + FsImpl::Overlayfs(fs) => fs.fallocate(ctx, inode, handle, mode, offset, length), + } + } + + #[allow(clippy::too_many_arguments)] + fn release( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + handle: Self::Handle, + flush: bool, + flock_release: bool, + lock_owner: Option, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => { + fs.release(ctx, inode, flags, handle, flush, flock_release, lock_owner) + } + FsImpl::Overlayfs(fs) => { + fs.release(ctx, inode, flags, handle, flush, flock_release, lock_owner) + } + } + } + + fn statfs(&self, ctx: Context, inode: Self::Inode) -> io::Result { + match self { + 
FsImpl::Passthrough(fs) => fs.statfs(ctx, inode), + FsImpl::Overlayfs(fs) => fs.statfs(ctx, inode), + } + } + + fn setxattr( + &self, + ctx: Context, + inode: Self::Inode, + name: &CStr, + value: &[u8], + flags: u32, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setxattr(ctx, inode, name, value, flags), + FsImpl::Overlayfs(fs) => fs.setxattr(ctx, inode, name, value, flags), + } + } + + fn getxattr( + &self, + ctx: Context, + inode: Self::Inode, + name: &CStr, + size: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.getxattr(ctx, inode, name, size), + FsImpl::Overlayfs(fs) => fs.getxattr(ctx, inode, name, size), + } + } + + fn listxattr(&self, ctx: Context, inode: Self::Inode, size: u32) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.listxattr(ctx, inode, size), + FsImpl::Overlayfs(fs) => fs.listxattr(ctx, inode, size), + } + } + + fn removexattr(&self, ctx: Context, inode: Self::Inode, name: &CStr) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.removexattr(ctx, inode, name), + FsImpl::Overlayfs(fs) => fs.removexattr(ctx, inode, name), + } + } + + fn opendir( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + match self { + FsImpl::Passthrough(fs) => fs.opendir(ctx, inode, flags), + FsImpl::Overlayfs(fs) => fs.opendir(ctx, inode, flags), + } + } + + fn readdir( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + match self { + FsImpl::Passthrough(fs) => fs.readdir(ctx, inode, handle, size, offset, add_entry), + FsImpl::Overlayfs(fs) => fs.readdir(ctx, inode, handle, size, offset, add_entry), + } + } + + fn readdirplus( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry, Entry) -> 
io::Result, + { + match self { + FsImpl::Passthrough(fs) => fs.readdirplus(ctx, inode, handle, size, offset, add_entry), + FsImpl::Overlayfs(fs) => fs.readdirplus(ctx, inode, handle, size, offset, add_entry), + } + } + + fn fsyncdir( + &self, + ctx: Context, + inode: Self::Inode, + datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.fsyncdir(ctx, inode, datasync, handle), + FsImpl::Overlayfs(fs) => fs.fsyncdir(ctx, inode, datasync, handle), + } + } + + fn releasedir( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + handle: Self::Handle, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.releasedir(ctx, inode, flags, handle), + FsImpl::Overlayfs(fs) => fs.releasedir(ctx, inode, flags, handle), + } + } + + fn access(&self, ctx: Context, inode: Self::Inode, mask: u32) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.access(ctx, inode, mask), + FsImpl::Overlayfs(fs) => fs.access(ctx, inode, mask), + } + } + + fn lseek( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + offset: u64, + whence: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.lseek(ctx, inode, handle, offset, whence), + FsImpl::Overlayfs(fs) => fs.lseek(ctx, inode, handle, offset, whence), + } + } + + #[allow(clippy::too_many_arguments)] + fn copyfilerange( + &self, + ctx: Context, + inode_in: Self::Inode, + handle_in: Self::Handle, + offset_in: u64, + inode_out: Self::Inode, + handle_out: Self::Handle, + offset_out: u64, + len: u64, + flags: u64, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.copyfilerange( + ctx, inode_in, handle_in, offset_in, inode_out, handle_out, offset_out, len, flags, + ), + FsImpl::Overlayfs(fs) => fs.copyfilerange( + ctx, inode_in, handle_in, offset_in, inode_out, handle_out, offset_out, len, flags, + ), + } + } + + #[allow(clippy::too_many_arguments)] + fn setupmapping( + &self, + ctx: Context, + inode: 
Self::Inode, + handle: Self::Handle, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + host_shm_base: u64, + shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setupmapping( + ctx, + inode, + handle, + foffset, + len, + flags, + moffset, + host_shm_base, + shm_size, + map_sender, + ), + FsImpl::Overlayfs(fs) => fs.setupmapping( + ctx, + inode, + handle, + foffset, + len, + flags, + moffset, + host_shm_base, + shm_size, + map_sender, + ), + } + } + + fn removemapping( + &self, + ctx: Context, + requests: Vec, + host_shm_base: u64, + shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => { + fs.removemapping(ctx, requests, host_shm_base, shm_size, map_sender) + } + FsImpl::Overlayfs(fs) => { + fs.removemapping(ctx, requests, host_shm_base, shm_size, map_sender) + } + } + } + + #[allow(clippy::too_many_arguments)] + fn ioctl( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + flags: u32, + cmd: u32, + arg: u64, + in_size: u32, + out_size: u32, + ) -> io::Result> { + match self { + FsImpl::Passthrough(fs) => { + fs.ioctl(ctx, inode, handle, flags, cmd, arg, in_size, out_size) + } + FsImpl::Overlayfs(fs) => { + fs.ioctl(ctx, inode, handle, flags, cmd, arg, in_size, out_size) + } + } + } + + fn getlk(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.getlk(), + FsImpl::Overlayfs(fs) => fs.getlk(), + } + } + + fn setlk(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setlk(), + FsImpl::Overlayfs(fs) => fs.setlk(), + } + } + + fn setlkw(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setlkw(), + FsImpl::Overlayfs(fs) => fs.setlkw(), + } + } + + fn bmap(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.bmap(), + FsImpl::Overlayfs(fs) => fs.bmap(), + } + } + + fn poll(&self) -> 
io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.poll(), + FsImpl::Overlayfs(fs) => fs.poll(), + } + } + + fn notify_reply(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.notify_reply(), + FsImpl::Overlayfs(fs) => fs.notify_reply(), + } + } +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/fs.rs b/src/devices/src/virtio/fs/macos/overlayfs/fs.rs index 0e8d16c33..79b7eca45 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/fs.rs +++ b/src/devices/src/virtio/fs/macos/overlayfs/fs.rs @@ -48,6 +48,8 @@ const MAX_LAYERS: usize = 128; #[cfg(not(feature = "efi"))] static INIT_BINARY: &[u8] = include_bytes!("../../../../../../../init/init"); +const INIT_CSTR: &[u8] = b"init.krun\0"; + //-------------------------------------------------------------------------------------------------- // Types //-------------------------------------------------------------------------------------------------- @@ -133,7 +135,7 @@ enum FileId { } /// Configuration for the overlay filesystem -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Config { /// How long the FUSE client should consider directory entries to be valid. /// If the contents of a directory can only be modified by the FUSE client, @@ -143,6 +145,8 @@ pub struct Config { /// How long the FUSE client should consider file and directory attributes to be valid. /// If the attributes of a file or directory can only be modified by the FUSE client, /// this should be a large value. + /// + /// The default value is 5 seconds. pub attr_timeout: Duration, /// The caching policy the file system should use. @@ -158,7 +162,10 @@ pub struct Config { pub xattr: bool, /// Optional file descriptor for /proc/self/fd. - /// This is useful for sandboxing scenarios. + /// Callers can obtain a file descriptor and pass it here, so there's no need to open it in + /// OverlayFs::new(). This is specially useful for sandboxing. + /// + /// The default is `None`. 
pub proc_sfd_rawfd: Option, /// ID of this filesystem to uniquely identify exports. @@ -166,6 +173,9 @@ pub struct Config { /// Table of exported FDs to share with other subsystems. pub export_table: Option, + + /// Layers to be used for the overlay filesystem + pub layers: Vec, } /// An overlay filesystem implementation that combines multiple layers into a single logical filesystem. @@ -197,6 +207,11 @@ pub struct Config { /// - When reading, the top layer takes precedence over lower layers /// - Whiteout files in the top layer hide files from lower layers /// - Opaque directory markers completely mask lower layer directory contents +/// - It is undefined behavior for whiteouts and their corresponding entries to exist at the same level in the same directory. +/// For example, looking up such entry can result in different behavior depending on which is found first. +/// The filesystem will try to prevent adding whiteout entries directly. +/// +/// TODO: Need to implement entry caching to improve the performance of [`Self::lookup_segment_by_segment`]. 
pub struct OverlayFs { /// Map of inodes by ID and alternative keys inodes: RwLock>>, @@ -204,7 +219,7 @@ pub struct OverlayFs { /// Counter for generating the next inode ID next_inode: AtomicU64, - /// The initial inode ID (typically 1 for the root directory) + /// The `init.krun` inode ID init_inode: u64, /// Map of open file handles by ID @@ -213,7 +228,7 @@ pub struct OverlayFs { /// Counter for generating the next handle ID next_handle: AtomicU64, - /// The initial handle ID + /// The `init.krun` handle ID init_handle: u64, /// Map of memory-mapped windows @@ -244,15 +259,15 @@ impl InodeAltKey { impl OverlayFs { /// Creates a new OverlayFs with the given layers - pub fn new(layers: Vec, config: Config) -> io::Result { - if layers.is_empty() { + pub fn new(config: Config) -> io::Result { + if config.layers.is_empty() { return Err(io::Error::new( io::ErrorKind::InvalidInput, "at least one layer must be provided", )); } - if layers.len() > MAX_LAYERS { + if config.layers.len() > MAX_LAYERS { return Err(io::Error::new( io::ErrorKind::InvalidInput, "maximum overlayfs layer count exceeded", @@ -263,12 +278,16 @@ impl OverlayFs { let mut inodes = MultikeyBTreeMap::new(); // Initialize the root inodes for all layers - let layer_roots = Self::init_root_inodes(&layers, &mut inodes, &mut next_inode)?; + let layer_roots = Self::init_root_inodes(&config.layers, &mut inodes, &mut next_inode)?; + + // Set the `init.krun` inode + let init_inode = next_inode; + next_inode += 1; Ok(OverlayFs { inodes: RwLock::new(inodes), next_inode: AtomicU64::new(next_inode), - init_inode: 1, + init_inode, handles: RwLock::new(BTreeMap::new()), next_handle: AtomicU64::new(1), init_handle: 0, @@ -754,7 +773,6 @@ impl OverlayFs { Err(e) => return Some(Err(e)), }; - // TODO: whiteout should not override entry at the same level. so this check should be in not found case. 
// Check for whiteout at current level match self.check_whiteout(&parent_vol_path, segment_name) { Ok(true) => return None, // Found whiteout, stop searching @@ -1460,7 +1478,6 @@ impl OverlayFs { let entry = entry_result?; let name = entry.file_name(); let name_str = name.to_string_lossy(); - let inode_data = state.inode_data.as_ref().unwrap(); if state.seen.contains(name.as_bytes()) { continue; @@ -1472,31 +1489,10 @@ impl OverlayFs { opaque_marker_found = true; continue; } else if name_str.starts_with(WHITEOUT_PREFIX) { - // Whiteout file: extract the actual name + // Whiteout file; skip it let actual = &name_str[WHITEOUT_PREFIX.len()..]; - let actual_bytes = actual.as_bytes(); - if state.seen.contains(actual_bytes) { - continue; - } - - let actual_cstring = CString::new(actual).map_err(|_| { - io::Error::new(io::ErrorKind::Other, "Invalid whiteout name") - })?; - - let vol_path = self.dev_ino_and_name_to_vol_path( - inode_data.dev, - inode_data.ino, - &actual_cstring, - )?; - - match Self::unpatched_stat(&FileId::Path(vol_path)) { - Ok(_) => continue, - Err(e) if e.kind() == io::ErrorKind::NotFound => { - state.seen.insert(actual_bytes.to_vec()); - continue; - } - Err(e) => return Err(e), - } + state.seen.insert(actual.as_bytes().to_vec()); + continue; } else { state.seen.insert(name.as_bytes().to_vec()); } @@ -2656,7 +2652,11 @@ impl OverlayFs { let guest_addr = guest_shm_base + moffset; - let file = self.open_inode(inode, libc::O_RDWR)?; + // Ensure the inode is in the top layer + let inode_data = self.get_inode_data(inode)?; + let inode_data = self.ensure_top_layer(inode_data)?; + + let file = self.open_inode(inode_data.inode, libc::O_RDWR)?; let fd = file.as_raw_fd(); let host_addr = unsafe { @@ -2824,6 +2824,27 @@ impl FileSystem for OverlayFs { fn lookup(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result { Self::validate_name(name)?; + + #[cfg(not(feature = "efi"))] + let init_name = unsafe { 
CStr::from_bytes_with_nul_unchecked(INIT_CSTR) }; + + #[cfg(not(feature = "efi"))] + if self.init_inode != 0 && name == init_name { + let mut st: bindings::stat64 = unsafe { std::mem::zeroed() }; + st.st_size = INIT_BINARY.len() as i64; + st.st_ino = self.init_inode; + st.st_mode = 0o100_755; + + return Ok(Entry { + inode: self.init_inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + }) + } + let (entry, _) = self.do_lookup(parent, name)?; self.bump_refcount(entry.inode); Ok(entry) @@ -2947,14 +2968,13 @@ impl FileSystem for OverlayFs { _lock_owner: Option, _flags: u32, ) -> io::Result { - let data = self.get_inode_handle_data(inode, handle)?; - #[cfg(not(feature = "efi"))] if inode == self.init_inode { - println!("init inode"); return w.write(&INIT_BINARY[offset as usize..(offset + (size as u64)) as usize]); } + let data = self.get_inode_handle_data(inode, handle)?; + let f = data.file.read().unwrap(); w.write_from(&f, size as usize, offset) } @@ -3138,8 +3158,6 @@ impl FileSystem for OverlayFs { let st = Self::patched_stat(&FileId::Path(c_path))?; - println!("st: {:?}", st); - let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); if mode == libc::F_OK { @@ -3278,6 +3296,7 @@ impl Default for Config { proc_sfd_rawfd: None, export_fsid: 0, export_table: None, + layers: vec![], } } } diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests.rs index c89c77f7c..1fffe98a5 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests.rs +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests.rs @@ -154,8 +154,12 @@ mod helper { temp_dirs.push(temp_dir); } - let cfg = Config::default(); - let overlayfs = OverlayFs::new(layer_paths, cfg)?; + let cfg = Config { + layers: layer_paths, + ..Default::default() + }; + + let overlayfs = OverlayFs::new(cfg)?; Ok((overlayfs, temp_dirs)) } diff --git 
a/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs index 6079add5e..82b62398c 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs @@ -1,10 +1,4 @@ -use std::{ - ffi::CString, - fs::{self, FileType}, - io, - os::unix::fs::FileTypeExt, - path::Path, -}; +use std::{ffi::CString, fs, io}; use crate::virtio::{ bindings, @@ -1409,12 +1403,12 @@ fn test_mknod_basic() -> io::Result<()> { let ctx = Context::default(); // Test creating different types of nodes - let test_cases: Vec<(&str, u32, &str)> = vec![ - ("fifo1", libc::S_IFIFO as u32 | 0o644, "named pipe"), - ("sock1", libc::S_IFSOCK as u32 | 0o644, "unix domain socket"), + let test_cases: Vec<(&str, u32)> = vec![ + ("fifo1", libc::S_IFIFO as u32 | 0o644), + ("sock1", libc::S_IFSOCK as u32 | 0o644), ]; - for (name, mode, node_type) in test_cases { + for (name, mode) in test_cases { let node_name = CString::new(name).unwrap(); let entry = fs.mknod(ctx, 1, &node_name, mode, 0, 0o022, Extensions::default())?; diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs index 355fc7d33..5371acb0a 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs @@ -1,6 +1,12 @@ use std::{collections::HashSet, ffi::CString, fs, io}; -use crate::virtio::{bindings::{self, LINUX_ENODATA, LINUX_ENOSYS}, fs::filesystem::{Context, FileSystem, GetxattrReply, ListxattrReply}, fuse::{FsOptions, SetattrValid}, linux_errno::LINUX_ERANGE, macos::overlayfs::{Config, OverlayFs}}; +use crate::virtio::{ + bindings::{self, LINUX_ENODATA, LINUX_ENOSYS}, + fs::filesystem::{Context, FileSystem, GetxattrReply, ListxattrReply}, + fuse::{FsOptions, SetattrValid}, + linux_errno::LINUX_ERANGE, + macos::overlayfs::{Config, OverlayFs}, +}; use 
super::helper; @@ -508,7 +514,9 @@ fn test_xattrs() -> io::Result<()> { .map(|dir| dir.path().to_path_buf()) .collect::>(); - let overlayfs = OverlayFs::new(layer_paths, cfg)?; + cfg.layers = layer_paths; + + let overlayfs = OverlayFs::new(cfg)?; helper::debug_print_layers(&temp_dirs, false)?; // Initialize filesystem @@ -836,14 +844,12 @@ fn test_xattrs() -> io::Result<()> { // Create a new overlayfs with xattr disabled let mut cfg_no_xattr = Config::default(); cfg_no_xattr.xattr = false; + cfg_no_xattr.layers = temp_dirs + .iter() + .map(|dir| dir.path().to_path_buf()) + .collect(); - let overlayfs_no_xattr = OverlayFs::new( - temp_dirs - .iter() - .map(|dir| dir.path().to_path_buf()) - .collect(), - cfg_no_xattr, - )?; + let overlayfs_no_xattr = OverlayFs::new(cfg_no_xattr)?; overlayfs_no_xattr.init(FsOptions::empty())?; diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs index 34797bd20..598197c9f 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs @@ -238,8 +238,11 @@ fn test_copy_up_with_content() -> io::Result<()> { let layer_paths: Vec = temp_dirs.iter().map(|d| d.path().to_path_buf()).collect(); // Create the overlayfs - let cfg = Config::default(); - let fs = OverlayFs::new(layer_paths, cfg)?; + let cfg = Config { + layers: layer_paths, + ..Default::default() + }; + let fs = OverlayFs::new(cfg)?; let ctx = Context::default(); // Test 1: Open file1 from bottom layer with write access (should trigger copy-up) @@ -519,37 +522,3 @@ fn test_link_existing_name() -> io::Result<()> { Ok(()) } - -#[test] -fn test_readlink_whiteout() -> io::Result<()> { - // Create test layers: - // Lower layer: target1, link1 -> target1 - // Upper layer: .wh.link1 (whiteout for link1) - let layers = vec![ - vec![("target1", false, 0o644)], - vec![(".wh.link1", false, 0o644)], // Whiteout file - ]; - - let (fs, temp_dirs) = 
helper::create_overlayfs(layers)?; - - // Create symlink in bottom layer - std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link1"))?; - - // Initialize filesystem - fs.init(FsOptions::empty())?; - - // Try to lookup whited-out symlink (should fail) - let link_name = CString::new("link1").unwrap(); - match fs.lookup(Context::default(), 1, &link_name) { - Ok(_) => panic!("Expected lookup of whited-out symlink to fail"), - Err(e) => { - assert_eq!( - e.raw_os_error(), - Some(libc::ENOENT), - "Looking up whited-out symlink should return ENOENT" - ); - } - } - - Ok(()) -} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs b/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs index c09d6fb83..7d4916c68 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs +++ b/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs @@ -127,8 +127,6 @@ fn test_open_whiteout() -> io::Result<()> { // Verify lookup fails assert!(result.is_err()); - // Since we can't directly check the error code with assert_eq! 
due to Debug trait issues, - // we'll just verify the file doesn't exist by trying to open a non-existent inode let non_existent_inode = 999; // Use a high number that shouldn't exist let open_result = fs.open(ctx, non_existent_inode, libc::O_RDONLY as u32); assert!(open_result.is_err()); diff --git a/src/devices/src/virtio/fs/mod.rs b/src/devices/src/virtio/fs/mod.rs index ea475a5c1..0ec47817c 100644 --- a/src/devices/src/virtio/fs/mod.rs +++ b/src/devices/src/virtio/fs/mod.rs @@ -1,10 +1,11 @@ mod device; #[allow(dead_code)] mod filesystem; +mod server; pub mod fuse; +mod kinds; #[allow(dead_code)] mod multikey; -mod server; mod worker; #[cfg(target_os = "linux")] @@ -15,9 +16,12 @@ pub use linux::fs_utils; pub use linux::passthrough; #[cfg(target_os = "macos")] pub mod macos; +pub use kinds::*; #[cfg(target_os = "macos")] pub use macos::fs_utils; #[cfg(target_os = "macos")] +pub use macos::overlayfs; +#[cfg(target_os = "macos")] pub use macos::passthrough; use super::bindings; diff --git a/src/devices/src/virtio/fs/server.rs b/src/devices/src/virtio/fs/server.rs index b477a099e..a6ba689fd 100644 --- a/src/devices/src/virtio/fs/server.rs +++ b/src/devices/src/virtio/fs/server.rs @@ -1,7 +1,3 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- #[cfg(target_os = "macos")] use crossbeam_channel::Sender; #[cfg(target_os = "macos")] @@ -17,61 +13,47 @@ use std::sync::atomic::{AtomicU64, Ordering}; use vm_memory::ByteValued; use super::super::linux_errno::linux_error; -use super::bindings; use super::descriptor_utils::{Reader, Writer}; -use super::filesystem::{ - Context, DirEntry, Entry, Extensions, FileSystem, GetxattrReply, ListxattrReply, SecContext, - ZeroCopyReader, ZeroCopyWriter, -}; +use super::filesystem::{Context, DirEntry, Entry, Extensions, FileSystem, GetxattrReply, ListxattrReply, SecContext, ZeroCopyReader, ZeroCopyWriter}; use super::fs_utils::einval; use super::fuse::*; +use super::{bindings, FsImpl}; use super::{FsError as Error, Result}; use crate::virtio::VirtioShmRegion; -const MAX_BUFFER_SIZE: u32 = 1 << 20; -const BUFFER_HEADER_SIZE: u32 = 0x1000; -const DIRENT_PADDING: [u8; 8] = [0; 8]; - -struct ZCReader<'a>(Reader<'a>); - -impl ZeroCopyReader for ZCReader<'_> { - fn read_to(&mut self, f: &File, count: usize, off: u64) -> io::Result { - self.0.read_to_at(f, count, off) - } +//-------------------------------------------------------------------------------------------------- +// Constants +//-------------------------------------------------------------------------------------------------- + +pub(super) const MAX_BUFFER_SIZE: u32 = 1 << 20; +pub(super) const BUFFER_HEADER_SIZE: u32 = 0x1000; +pub(super) const DIRENT_PADDING: [u8; 8] = [0; 8]; + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +/// `FsImplServer` is a concrete FUSE server implementation designed to work with specific +/// filesystem implementations provided by libkrun, particularly: +/// +/// - [`PassthroughFs`]: For direct passthrough access to the host filesystem +/// - [`OverlayFs`]: For overlayfs functionality to combine multiple filesystem layers 
+pub struct FsImplServer { + fs: FsImpl, + options: AtomicU64, } -impl io::Read for ZCReader<'_> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.0.read(buf) - } -} +struct ZCReader<'a>(Reader<'a>); struct ZCWriter<'a>(Writer<'a>); -impl ZeroCopyWriter for ZCWriter<'_> { - fn write_from(&mut self, f: &File, count: usize, off: u64) -> io::Result { - self.0.write_from_at(f, count, off) - } -} - -impl io::Write for ZCWriter<'_> { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.0.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.0.flush() - } -} - -pub struct Server { - fs: F, - options: AtomicU64, -} +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- -impl Server { - pub fn new(fs: F) -> Server { - Server { +impl FsImplServer { + pub fn new(fs: FsImpl) -> FsImplServer { + FsImplServer { fs, options: AtomicU64::new(FsOptions::empty().bits()), } @@ -94,7 +76,7 @@ impl Server { w, ); } - debug!("opcode: {}", in_header.opcode); + match in_header.opcode { x if x == Opcode::Lookup as u32 => self.lookup(in_header, r, w), x if x == Opcode::Forget as u32 => self.forget(in_header, r), // No reply. 
@@ -1409,6 +1391,42 @@ impl Server { } } +//-------------------------------------------------------------------------------------------------- +// Trait Implementations +//-------------------------------------------------------------------------------------------------- + +impl ZeroCopyReader for ZCReader<'_> { + fn read_to(&mut self, f: &File, count: usize, off: u64) -> io::Result { + self.0.read_to_at(f, count, off) + } +} + +impl io::Read for ZCReader<'_> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.0.read(buf) + } +} + +impl ZeroCopyWriter for ZCWriter<'_> { + fn write_from(&mut self, f: &File, count: usize, off: u64) -> io::Result { + self.0.write_from_at(f, count, off) + } +} + +impl io::Write for ZCWriter<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.0.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.0.flush() + } +} + +//-------------------------------------------------------------------------------------------------- +// Functions +//-------------------------------------------------------------------------------------------------- + fn reply_ok( out: Option, data: Option<&[u8]>, diff --git a/src/devices/src/virtio/fs/worker.rs b/src/devices/src/virtio/fs/worker.rs index a0b93e08d..b95d2019b 100644 --- a/src/devices/src/virtio/fs/worker.rs +++ b/src/devices/src/virtio/fs/worker.rs @@ -15,8 +15,10 @@ use vm_memory::GuestMemoryMmap; use super::super::{FsError, Queue, VIRTIO_MMIO_INT_VRING}; use super::defs::{HPQ_INDEX, REQ_INDEX}; use super::descriptor_utils::{Reader, Writer}; -use super::passthrough::{self, PassthroughFs}; -use super::server::Server; +use super::server::FsImplServer; +use super::overlayfs::OverlayFs; +use super::passthrough::PassthroughFs; +use super::{FsImpl, FsImplConfig}; use crate::legacy::GicV3; use crate::virtio::VirtioShmRegion; @@ -30,7 +32,7 @@ pub struct FsWorker { mem: GuestMemoryMmap, shm_region: Option, - server: Server, + server: FsImplServer, stop_fd: EventFd, #[cfg(target_os 
= "macos")] map_sender: Option>, @@ -47,10 +49,19 @@ impl FsWorker { irq_line: Option, mem: GuestMemoryMmap, shm_region: Option, - passthrough_cfg: passthrough::Config, + fs_config: FsImplConfig, stop_fd: EventFd, #[cfg(target_os = "macos")] map_sender: Option>, ) -> Self { + let server = match fs_config { + FsImplConfig::Passthrough(passthrough_cfg) => FsImplServer::new(FsImpl::Passthrough( + PassthroughFs::new(passthrough_cfg).unwrap(), + )), + FsImplConfig::Overlayfs(overlayfs_cfg) => { + FsImplServer::new(FsImpl::Overlayfs(OverlayFs::new(overlayfs_cfg).unwrap())) + } + }; + Self { queues, queue_evts, @@ -58,10 +69,9 @@ impl FsWorker { interrupt_evt, intc, irq_line, - mem, shm_region, - server: Server::new(PassthroughFs::new(passthrough_cfg).unwrap()), + server, stop_fd, #[cfg(target_os = "macos")] map_sender, diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 9af22c511..286f7864c 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -20,6 +20,7 @@ use std::sync::Mutex; use crossbeam_channel::unbounded; #[cfg(feature = "blk")] use devices::virtio::block::ImageType; +use devices::virtio::fs::FsImplShare; #[cfg(feature = "net")] use devices::virtio::net::device::VirtioNetBackend; #[cfg(feature = "blk")] @@ -404,14 +405,76 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) }; let fs_id = "/dev/root".to_string(); - let shared_dir = root_path.to_string(); + let fs_share = FsImplShare::Passthrough(root_path.to_string()); match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); + + // Check if root device is already set + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + + cfg.vmr.add_fs_device(FsDeviceConfig { + fs_id, + fs_share, + // Default to a conservative 512 MB window. 
+ shm_size: Some(1 << 29), + }); + } + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(not(feature = "tee"))] +pub unsafe extern "C" fn krun_set_overlayfs_root( + ctx_id: u32, + c_root_layers: *const *const c_char, +) -> i32 { + let mut layers = Vec::new(); + let layers_array: &[*const c_char] = slice::from_raw_parts(c_root_layers, MAX_ARGS); + + for item in layers_array.iter().take(MAX_ARGS) { + if item.is_null() { + break; + } else { + let layer_path = match CStr::from_ptr(*item).to_str() { + Ok(path) => path, + Err(_) => return -libc::EINVAL, + }; + layers.push(PathBuf::from(layer_path)); + } + } + + // Need at least one layer + if layers.is_empty() { + return -libc::EINVAL; + } + + let fs_id = "/dev/root".to_string(); + let fs_share = FsImplShare::Overlayfs(layers); + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + let cfg = ctx_cfg.get_mut(); + + // Check if root device is already set + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + cfg.vmr.add_fs_device(FsDeviceConfig { fs_id, - shared_dir, + fs_share, // Default to a conservative 512 MB window. 
shm_size: Some(1 << 29), }); @@ -442,9 +505,18 @@ pub unsafe extern "C" fn krun_add_virtiofs( match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); + + // Check if a device with the same tag already exists + let fs_id = tag.to_string(); + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + cfg.vmr.add_fs_device(FsDeviceConfig { - fs_id: tag.to_string(), - shared_dir: path.to_string(), + fs_id, + fs_share: FsImplShare::Passthrough(path.to_string()), shm_size: None, }); } @@ -475,9 +547,18 @@ pub unsafe extern "C" fn krun_add_virtiofs2( match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); + + // Check if a device with the same tag already exists + let fs_id = tag.to_string(); + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + cfg.vmr.add_fs_device(FsDeviceConfig { - fs_id: tag.to_string(), - shared_dir: path.to_string(), + fs_id, + fs_share: FsImplShare::Passthrough(path.to_string()), shm_size: Some(shm_size.try_into().unwrap()), }); } diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8eda82a10..f5a94154e 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1203,7 +1203,7 @@ fn attach_fs_devices( for (i, config) in fs_devs.iter().enumerate() { let fs = Arc::new(Mutex::new( - devices::virtio::Fs::new(config.fs_id.clone(), config.shared_dir.clone()).unwrap(), + devices::virtio::Fs::new(config.fs_id.clone(), config.fs_share.clone()).unwrap(), )); let id = format!("{}{}", String::from(fs.lock().unwrap().id()), i); diff --git a/src/vmm/src/vmm_config/fs.rs b/src/vmm/src/vmm_config/fs.rs index cc7995021..f5690f56c 100644 --- a/src/vmm/src/vmm_config/fs.rs +++ b/src/vmm/src/vmm_config/fs.rs @@ -1,6 +1,8 @@ +use devices::virtio::fs::FsImplShare; + #[derive(Clone, Debug)] pub struct FsDeviceConfig { pub fs_id: String, - pub shared_dir: String, + pub 
fs_share: FsImplShare, pub shm_size: Option, } From 61209878efdf943ccc00895f304d592120d04278 Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Tue, 1 Apr 2025 16:04:16 +0100 Subject: [PATCH 05/14] feat(net): Implement IP filtering for TSI backend (#9) * feat(net): Implement IP filtering for TSI backend This commit introduces IP-based filtering capabilities for the default TSI (Transparent Socket Impersonation) network backend in libkrun. A new C API function, `krun_set_tsi_scope`, allows users to configure: - An optional static IP address for the guest within the host network namespace. If specified, the guest can only bind/listen on this IP. - An optional subnet (in CIDR notation) defining the allowed communication group when scope 1 is used. - A reachability scope (0-3) controlling network access: - 0: Deny all IP communication. - 1: Allow communication only within the specified `subnet`. - 2: Allow communication only with public (non-private) IPs. - 3: Allow communication with any IP. The filtering logic is implemented in `src/devices/src/virtio/vsock/ip_filter.rs` and integrated into the `VsockMuxer`. It checks destination IPs for connect/sendto operations and bind IPs for listen operations against the configured rules. If an operation is denied, an appropriate error (ECONNREFUSED or EACCES) is sent back to the guest via a vsock control message. This feature enhances security by allowing finer-grained control over the network connectivity of krun virtual machines when using the TSI backend. Changes include: - Added `krun_set_tsi_scope` to `libkrun.h` and implementation in `lib.rs`. - Added `ipnetwork` dependency to relevant Cargo.toml files. - Created `ip_filter.rs` module for filtering logic. - Updated `Vsock` device, `VsockMuxer`, and `VsockDeviceConfig` to handle IP, subnet, and scope configuration. - Integrated filtering checks into `VsockMuxer`'s packet processing methods. - Added helper functions in `VsockMuxer` to send error responses to the guest. 
* refactor(net): rename reach to scope in header file --- Cargo.lock | 9 ++ include/libkrun.h | 24 +++++ src/devices/Cargo.toml | 1 + src/devices/src/virtio/vsock/device.rs | 16 ++- src/devices/src/virtio/vsock/ip_filter.rs | 93 +++++++++++++++++ src/devices/src/virtio/vsock/mod.rs | 1 + src/devices/src/virtio/vsock/muxer.rs | 119 ++++++++++++++++++++++ src/libkrun/Cargo.toml | 1 + src/libkrun/src/lib.rs | 114 +++++++++++++++++---- src/vmm/Cargo.toml | 1 + src/vmm/src/vmm_config/vsock.rs | 17 +++- 11 files changed, 376 insertions(+), 20 deletions(-) create mode 100644 src/devices/src/virtio/vsock/ip_filter.rs diff --git a/Cargo.lock b/Cargo.lock index 93a6f13bb..385ce35c9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -364,6 +364,7 @@ dependencies = [ "hvf", "imago", "intaglio", + "ipnetwork", "libc", "log", "lru", @@ -706,6 +707,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8972d5be69940353d5347a1344cb375d9b457d6809b428b05bb1ca2fb9ce007" +[[package]] +name = "ipnetwork" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf370abdafd54d13e54a620e8c3e1145f28e46cc9d704bc6d94414559df41763" + [[package]] name = "itertools" version = "0.12.1" @@ -796,6 +803,7 @@ dependencies = [ "devices", "env_logger", "hvf", + "ipnetwork", "libc", "log", "once_cell", @@ -1663,6 +1671,7 @@ dependencies = [ "devices", "env_logger", "hvf", + "ipnetwork", "kbs-types", "kernel", "kvm-bindings", diff --git a/include/libkrun.h b/include/libkrun.h index ab9cfa7c5..a03745183 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -318,6 +318,30 @@ int32_t krun_set_net_mac(uint32_t ctx_id, uint8_t *const c_mac); */ int32_t krun_set_port_map(uint32_t ctx_id, const char *const port_map[]); +/** + * Configures the static IP, subnet, and scope for the TSI network backend. + * + * Arguments: + * "ctx_id" - the configuration context ID. 
+ * "c_ip" - an optional null-terminated string representing the guest's static IPv4 address. + * "c_subnet" - an optional null-terminated string representing the guest's subnet in CIDR notation (e.g., "192.168.1.0/24"). + * "scope" - an integer specifying the scope (0-3). Refer to TSI documentation for details. + * + * Returns: + * Zero on success or a negative error number on failure. + * Documented errors: + * -EINVAL if scope value is > 3 or IP/subnet strings are invalid. + * -ENOTSUP if the network mode is not TSI. + * + * Notes: + * This function is only effective when the default TSI network backend is used (i.e., neither + * krun_set_passt_fd nor krun_set_gvproxy_path has been called). + */ +int32_t krun_set_tsi_scope(uint32_t ctx_id, + const char *c_ip, + const char *c_subnet, + uint8_t scope); + /* Flags for virglrenderer. Copied from virglrenderer bindings. */ #define VIRGLRENDERER_USE_EGL 1 << 0 #define VIRGLRENDERER_THREAD_SYNC 1 << 1 diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 7bf79ae60..6342082ab 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -29,6 +29,7 @@ virtio-bindings = "0.2.0" vm-memory = { version = ">=0.13", features = ["backend-mmap"] } zerocopy = { version = "0.6.3", optional = true } zerocopy-derive = { version = "0.6.3", optional = true } +ipnetwork = "0.21" arch = { path = "../arch" } utils = { path = "../utils" } diff --git a/src/devices/src/virtio/vsock/device.rs b/src/devices/src/virtio/vsock/device.rs index 01df1317d..4386cfd0e 100644 --- a/src/devices/src/virtio/vsock/device.rs +++ b/src/devices/src/virtio/vsock/device.rs @@ -6,11 +6,13 @@ // found in the THIRD-PARTY file. 
use std::collections::HashMap; +use std::net::Ipv4Addr; use std::path::PathBuf; use std::result; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; +use ipnetwork::Ipv4Network; use utils::byte_order; use utils::eventfd::EventFd; use vm_memory::GuestMemoryMmap; @@ -20,6 +22,7 @@ use super::super::{ ActivateError, ActivateResult, DeviceState, Queue as VirtQueue, VirtioDevice, VsockError, VIRTIO_MMIO_INT_VRING, }; +use super::ip_filter::IpFilterConfig; use super::muxer::VsockMuxer; use super::packet::VsockPacket; use super::{defs, defs::uapi}; @@ -60,6 +63,9 @@ impl Vsock { host_port_map: Option>, queues: Vec, unix_ipc_port_map: Option>, + ip: Option, + subnet: Option, + scope: u8, ) -> super::Result { let mut queue_events = Vec::new(); for _ in 0..queues.len() { @@ -82,6 +88,11 @@ impl Vsock { interrupt_evt.try_clone().unwrap(), interrupt_status.clone(), unix_ipc_port_map, + IpFilterConfig { + ip, + subnet, + scope, + }, ), queue_rx, queue_tx, @@ -104,12 +115,15 @@ impl Vsock { cid: u64, host_port_map: Option>, unix_ipc_port_map: Option>, + ip: Option, + subnet: Option, + reach: u8, ) -> super::Result { let queues: Vec = defs::QUEUE_SIZES .iter() .map(|&max_size| VirtQueue::new(max_size)) .collect(); - Self::with_queues(cid, host_port_map, queues, unix_ipc_port_map) + Self::with_queues(cid, host_port_map, queues, unix_ipc_port_map, ip, subnet, reach) } pub fn id(&self) -> &str { diff --git a/src/devices/src/virtio/vsock/ip_filter.rs b/src/devices/src/virtio/vsock/ip_filter.rs new file mode 100644 index 000000000..6eed6382c --- /dev/null +++ b/src/devices/src/virtio/vsock/ip_filter.rs @@ -0,0 +1,93 @@ +use ipnetwork::Ipv4Network; +use std::net::Ipv4Addr; + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +/// Configuration for IP-based filtering in the Vsock Muxer. 
+#[derive(Clone, Debug)] +pub struct IpFilterConfig { + /// Defines the scope of allowed connections/bindings. + /// 0: None (Block all IP communication) + /// 1: Group (Allow within `subnet`, bind only to `ip` if specified) + /// 2: Public (Allow public IPs, bind only to `ip` if specified) + /// 3: Any (Allow any IP, bind only to `ip` if specified) + pub scope: u8, + + /// If specified, binding/listening is ONLY allowed on this specific IP address + /// (ignored if scope is 0). + pub ip: Option, + + /// The allowed subnet for Scope 1 (Group). Required if scope is 1. + pub subnet: Option, +} + +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- + +impl IpFilterConfig { + /// Checks if the configuration is logically valid. + pub fn is_valid(&self) -> bool { + match self.scope { + 0 | 2 | 3 => true, // Scopes 0, 2, 3 are valid without extra checks (ip is optional) + 1 => self.subnet.is_some(), // Scope 1 requires a subnet + _ => false, // Invalid scope number + } + } + + /// Checks if an IP address is considered private. + /// (Includes loopback, private ranges, link-local, broadcast, documentation, shared CGN) + fn is_private(ip: Ipv4Addr) -> bool { + ip.is_loopback() + || ip.is_private() + || ip.is_link_local() + || ip.is_broadcast() + || ip.is_documentation() + || match ip.octets() { + [100, b, _, _] if b >= 64 && b <= 127 => true, // Shared Address Space (RFC 6598) + _ => false, + } + } + + /// Checks if connecting to a given destination IP is allowed by the filter rules. 
+ pub fn is_allowed_connect(&self, dest_ip: Ipv4Addr) -> bool { + match self.scope { + 0 => false, // Scope 0: Deny all connections + 1 => { + // Scope 1: Group - Allow connection only if dest_ip is within the specified subnet + self.subnet.map_or(false, |subnet| subnet.contains(dest_ip)) + } + 2 => { + // Scope 2: Public - Allow connection only if dest_ip is NOT private + !Self::is_private(dest_ip) + } + 3 => true, // Scope 3: Any - Allow connection to any IP + _ => false, // Invalid scope + } + } + + /// Checks if binding to a given IP is allowed by the filter rules. + pub fn is_allowed_bind(&self, bind_ip: Ipv4Addr) -> bool { + if self.scope == 0 { + return false; // Scope 0: Deny all binding + } + + // Rule: "if ip specified, only the ip can be bound to or listened on." + if let Some(allowed_bind_ip) = self.ip { + return bind_ip == allowed_bind_ip; + } + + // No specific IP specified, check based on scope rules for the bind_ip itself + match self.scope { + // Scope 1: Group - Allow binding within the subnet if no specific IP given + 1 => self.subnet.map_or(false, |subnet| subnet.contains(bind_ip)), + // Scope 2: Public - Allow binding to public IPs if no specific IP given + 2 => !Self::is_private(bind_ip), + // Scope 3: Any - Allow binding to any IP if no specific IP given + 3 => true, + _ => false, // Invalid scope (scope 0 already handled) + } + } +} diff --git a/src/devices/src/virtio/vsock/mod.rs b/src/devices/src/virtio/vsock/mod.rs index 49917c5bf..b3916c472 100644 --- a/src/devices/src/virtio/vsock/mod.rs +++ b/src/devices/src/virtio/vsock/mod.rs @@ -19,6 +19,7 @@ mod tcp; mod timesync; mod udp; mod unix; +mod ip_filter; pub use self::defs::uapi::VIRTIO_ID_VSOCK as TYPE_VSOCK; pub use self::device::Vsock; diff --git a/src/devices/src/virtio/vsock/muxer.rs b/src/devices/src/virtio/vsock/muxer.rs index 6d027dcd0..d44a22db0 100644 --- a/src/devices/src/virtio/vsock/muxer.rs +++ b/src/devices/src/virtio/vsock/muxer.rs @@ -27,6 +27,8 @@ use 
vm_memory::GuestMemoryMmap; use std::net::Ipv4Addr; +use super::ip_filter::IpFilterConfig; + pub type ProxyMap = Arc>>>>; /// A muxer RX queue item. @@ -112,6 +114,7 @@ pub struct VsockMuxer { proxy_map: ProxyMap, reaper_sender: Option>, unix_ipc_port_map: Option>, + ip_filter: IpFilterConfig, } impl VsockMuxer { @@ -121,7 +124,12 @@ impl VsockMuxer { interrupt_evt: EventFd, interrupt_status: Arc, unix_ipc_port_map: Option>, + ip_filter: IpFilterConfig, ) -> Self { + if !ip_filter.is_valid() { + warn!("Invalid IpFilterConfig provided during VsockMuxer creation: {:?}. Check configuration.", ip_filter); + } + VsockMuxer { cid, host_port_map, @@ -136,6 +144,7 @@ impl VsockMuxer { proxy_map: Arc::new(RwLock::new(HashMap::new())), reaper_sender: None, unix_ipc_port_map, + ip_filter, } } @@ -321,6 +330,15 @@ impl VsockMuxer { fn process_connect(&self, pkt: &VsockPacket) { debug!("vsock: proxy connect request"); if let Some(req) = pkt.read_connect_req() { + if !self.check_destination_ip(req.addr) { + warn!( + "vsock: connect filtered: connection from guest:{}:{} to host:{} denied by IP filter rules", + pkt.src_cid(), pkt.src_port(), req.addr + ); + self.send_connect_rsp(pkt.src_port(), pkt.dst_port(), -libc::ECONNREFUSED); + return; + } + let id = (req.peer_port as u64) << 32 | defs::TSI_PROXY_PORT as u64; debug!("vsock: proxy connect request: id={}", id); let update = self @@ -333,6 +351,9 @@ impl VsockMuxer { if let Some(update) = update { self.process_proxy_update(id, update); } + } else { + warn!("vsock: could not parse connect request buffer for filtering"); + self.send_connect_rsp(pkt.src_port(), pkt.dst_port(), -libc::EINVAL); } } @@ -354,6 +375,17 @@ impl VsockMuxer { fn process_sendto_addr(&self, pkt: &VsockPacket) { debug!("vsock: new DGRAM sendto addr: src={}", pkt.src_port()); if let Some(req) = pkt.read_sendto_addr() { + if !self.check_destination_ip(req.addr) { + warn!( + "vsock: sendto_addr filtered: send from guest:{}:{} to host:{} denied by IP filter 
rules", + pkt.src_cid(), pkt.src_port(), req.addr + ); + + // Send error response back to the guest + self.send_sendto_addr_error_rsp(pkt.src_port(), -libc::ECONNREFUSED); + return; + } + let id = (req.peer_port as u64) << 32 | defs::TSI_PROXY_PORT as u64; debug!("vsock: new DGRAM sendto addr: id={}", id); let update = self @@ -380,6 +412,15 @@ impl VsockMuxer { fn process_listen_request(&self, pkt: &VsockPacket) { debug!("vsock: DGRAM listen request: src={}", pkt.src_port()); if let Some(req) = pkt.read_listen_req() { + if !self.check_bind_ip(req.addr) { + warn!( + "vsock: listen filtered: attempt to listen on host:{} from guest:{}:{} denied by IP filter rules", + req.addr, pkt.src_cid(), pkt.src_port() + ); + self.send_listen_rsp(pkt.src_port(), pkt.dst_port(), -libc::EACCES); + return; + } + let id = (req.peer_port as u64) << 32 | defs::TSI_PROXY_PORT as u64; debug!("vsock: DGRAM listen request: id={}", id); let update = self @@ -668,4 +709,82 @@ impl VsockMuxer { } Ok(()) } + + #[inline] + fn check_destination_ip(&self, dest_ip: Ipv4Addr) -> bool { + self.ip_filter.is_allowed_connect(dest_ip) + } + + #[inline] + fn check_bind_ip(&self, bind_ip: Ipv4Addr) -> bool { + self.ip_filter.is_allowed_bind(bind_ip) + } + + // Helper function to send different types of responses back to the guest + fn send_response(&self, rx: MuxerRx) { + // Get references to the needed components + let mem = match self.mem.as_ref() { + Some(m) => m, + None => { + error!("vsock: cannot send response: mem is None"); + return; + } + }; + let queue = match self.queue.as_ref() { + Some(q) => q, + None => { + error!("vsock: cannot send response: queue is None"); + return; + } + }; + + // Send the response to the guest + push_packet(self.cid, rx, &self.rxq, queue, mem); + } + + // Helper function for sending sendto_addr error responses + fn send_sendto_addr_error_rsp(&self, peer_port: u32, result: i32) { + debug!( + "vsock: sending sendto_addr error response: peer_port={}, result={}", + 
peer_port, result + ); + + // This response goes to the control port (DGRAM) + let rx = MuxerRx::ConnResponse { + local_port: defs::TSI_SENDTO_ADDR, + peer_port, + result, + }; + self.send_response(rx); + } + + fn send_connect_rsp(&self, local_port: u32, peer_port: u32, result: i32) { + debug!( + "vsock: sending connect response: local_port={}, peer_port={}, result={}", + local_port, peer_port, result + ); + + // This response goes to the control port (DGRAM) + let rx = MuxerRx::ConnResponse { + local_port: defs::TSI_CONNECT, // TSI_CONNECT = 1025 + peer_port, + result, + }; + self.send_response(rx); + } + + fn send_listen_rsp(&self, local_port: u32, peer_port: u32, result: i32) { + debug!( + "vsock: sending listen response: local_port={}, peer_port={}, result={}", + local_port, peer_port, result + ); + + // This response goes to the control port (DGRAM) + let rx = MuxerRx::ListenResponse { + local_port: defs::TSI_LISTEN, // TSI_LISTEN = 1029 + peer_port, + result, + }; + self.send_response(rx); + } } diff --git a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml index e12347513..920c40f73 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -21,6 +21,7 @@ env_logger = "0.9.0" libc = ">=0.2.39" log = "0.4.0" once_cell = "1.4.1" +ipnetwork = "0.21" devices = { path = "../devices" } polly = { path = "../polly" } diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 286f7864c..a06ccbd24 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -1,25 +1,7 @@ #[macro_use] extern crate log; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::convert::TryInto; -use std::env; -use std::ffi::CStr; -#[cfg(target_os = "linux")] -use std::ffi::CString; -#[cfg(target_os = "linux")] -use std::os::fd::AsRawFd; -use std::os::fd::RawFd; -use std::path::PathBuf; -use std::slice; -use std::sync::atomic::{AtomicI32, Ordering}; -use std::sync::Mutex; - -#[cfg(target_os = "macos")] use crossbeam_channel::unbounded; 
-#[cfg(feature = "blk")] -use devices::virtio::block::ImageType; use devices::virtio::fs::FsImplShare; #[cfg(feature = "net")] use devices::virtio::net::device::VirtioNetBackend; @@ -28,11 +10,27 @@ use devices::virtio::CacheType; use env_logger::Env; #[cfg(target_os = "macos")] use hvf::MemoryMapping; +use ipnetwork::Ipv4Network; #[cfg(not(feature = "efi"))] use libc::size_t; use libc::{c_char, c_int}; use once_cell::sync::Lazy; use polly::event_manager::EventManager; +use std::collections::hash_map::Entry; +use std::collections::HashMap; +use std::convert::TryInto; +use std::env; +use std::ffi::CStr; +#[cfg(target_os = "linux")] +use std::ffi::CString; +use std::net::Ipv4Addr; +#[cfg(target_os = "linux")] +use std::os::fd::AsRawFd; +use std::os::fd::RawFd; +use std::path::PathBuf; +use std::slice; +use std::sync::atomic::{AtomicI32, Ordering}; +use std::sync::Mutex; use utils::eventfd::EventFd; use vmm::resources::VmResources; #[cfg(feature = "blk")] @@ -63,6 +61,9 @@ const INIT_PATH: &str = "/init.krun"; #[derive(Default)] struct TsiConfig { port_map: Option>, + ip: Option, + subnet: Option, + scope: u8, } enum NetworkConfig { @@ -833,6 +834,77 @@ pub unsafe extern "C" fn krun_set_port_map(ctx_id: u32, c_port_map: *const *cons KRUN_SUCCESS } +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +pub unsafe extern "C" fn krun_set_tsi_scope( + ctx_id: u32, + c_ip: *const c_char, + c_subnet: *const c_char, + scope: u8, +) -> i32 { + if scope > 3 { + error!("Invalid scope value: {}. 
Must be 0, 1, 2, or 3.", scope); + return -libc::EINVAL; + } + + let ip = if c_ip.is_null() { + None + } else { + match CStr::from_ptr(c_ip).to_str() { + Ok(s) if !s.is_empty() => { + // Parse IP format directly + match s.parse::() { + Ok(addr) => Some(addr), + Err(_) => { + error!("Invalid IP address format provided: {}", s); + return -libc::EINVAL; + } + } + } + Ok(_) => None, // Treat empty string as None + Err(_) => return -libc::EINVAL, + } + }; + + let subnet = if c_subnet.is_null() { + None + } else { + match CStr::from_ptr(c_subnet).to_str() { + Ok(s) if !s.is_empty() => { + // Parse Subnet format directly + match s.parse::() { + Ok(net) => Some(net), + Err(_) => { + error!("Invalid subnet format provided: {}", s); + return -libc::EINVAL; + } + } + } + Ok(_) => None, // Treat empty string as None + Err(_) => return -libc::EINVAL, + } + }; + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + let cfg = ctx_cfg.get_mut(); + match &mut cfg.net_cfg { + NetworkConfig::Tsi(tsi_config) => { + tsi_config.ip = ip; + tsi_config.subnet = subnet; + tsi_config.scope = scope; + KRUN_SUCCESS + } + _ => { + error!("krun_set_tsi_scope is only supported for TSI network mode"); + -libc::ENOTSUP + } + } + } + Entry::Vacant(_) => -libc::ENOENT, + } +} + #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_rlimits(ctx_id: u32, c_rlimits: *const *const c_char) -> i32 { @@ -1257,6 +1329,9 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { guest_cid: 3, host_port_map: None, unix_ipc_port_map: None, + ip: None, + subnet: None, + scope: 0, }; if let Some(ref map) = ctx_cfg.unix_ipc_port_map { @@ -1268,6 +1343,9 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { NetworkConfig::Tsi(tsi_cfg) => { vsock_config.host_port_map = tsi_cfg.port_map; vsock_set = true; + vsock_config.ip = tsi_cfg.ip; + vsock_config.subnet = tsi_cfg.subnet; + vsock_config.scope = tsi_cfg.scope; } NetworkConfig::VirtioNetPasst(_fd) 
=> { #[cfg(feature = "net")] diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 25ed38d72..a44ff4fb5 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -36,6 +36,7 @@ serde_json = { version = "1.0.64", optional = true } sev = { version = "4.0.0", features = ["openssl"], optional = true } curl = { version = "0.4", optional = true } nix = "0.24.1" +ipnetwork = "0.21" [target.'cfg(target_arch = "x86_64")'.dependencies] cpuid = { path = "../cpuid" } diff --git a/src/vmm/src/vmm_config/vsock.rs b/src/vmm/src/vmm_config/vsock.rs index 5aafe8582..8b909475f 100644 --- a/src/vmm/src/vmm_config/vsock.rs +++ b/src/vmm/src/vmm_config/vsock.rs @@ -3,9 +3,12 @@ use std::collections::HashMap; use std::fmt; +use std::net::Ipv4Addr; use std::path::PathBuf; use std::sync::{Arc, Mutex}; +use ipnetwork::Ipv4Network; + use devices::virtio::{Vsock, VsockError}; type MutexVsock = Arc>; @@ -30,7 +33,7 @@ type Result = std::result::Result; /// This struct represents the strongly typed equivalent of the json body /// from vsock related requests. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub struct VsockDeviceConfig { /// ID of the vsock device. pub vsock_id: String, @@ -40,6 +43,12 @@ pub struct VsockDeviceConfig { pub host_port_map: Option>, /// An optional map of guest port to host UNIX domain sockets for IPC. pub unix_ipc_port_map: Option>, + /// Optional static IP address for TSI. + pub ip: Option, + /// Optional subnet for TSI. + pub subnet: Option, + /// Scope for TSI (0-3). 
+ pub scope: u8, } struct VsockWrapper { @@ -78,6 +87,9 @@ impl VsockBuilder { u64::from(cfg.guest_cid), cfg.host_port_map, cfg.unix_ipc_port_map, + cfg.ip, + cfg.subnet, + cfg.scope, ) .map_err(VsockConfigError::CreateVsockDevice) } @@ -115,6 +127,9 @@ pub(crate) mod tests { guest_cid: 3, host_port_map: None, unix_ipc_port_map: None, + ip: None, + subnet: None, + scope: 0, } } From 0d8bff41cac66ae0fee39cd698572cbe4ca93bd7 Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Tue, 1 Apr 2025 22:31:50 +0100 Subject: [PATCH 06/14] feat(vsock): make subnet optional for Group scope IP filtering (#10) When using scope 1 (Group) for IP filtering, subnet specification is now optional. If no subnet is provided, all connections will be blocked, matching scope 0 behavior. This provides more flexibility in network configuration while maintaining security. - Update IpFilterConfig to make subnet optional for scope 1 - Modify is_valid() to accept scope 1 without subnet - Update documentation in libkrun.h to clarify scope behaviors - Improve warning message specificity in VsockMuxer creation --- include/libkrun.h | 7 ++++++- src/devices/src/virtio/vsock/ip_filter.rs | 14 ++++++++------ src/devices/src/virtio/vsock/muxer.rs | 2 +- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/libkrun.h b/include/libkrun.h index a03745183..ffc1c45e7 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -325,7 +325,12 @@ int32_t krun_set_port_map(uint32_t ctx_id, const char *const port_map[]); * "ctx_id" - the configuration context ID. * "c_ip" - an optional null-terminated string representing the guest's static IPv4 address. * "c_subnet" - an optional null-terminated string representing the guest's subnet in CIDR notation (e.g., "192.168.1.0/24"). - * "scope" - an integer specifying the scope (0-3). Refer to TSI documentation for details. + * If scope is 1 and subnet is not provided, all connections will be blocked. 
+ * "scope" - an integer specifying the scope (0-3): + * 0: None - Block all IP communication + * 1: Group - Allow within subnet (if specified; otherwise, block all like scope 0) + * 2: Public - Allow public IPs + * 3: Any - Allow any IP * * Returns: * Zero on success or a negative error number on failure. diff --git a/src/devices/src/virtio/vsock/ip_filter.rs b/src/devices/src/virtio/vsock/ip_filter.rs index 6eed6382c..4fb81fe18 100644 --- a/src/devices/src/virtio/vsock/ip_filter.rs +++ b/src/devices/src/virtio/vsock/ip_filter.rs @@ -10,7 +10,7 @@ use std::net::Ipv4Addr; pub struct IpFilterConfig { /// Defines the scope of allowed connections/bindings. /// 0: None (Block all IP communication) - /// 1: Group (Allow within `subnet`, bind only to `ip` if specified) + /// 1: Group (Allow within `subnet` if specified, otherwise behaves like scope 0) /// 2: Public (Allow public IPs, bind only to `ip` if specified) /// 3: Any (Allow any IP, bind only to `ip` if specified) pub scope: u8, @@ -19,7 +19,8 @@ pub struct IpFilterConfig { /// (ignored if scope is 0). pub ip: Option, - /// The allowed subnet for Scope 1 (Group). Required if scope is 1. + /// The allowed subnet for Scope 1 (Group). Optional - if not provided when scope is 1, + /// all connections will be blocked (same as scope 0). pub subnet: Option, } @@ -31,9 +32,8 @@ impl IpFilterConfig { /// Checks if the configuration is logically valid. 
pub fn is_valid(&self) -> bool { match self.scope { - 0 | 2 | 3 => true, // Scopes 0, 2, 3 are valid without extra checks (ip is optional) - 1 => self.subnet.is_some(), // Scope 1 requires a subnet - _ => false, // Invalid scope number + 0 | 1 | 2 | 3 => true, // All valid scopes (subnet is optional for scope 1) + _ => false, // Invalid scope number } } @@ -57,13 +57,14 @@ impl IpFilterConfig { 0 => false, // Scope 0: Deny all connections 1 => { // Scope 1: Group - Allow connection only if dest_ip is within the specified subnet + // If no subnet is specified, behaves like scope 0 (deny all) self.subnet.map_or(false, |subnet| subnet.contains(dest_ip)) } 2 => { // Scope 2: Public - Allow connection only if dest_ip is NOT private !Self::is_private(dest_ip) } - 3 => true, // Scope 3: Any - Allow connection to any IP + 3 => true, // Scope 3: Any - Allow connection to any IP _ => false, // Invalid scope } } @@ -82,6 +83,7 @@ impl IpFilterConfig { // No specific IP specified, check based on scope rules for the bind_ip itself match self.scope { // Scope 1: Group - Allow binding within the subnet if no specific IP given + // If no subnet is specified, behaves like scope 0 (deny all) 1 => self.subnet.map_or(false, |subnet| subnet.contains(bind_ip)), // Scope 2: Public - Allow binding to public IPs if no specific IP given 2 => !Self::is_private(bind_ip), diff --git a/src/devices/src/virtio/vsock/muxer.rs b/src/devices/src/virtio/vsock/muxer.rs index d44a22db0..75e947bd8 100644 --- a/src/devices/src/virtio/vsock/muxer.rs +++ b/src/devices/src/virtio/vsock/muxer.rs @@ -127,7 +127,7 @@ impl VsockMuxer { ip_filter: IpFilterConfig, ) -> Self { if !ip_filter.is_valid() { - warn!("Invalid IpFilterConfig provided during VsockMuxer creation: {:?}. Check configuration.", ip_filter); + warn!("Invalid IpFilterConfig provided during VsockMuxer creation: {:?}. 
Scope value must be between 0 and 3.", ip_filter); } VsockMuxer { From 0a37c827ec2a349ca720ec8fbf18487888de824f Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Fri, 11 Apr 2025 08:43:50 +0100 Subject: [PATCH 07/14] feat(overlayfs): complete Linux overlayfs implementation (#11) * feat(overlayfs): complete Linux overlayfs implementation Move overlayfs implementation to Linux-specific directory and add comprehensive Linux filesystem support. This change introduces: Core Features: - Implement path traversal with lookup_layer_by_layer and lookup_segment_by_segment - Add copy_up functionality for promoting files from lower to upper layers - Support FICLONE ioctl for efficient Copy-on-Write operations - Implement Linux-specific file operations (fallocate, lseek, copyfilerange) - Add memory mapping support via setupmapping and removemapping Linux Integration: - Handle Linux-specific file descriptors and system calls - Support Linux xattr operations and mount point management - Implement proper device ID handling for Linux filesystems - Add CoW optimizations using Linux-specific filesystem features * chore: cleanup workspace and test configuration - Remove ignore/debug from workspace members - Add #[cfg(test)] annotation to overlayfs test helper module - Remove redundant newline in devices/Cargo.toml * fix(overlayfs): fix linux overlayfs for tests --- src/devices/Cargo.toml | 1 - src/devices/src/virtio/fs/device.rs | 2 +- src/devices/src/virtio/fs/filesystem.rs | 1 + src/devices/src/virtio/fs/kinds.rs | 12 +- src/devices/src/virtio/fs/linux/mod.rs | 1 + src/devices/src/virtio/fs/linux/overlayfs.rs | 3399 +++++++++++++++++ .../macos/{overlayfs/fs.rs => overlayfs.rs} | 89 +- .../src/virtio/fs/macos/overlayfs/mod.rs | 5 - src/devices/src/virtio/fs/mod.rs | 2 + src/devices/src/virtio/fs/multikey.rs | 4 +- .../tests => tests/overlayfs}/create.rs | 0 .../tests => tests/overlayfs}/lookup.rs | 5 +- .../tests => tests/overlayfs}/metadata.rs | 23 +- .../tests => 
tests/overlayfs}/misc.rs | 26 +- .../tests.rs => tests/overlayfs/mod.rs} | 23 +- .../tests => tests/overlayfs}/open.rs | 0 .../tests => tests/overlayfs}/read.rs | 31 +- .../tests => tests/overlayfs}/remove.rs | 48 +- .../tests => tests/overlayfs}/write.rs | 2 +- 19 files changed, 3578 insertions(+), 96 deletions(-) create mode 100644 src/devices/src/virtio/fs/linux/overlayfs.rs rename src/devices/src/virtio/fs/macos/{overlayfs/fs.rs => overlayfs.rs} (97%) delete mode 100644 src/devices/src/virtio/fs/macos/overlayfs/mod.rs rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/create.rs (100%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/lookup.rs (99%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/metadata.rs (98%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/misc.rs (95%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests.rs => tests/overlayfs/mod.rs} (89%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/open.rs (100%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/read.rs (98%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/remove.rs (93%) rename src/devices/src/virtio/fs/{macos/overlayfs/tests => tests/overlayfs}/write.rs (99%) diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 6342082ab..7d179c3f8 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -49,6 +49,5 @@ lru = ">=0.9" rutabaga_gfx = { path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" - [dev-dependencies] tempfile = "3.17.1" diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index faa4b5b19..53d87a0fb 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -16,7 +16,7 @@ use super::super::{ ActivateResult, DeviceState, FsError, Queue as VirtQueue, VirtioDevice, 
VirtioShmRegion, }; use super::kinds::{FsImplConfig, FsImplShare}; -use super::macos::overlayfs; +use super::overlayfs; use super::passthrough; use super::worker::FsWorker; use super::ExportTable; diff --git a/src/devices/src/virtio/fs/filesystem.rs b/src/devices/src/virtio/fs/filesystem.rs index ed1284d8b..b55346cbf 100644 --- a/src/devices/src/virtio/fs/filesystem.rs +++ b/src/devices/src/virtio/fs/filesystem.rs @@ -25,6 +25,7 @@ pub use fuse::RemovemappingOne; pub use fuse::SetattrValid; /// Information about a path in the filesystem. +#[derive(Debug)] pub struct Entry { /// An `Inode` that uniquely identifies this path. During `lookup`, setting this to `0` means a /// negative entry. Returning `ENOENT` also means a negative entry but setting this to `0` diff --git a/src/devices/src/virtio/fs/kinds.rs b/src/devices/src/virtio/fs/kinds.rs index 901cd721d..d76da42e2 100644 --- a/src/devices/src/virtio/fs/kinds.rs +++ b/src/devices/src/virtio/fs/kinds.rs @@ -1,6 +1,10 @@ + + use std::{ffi::CStr, io, path::PathBuf, time::Duration}; +#[cfg(target_os = "macos")] use crossbeam_channel::Sender; +#[cfg(target_os = "macos")] use hvf::MemoryMapping; use crate::virtio::bindings; @@ -546,7 +550,7 @@ impl FileSystem for FsImpl { moffset, host_shm_base, shm_size, - map_sender, + #[cfg(target_os = "macos")] map_sender, ), FsImpl::Overlayfs(fs) => fs.setupmapping( ctx, @@ -558,7 +562,7 @@ impl FileSystem for FsImpl { moffset, host_shm_base, shm_size, - map_sender, + #[cfg(target_os = "macos")] map_sender, ), } } @@ -573,10 +577,10 @@ impl FileSystem for FsImpl { ) -> io::Result<()> { match self { FsImpl::Passthrough(fs) => { - fs.removemapping(ctx, requests, host_shm_base, shm_size, map_sender) + fs.removemapping(ctx, requests, host_shm_base, shm_size, #[cfg(target_os = "macos")] map_sender) } FsImpl::Overlayfs(fs) => { - fs.removemapping(ctx, requests, host_shm_base, shm_size, map_sender) + fs.removemapping(ctx, requests, host_shm_base, shm_size, #[cfg(target_os = "macos")] 
map_sender) } } } diff --git a/src/devices/src/virtio/fs/linux/mod.rs b/src/devices/src/virtio/fs/linux/mod.rs index b8edbc7f9..94cbd09d2 100644 --- a/src/devices/src/virtio/fs/linux/mod.rs +++ b/src/devices/src/virtio/fs/linux/mod.rs @@ -1,2 +1,3 @@ pub mod fs_utils; pub mod passthrough; +pub mod overlayfs; diff --git a/src/devices/src/virtio/fs/linux/overlayfs.rs b/src/devices/src/virtio/fs/linux/overlayfs.rs new file mode 100644 index 000000000..19b6b0c83 --- /dev/null +++ b/src/devices/src/virtio/fs/linux/overlayfs.rs @@ -0,0 +1,3399 @@ +use std::{ + collections::{btree_map, BTreeMap, HashSet}, + ffi::{CStr, CString}, + fs::File, + io, + mem::{self, MaybeUninit}, + os::{ + fd::{AsRawFd, FromRawFd, RawFd}, + unix::{ffi::OsStrExt, fs::MetadataExt}, + }, + path::PathBuf, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, LazyLock, RwLock, + }, + time::Duration, +}; + +use caps::{has_cap, CapSet, Capability}; +use intaglio::{cstr::SymbolTable, Symbol}; +use nix::request_code_read; + +use crate::virtio::{ + bindings, + fs::{ + filesystem::{ + self, Context, DirEntry, Entry, ExportTable, Extensions, FileSystem, FsOptions, + GetxattrReply, ListxattrReply, OpenOptions, SetattrValid, ZeroCopyReader, + ZeroCopyWriter, + }, + fuse, + multikey::MultikeyBTreeMap, + }, +}; + +//-------------------------------------------------------------------------------------------------- +// Modules +//-------------------------------------------------------------------------------------------------- + +#[path = "../tests/overlayfs/mod.rs"] +#[cfg(test)] +mod tests; + +//-------------------------------------------------------------------------------------------------- +// Constants +//-------------------------------------------------------------------------------------------------- + +/// The prefix for whiteout files +const WHITEOUT_PREFIX: &str = ".wh."; + +/// The marker for opaque directories +const OPAQUE_MARKER: &str = ".wh..wh..opq"; + +/// Maximum allowed number of 
/// The empty C string (`""`).
///
/// `statx` passes this as the path argument, together with `AT_EMPTY_PATH`,
/// when no name is supplied, so the call operates on the fd itself.
///
/// This is a `static`, not a `const`: a `const` item is re-evaluated at every
/// use site, so a `const LazyLock` would construct (and initialise) a brand-new
/// cell each time it is named and never actually cache anything.
static EMPTY_CSTR: LazyLock<&CStr> = LazyLock::new(|| {
    // SAFETY: the literal is exactly one NUL byte, i.e. NUL-terminated with no
    // interior NULs.
    unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }
});

/// The path of the `/proc/self/fd` directory as a C string.
///
/// `static` for the same reason as [`EMPTY_CSTR`].
static PROC_SELF_FD_CSTR: LazyLock<&CStr> = LazyLock::new(|| {
    // SAFETY: the literal ends with a single NUL byte and contains no interior
    // NULs.
    unsafe { CStr::from_bytes_with_nul_unchecked(b"/proc/self/fd\0") }
});
pub(crate) path: Vec, + + /// The layer index this inode belongs to + pub(crate) layer_idx: usize, +} + +/// Data associated with an open file handle +#[derive(Debug)] +pub(crate) struct HandleData { + /// The inode this handle refers to + inode: Inode, + + /// The underlying file object + file: RwLock, + + /// Whether the file handle is exported + exported: AtomicBool, +} + +pub(crate) struct ScopedGid; + +pub(crate) struct ScopedUid; + +/// The caching policy that the file system should report to the FUSE client. By default the FUSE +/// protocol uses close-to-open consistency. This means that any cached contents of the file are +/// invalidated the next time that file is opened. +#[derive(Default, Debug, Clone)] +pub enum CachePolicy { + /// The client should never cache file data and all I/O should be directly forwarded to the + /// server. This policy must be selected when file contents may change without the knowledge of + /// the FUSE client (i.e., the file system does not have exclusive access to the directory). + Never, + + /// The client is free to choose when and how to cache file data. This is the default policy and + /// uses close-to-open consistency as described in the enum documentation. + #[default] + Auto, + + /// The client should always cache file data. This means that the FUSE client will not + /// invalidate any cached data that was returned by the file system the last time the file was + /// opened. This policy should only be selected when the file system has exclusive access to the + /// directory. + Always, +} + +/// Configuration options that control the behavior of the file system. +#[derive(Debug, Clone)] +pub struct Config { + /// How long the FUSE client should consider directory entries to be valid. If the contents of a + /// directory can only be modified by the FUSE client (i.e., the file system has exclusive + /// access), then this should be a large value. + /// + /// The default value for this option is 5 seconds. 
+ pub entry_timeout: Duration, + + /// How long the FUSE client should consider file and directory attributes to be valid. If the + /// attributes of a file or directory can only be modified by the FUSE client (i.e., the file + /// system has exclusive access), then this should be set to a large value. + /// + /// The default value for this option is 5 seconds. + pub attr_timeout: Duration, + + /// The caching policy the file system should use. See the documentation of `CachePolicy` for + /// more details. + pub cache_policy: CachePolicy, + + /// Whether the file system should enable writeback caching. This can improve performance as it + /// allows the FUSE client to cache and coalesce multiple writes before sending them to the file + /// system. However, enabling this option can increase the risk of data corruption if the file + /// contents can change without the knowledge of the FUSE client (i.e., the server does **NOT** + /// have exclusive access). Additionally, the file system should have read access to all files + /// in the directory it is serving as the FUSE client may send read requests even for files + /// opened with `O_WRONLY`. + /// + /// Therefore callers should only enable this option when they can guarantee that: 1) the file + /// system has exclusive access to the directory and 2) the file system has read permissions for + /// all files in that directory. + /// + /// The default value for this option is `false`. + pub writeback: bool, + + /// The path of the root directory. + /// + /// The default is `/`. + pub root_dir: String, + + /// Whether the file system should support Extended Attributes (xattr). Enabling this feature may + /// have a significant impact on performance, especially on write parallelism. This is the result + /// of FUSE attempting to remove the special file privileges after each write request. + /// + /// The default value for this option is `false`. + pub xattr: bool, + + /// Optional file descriptor for /proc/self/fd.
Callers can obtain a file descriptor and pass it + /// here, so there's no need to open it in the filesystem implementation. This is specially useful + /// for sandboxing. + /// + /// The default is `None`. + pub proc_sfd_rawfd: Option, + + /// ID of this filesystem to uniquely identify exports. + pub export_fsid: u64, + + /// Table of exported FDs to share with other subsystems. + pub export_table: Option, + + /// Layers to be used for the overlay filesystem + pub layers: Vec, +} + +/// An overlay filesystem implementation that combines multiple layers into a single logical filesystem. +/// +/// This implementation follows standard overlay filesystem concepts, similar to Linux's OverlayFS, +/// while using OCI image specification's layer filesystem changeset format for whiteouts: +/// +/// - Uses OCI-style whiteout files (`.wh.` prefixed files) to mark deleted files in upper layers +/// - Uses OCI-style opaque directory markers (`.wh..wh..opq`) to mask lower layer directories +/// +/// ## Layer Structure +/// +/// The overlay filesystem consists of: +/// - A single top layer (upperdir) that is writable +/// - Zero or more lower layers that are read-only +/// +/// ## Layer Ordering +/// +/// When creating an overlay filesystem, layers are provided in order from lowest to highest: +/// The last layer in the provided sequence becomes the top layer (upperdir), while +/// the others become read-only lower layers. 
This matches the OCI specification where: +/// - The top layer (upperdir) handles all modifications +/// - Lower layers provide the base content +/// - Changes in the top layer shadow content in lower layers +/// +/// ## Layer Behavior +/// +/// - All write operations occur in the top layer +/// - When reading, the top layer takes precedence over lower layers +/// - Whiteout files in the top layer hide files from lower layers +/// - Opaque directory markers completely mask lower layer directory contents +/// - It is undefined behavior for whiteouts and their corresponding entries to exist at the same level in the same directory. +/// For example, looking up such entry can result in different behavior depending on which is found first. +/// The filesystem will try to prevent adding whiteout entries directly. +/// +/// TODO: Need to implement entry caching to improve the performance of [`Self::lookup_segment_by_segment`]. +pub struct OverlayFs { + /// Map of inodes by ID and alternative keys. The alternative keys allow looking up inodes by their + /// underlying host filesystem inode number, device ID and mount ID. + inodes: RwLock>>, + + /// Counter for generating the next inode ID. Each new inode gets a unique ID from this counter. + next_inode: AtomicU64, + + /// The initial inode ID (typically 1 for the root directory) + init_inode: u64, + + /// Map of open file handles by ID. Each open file gets a unique handle ID that maps to the + /// underlying file descriptor and associated data. + handles: RwLock>>, + + /// Counter for generating the next handle ID. Each new file handle gets a unique ID from this counter. + next_handle: AtomicU64, + + /// The initial handle ID + init_handle: u64, + + /// File descriptor pointing to the `/proc/self/fd` directory. This is used to convert an fd from + /// `inodes` into one that can go into `handles`. This is accomplished by reading the + /// `/proc/self/fd/{}` symlink. 
+ proc_self_fd: File, + + /// Whether writeback caching is enabled for this directory. This will only be true when + /// `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`. + writeback: AtomicBool, + + /// Whether to announce submounts. When true, the filesystem will report when directories are + /// mount points for other filesystems. + announce_submounts: AtomicBool, + + /// The UID of the process if it doesn't have CAP_SETUID capability, None otherwise. + /// Used to restrict UID changes to privileged processes. + my_uid: Option, + + /// The GID of the process if it doesn't have CAP_SETGID capability, None otherwise. + /// Used to restrict GID changes to privileged processes. + my_gid: Option, + + /// Whether the process has CAP_FOWNER capability. + cap_fowner: bool, + + /// Configuration options for the filesystem + config: Config, + + /// Symbol table for interned filenames to efficiently store and compare path components + filenames: Arc>, + + /// Root inodes for each layer, ordered from bottom to top. The last element is the upperdir + /// (writable layer) while all others are read-only lower layers. 
+ layer_roots: Arc>>, +} + +/// Represents either a file or a path +enum FileOrPath { + /// A file + File(File), + + /// A path + Path(CString), +} + +/// Represents either a file descriptor or a path +enum FileId { + /// A file descriptor + Fd(RawFd), + + /// A path + Path(CString), +} + +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- + +impl ScopedGid { + fn new(gid: libc::gid_t) -> io::Result { + let res = unsafe { libc::syscall(libc::SYS_setresgid, -1, gid, -1) }; + if res != 0 { + return Err(io::Error::last_os_error()); + } + + Ok(Self {}) + } +} + +impl ScopedUid { + fn new(uid: libc::uid_t) -> io::Result { + let res = unsafe { libc::syscall(libc::SYS_setresuid, -1, uid, -1) }; + if res != 0 { + return Err(io::Error::last_os_error()); + } + + Ok(Self {}) + } +} + +impl InodeAltKey { + fn new(ino: libc::ino64_t, dev: libc::dev_t, mnt_id: u64) -> Self { + Self { ino, dev, mnt_id } + } +} + +impl OverlayFs { + /// Creates a new OverlayFs with the given layers + pub fn new(config: Config) -> io::Result { + if config.layers.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "at least one layer must be provided", + )); + } + + if config.layers.len() > MAX_LAYERS { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "maximum overlayfs layer count exceeded", + )); + } + + let mut next_inode = 1; + let mut inodes = MultikeyBTreeMap::new(); + + // Initialize the root inodes for all layers + let layer_roots = Self::init_root_inodes(&config.layers, &mut inodes, &mut next_inode)?; + + // Set the `init.krun` inode + let init_inode = next_inode; + next_inode += 1; + + // Get the file descriptor for /proc/self/fd + let proc_self_fd = if let Some(fd) = config.proc_sfd_rawfd { + fd + } else { + // Safe because this doesn't modify any memory and we check the return value. 
+ let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + PROC_SELF_FD_CSTR.as_ptr(), + libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + fd + }; + + // Get the UID of the process + let my_uid = if has_cap(None, CapSet::Effective, Capability::CAP_SETUID).unwrap_or_default() + { + None + } else { + // SAFETY: This syscall is always safe to call and always succeeds. + Some(unsafe { libc::getuid() }) + }; + + // Get the GID of the process + let my_gid = if has_cap(None, CapSet::Effective, Capability::CAP_SETGID).unwrap_or_default() + { + None + } else { + // SAFETY: This syscall is always safe to call and always succeeds. + Some(unsafe { libc::getgid() }) + }; + + let cap_fowner = + has_cap(None, CapSet::Effective, Capability::CAP_FOWNER).unwrap_or_default(); + + // SAFETY: We just opened this fd or it was provided by our caller. + let proc_self_fd = unsafe { File::from_raw_fd(proc_self_fd) }; + + Ok(OverlayFs { + inodes: RwLock::new(inodes), + next_inode: AtomicU64::new(next_inode), + init_inode, + handles: RwLock::new(BTreeMap::new()), + next_handle: AtomicU64::new(1), + init_handle: 0, + proc_self_fd, + writeback: AtomicBool::new(false), + announce_submounts: AtomicBool::new(false), + my_uid, + my_gid, + cap_fowner, + config, + filenames: Arc::new(RwLock::new(SymbolTable::new())), + layer_roots: Arc::new(RwLock::new(layer_roots)), + }) + } + + /// Initialize root inodes for all layers + /// + /// This function processes layers from top to bottom, creating root inodes for each layer. 
+ /// + /// Parameters: + /// - layers: Slice of paths to the layer roots, ordered from bottom to top + /// - inodes: Mutable reference to the inodes map to populate + /// - next_inode: Mutable reference to the next inode counter + /// + /// Returns: + /// - io::Result> containing the root inodes for each layer + fn init_root_inodes( + layers: &[PathBuf], + inodes: &mut MultikeyBTreeMap>, + next_inode: &mut u64, + ) -> io::Result> { + // Pre-allocate layer_roots with the right size + let mut layer_roots = vec![0; layers.len()]; + + // Process layers from top to bottom + for (i, layer_path) in layers.iter().enumerate().rev() { + let layer_idx = i; // Layer index from bottom to top + + // Get the stat information for this layer's root + let c_path = CString::new(layer_path.to_string_lossy().as_bytes())?; + + // Open the directory + let file = Self::open_path_file(&c_path)?; + + // Get statx information + let (st, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + // Create the alt key for this inode + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev, mnt_id); + + // Create the inode data + let inode_id = *next_inode; + *next_inode += 1; + + let inode_data = Arc::new(InodeData { + inode: inode_id, + file, + dev: st.st_dev, + mnt_id, + refcount: AtomicU64::new(1), + path: vec![], + layer_idx, + }); + + // Insert the inode into the map + inodes.insert(inode_id, alt_key, inode_data); + + // Store the root inode for this layer + layer_roots[layer_idx] = inode_id; + } + + Ok(layer_roots) + } + + /// Opens a file without following symlinks. + fn open_file(path: &CStr, flags: i32) -> io::Result { + let fd = unsafe { libc::open(path.as_ptr(), flags | libc::O_NOFOLLOW, 0) }; + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd. + Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Opens a file relative to a parent without following symlinks. 
+ fn open_file_at(parent: RawFd, name: &CStr, flags: i32) -> io::Result { + let fd = unsafe { libc::openat(parent, name.as_ptr(), flags | libc::O_NOFOLLOW, 0) }; + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd. + Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Opens a path as an O_PATH file. + fn open_path_file(path: &CStr) -> io::Result { + Self::open_file(path, libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC) + } + + /// Opens a path relative to a parent as an O_PATH file. + fn open_path_file_at(parent: RawFd, name: &CStr) -> io::Result { + Self::open_file_at( + parent, + name, + libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, + ) + } + + /// Performs a statx syscall without any modifications to the returned stat structure. + fn statx(fd: RawFd, name: Option<&CStr>) -> io::Result<(libc::stat64, u64)> { + let mut stx = MaybeUninit::::zeroed(); + let res = unsafe { + libc::statx( + fd, + name.unwrap_or(&*EMPTY_CSTR).as_ptr(), + libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, + libc::STATX_BASIC_STATS | libc::STATX_MNT_ID, + stx.as_mut_ptr(), + ) + }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because the kernel guarantees that the struct is now fully initialized. + let stx = unsafe { stx.assume_init() }; + + // Unfortunately, we cannot use an initializer to create the stat64 object, + // because it may contain padding and reserved fields (depending on the + // architecture), and it does not implement the Default trait. + // So we take a zeroed struct and set what we can. (Zero in all fields is + // wrong, but safe.) 
+ let mut st = unsafe { MaybeUninit::::zeroed().assume_init() }; + + st.st_dev = libc::makedev(stx.stx_dev_major, stx.stx_dev_minor); + st.st_ino = stx.stx_ino; + st.st_mode = stx.stx_mode as _; + st.st_nlink = stx.stx_nlink as _; + st.st_uid = stx.stx_uid; + st.st_gid = stx.stx_gid; + st.st_rdev = libc::makedev(stx.stx_rdev_major, stx.stx_rdev_minor); + st.st_size = stx.stx_size as _; + st.st_blksize = stx.stx_blksize as _; + st.st_blocks = stx.stx_blocks as _; + st.st_atime = stx.stx_atime.tv_sec; + st.st_atime_nsec = stx.stx_atime.tv_nsec as _; + st.st_mtime = stx.stx_mtime.tv_sec; + st.st_mtime_nsec = stx.stx_mtime.tv_nsec as _; + st.st_ctime = stx.stx_ctime.tv_sec; + st.st_ctime_nsec = stx.stx_ctime.tv_nsec as _; + + Ok((st, stx.stx_mnt_id)) + } + + /// Turns an inode data into a file descriptor string. + fn data_to_fd_str(data: &InodeData) -> io::Result { + let fd = format!("{}", data.file.as_raw_fd()); + CString::new(fd).map_err(|_| einval()) + } + + /// Turns an inode data into a path. + fn data_to_path(data: &InodeData) -> io::Result { + let path = format!("/proc/self/fd/{}", data.file.as_raw_fd()); + CString::new(path).map_err(|_| einval()) + } + + /// Turns an inode into an opened file. + fn open_inode(&self, inode: Inode, mut flags: i32) -> io::Result { + let data = self.get_inode_data(inode)?; + let fd_str = Self::data_to_fd_str(&data)?; + + // When writeback caching is enabled, the kernel may send read requests even if the + // userspace program opened the file write-only. So we need to ensure that we have opened + // the file for reading as well as writing. + let writeback = self.writeback.load(Ordering::Relaxed); + if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY { + flags &= !libc::O_ACCMODE; + flags |= libc::O_RDWR; + } + + // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`. 
+ // However, this breaks atomicity as the file may have changed on disk, invalidating the + // cached copy of the data in the kernel and the offset that the kernel thinks is the end of + // the file. Just allow this for now as it is the user's responsibility to enable writeback + // caching only for directories that are not shared. It also means that we need to clear the + // `O_APPEND` flag. + if writeback && flags & libc::O_APPEND != 0 { + flags &= !libc::O_APPEND; + } + + // If the file is a symlink, just clone existing file. + if data.file.metadata()?.is_symlink() { + return Ok(data.file.try_clone()?); + } + + // Safe because this doesn't modify any memory and we check the return value. We don't + // really check `flags` because if the kernel can't handle poorly specified flags then we + // have much bigger problems. + // + // It is safe to follow here since symlinks are returned early as O_PATH files. + let fd = unsafe { + libc::openat( + self.proc_self_fd.as_raw_fd(), + fd_str.as_ptr(), + flags | libc::O_CLOEXEC & (!libc::O_NOFOLLOW), + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd. + Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Turns an inode into an opened file or a path. 
+ fn open_inode_or_path(&self, inode: Inode, flags: i32) -> io::Result { + match self.open_inode(inode, flags) { + Ok(file) => Ok(FileOrPath::File(file)), + Err(e) if e.raw_os_error() == Some(libc::ELOOP) => { + let data = self.get_inode_data(inode)?; + let path = Self::data_to_path(&data)?; + Ok(FileOrPath::Path(path)) + } + Err(e) => Err(e), + } + } + + pub fn get_config(&self) -> &Config { + &self.config + } + + pub fn get_filenames(&self) -> &Arc> { + &self.filenames + } + + fn get_layer_root(&self, layer_idx: usize) -> io::Result> { + let layer_roots = self.layer_roots.read().unwrap(); + + // Check if the layer index is valid + if layer_idx >= layer_roots.len() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "layer index out of bounds", + )); + } + + // Get the inode for this layer + let inode = layer_roots[layer_idx]; + if inode == 0 { + return Err(io::Error::new(io::ErrorKind::NotFound, "layer not found")); + } + + // Get the inode data + self.get_inode_data(inode) + } + + /// Creates a new inode and adds it to the inode map + fn create_inode( + &self, + file: File, + ino: libc::ino64_t, + dev: libc::dev_t, + mnt_id: u64, + path: Vec, + layer_idx: usize, + ) -> (Inode, Arc) { + let inode = self.next_inode.fetch_add(1, Ordering::SeqCst); + + let data = Arc::new(InodeData { + inode, + file, + dev, + mnt_id, + refcount: AtomicU64::new(1), + path, + layer_idx, + }); + + let alt_key = InodeAltKey::new(ino, dev, mnt_id); + self.inodes + .write() + .unwrap() + .insert(inode, alt_key, data.clone()); + + (inode, data) + } + + /// Creates an Entry from stat information and inode data + fn create_entry(&self, inode: Inode, st: bindings::stat64) -> Entry { + Entry { + inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + } + } + + fn create_whiteout_path(&self, name: &CStr) -> io::Result { + let name_str = name.to_str().map_err(|_| einval())?; + let whiteout_path = 
format!("{WHITEOUT_PREFIX}{name_str}"); + CString::new(whiteout_path).map_err(|_| einval()) + } + + /// Checks for whiteout file in top layer + fn check_whiteout(&self, parent: RawFd, name: &CStr) -> io::Result { + let whiteout_cpath = self.create_whiteout_path(name)?; + + match Self::statx(parent, Some(&whiteout_cpath)) { + Ok(_) => { + Ok(true) + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + Ok(false) + } + Err(e) => { + Err(e) + } + } + } + + /// Checks for an opaque directory marker in the given parent directory path. + fn check_opaque_marker(&self, parent: RawFd) -> io::Result { + let opaque_cpath = CString::new(OPAQUE_MARKER).map_err(|_| einval())?; + + match Self::statx(parent, Some(&opaque_cpath)) { + Ok(_) => { + Ok(true) + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + Ok(false) + } + Err(e) => { + Err(e) + } + } + } + + /// Interns a name and returns the corresponding Symbol + fn intern_name(&self, name: &CStr) -> io::Result { + // Clone the name to avoid lifetime issues + let name_to_intern = CString::new(name.to_bytes()).map_err(|_| einval())?; + + // Get a write lock to intern it + let mut filenames = self.filenames.write().unwrap(); + filenames.intern(name_to_intern).map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("Failed to intern filename: {}", e), + ) + }) + } + + /// Gets the InodeData for an inode + pub(super) fn get_inode_data(&self, inode: Inode) -> io::Result> { + self.inodes + .read() + .unwrap() + .get(&inode) + .cloned() + .ok_or_else(ebadf) + } + + /// Gets the HandleData for a handle + pub(super) fn get_inode_handle_data( + &self, + inode: Inode, + handle: Handle, + ) -> io::Result> { + self.handles + .read() + .unwrap() + .get(&handle) + .filter(|hd| hd.inode == inode) + .cloned() + .ok_or_else(ebadf) + } + + fn get_top_layer_idx(&self) -> usize { + self.layer_roots.read().unwrap().len() - 1 + } + + fn bump_refcount(&self, inode: Inode) { + let inodes = self.inodes.write().unwrap(); + let 
/// Validates a single path component supplied by the guest.
///
/// A name is rejected when it:
/// - is empty (`InvalidInput`),
/// - is `..` or contains `/` or `\` (`PermissionDenied`),
/// - is not valid UTF-8 (`InvalidInput`),
/// - is exactly the opaque directory marker `.wh..wh..opq` (`InvalidInput`),
/// - starts with the whiteout prefix `.wh.` (`InvalidInput`).
///
/// # Errors
/// Returns an `io::Error` whose kind is listed above and whose message names the rule
/// that was violated. Returns `Ok(())` for every other name.
fn validate_name(name: &CStr) -> io::Result<()> {
    // `to_bytes` excludes the trailing NUL and a `CStr` cannot hold an interior NUL, so
    // no separate NUL check is needed here.
    let name_bytes = name.to_bytes();

    if name_bytes.is_empty() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "empty name is not allowed",
        ));
    }

    // A component must never be able to step out of, or below, its parent directory.
    if name_bytes == b".." || name_bytes.iter().any(|&b| b == b'/' || b == b'\\') {
        return Err(io::Error::new(
            io::ErrorKind::PermissionDenied,
            "path traversal attempt detected",
        ));
    }

    let name_str = std::str::from_utf8(name_bytes).map_err(|_| {
        io::Error::new(io::ErrorKind::InvalidInput, "name contains invalid UTF-8")
    })?;

    // The opaque marker itself starts with the whiteout prefix, so it has to be tested
    // first; otherwise this branch is unreachable and the marker is reported as a
    // generic whiteout name.
    if name_str == ".wh..wh..opq" {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "name cannot be an opaque directory marker",
        ));
    }

    if name_str.starts_with(".wh.") {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "name cannot start with whiteout prefix",
        ));
    }

    Ok(())
}
markers along the way. + /// + /// ### Arguments + /// * `layer_root` - Root inode data for the layer being searched + /// * `path_segments` - Path components to traverse, as interned symbols + /// * `path_inodes` - Vector to store inode data for each path segment traversed + /// + /// # Return Value + /// Returns `Option>` where: + /// - `Some(Ok(stat))` - Successfully found the file/directory and retrieved its stats + /// - `Some(Err(e))` - Encountered an error during lookup that should be propagated: + /// - If error is `NotFound`, caller should try next layer + /// - For any other IO error, caller should stop searching entirely + /// - `None` - Stop searching lower layers because either: + /// - Found a whiteout file for this path (file was deleted in this layer) + /// - Found an opaque directory marker (directory contents are masked in this layer) + /// + /// # Example Return Flow + /// 1. If path exists: `Some(Ok(stat))` + /// 2. If path has whiteout: `None` + /// 3. If path not found: `Some(Err(NotFound))` + /// 4. If directory has opaque marker: `None` + /// 5. If IO error occurs: `Some(Err(io_error))` + /// + /// # Side Effects + /// - Creates inodes for each path segment if they don't already exist + /// - Updates path_inodes with inode data for each segment traversed + /// - Increments reference counts for existing inodes that are reused + /// + /// # Path Resolution + /// For a path like "foo/bar/baz", the function: + /// 1. Starts at layer_root + /// 2. Looks up "foo", checking for whiteouts/opaque markers + /// 3. If "foo" exists, creates/reuses its inode and adds to path_inodes + /// 4. Repeats for "bar" and "baz" + /// 5. 
Returns stats for "baz" if found + fn lookup_segment_by_segment( + &self, + layer_root: &Arc, + path_segments: &[Symbol], + path_inodes: &mut Vec>, + ) -> Option> { + let mut opaque_marker_found = false; + + // Start from layer root + let root_file = match layer_root.file.try_clone() { + Ok(file) => file, + Err(e) => { + return Some(Err(e)); + } + }; + + // Set current. + let mut current = match Self::statx(root_file.as_raw_fd(), None) { + Ok((stat, mnt_id)) => (root_file, stat, mnt_id), + Err(e) => return Some(Err(e)), + }; + + // Traverse each path segment + for (depth, segment) in path_segments.iter().enumerate() { + // Get the current segment name and parent vol path + let filenames = self.filenames.read().unwrap(); + let segment_name = filenames.get(*segment).unwrap(); + + // Check for whiteout at current level + match self.check_whiteout(current.0.as_raw_fd(), segment_name) { + Ok(true) => { + return None; // Found whiteout, stop searching + } + Ok(false) => (), // No whiteout, continue + Err(e) => { + return Some(Err(e)); + } + } + + // Check for opaque marker at current level + match self.check_opaque_marker(current.0.as_raw_fd()) { + Ok(true) => { + opaque_marker_found = true; + } + Ok(false) => (), + Err(e) => { + return Some(Err(e)); + } + } + + let segment_name = segment_name.to_owned(); + + drop(filenames); // Now safe to drop filenames lock + + match Self::statx(current.0.as_raw_fd(), Some(&segment_name)) { + Ok((st, mnt_id)) => { + // Open the current segment + let new_file = + match Self::open_path_file_at(current.0.as_raw_fd(), &segment_name) { + Ok(file) => { + file + } + Err(e) => { + return Some(Err(e)); + } + }; + + // Update parent for next iteration + current = match new_file.try_clone() { + Ok(file) => (file, st, mnt_id), + Err(e) => { + return Some(Err(e)); + } + }; + + // Create or get inode for this path segment + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev, mnt_id); + let inode_data = { + let inodes = self.inodes.read().unwrap(); 
+ if let Some(data) = inodes.get_alt(&alt_key) { + data.clone() + } else { + drop(inodes); // Drop read lock before write lock + + let mut path = path_inodes[depth].path.clone(); + path.push(*segment); + + // Safe because we just opened this fd. + let (_, data) = self.create_inode( + new_file, + st.st_ino, + st.st_dev, + mnt_id, + path, + layer_root.layer_idx, + ); + + data + } + }; + + // Update path_inodes with the current segment's inode data + if (depth + 1) >= path_inodes.len() { + // Haven't seen this depth before, append + path_inodes.push(inode_data); + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound && opaque_marker_found => { + // For example, for a lookup of /foo/bar/baz, where /foo/bar has an opaque marker, + // then if we cannot find /foo/bar/baz in the current layer, we cannot find it + // in any other layer as /foo/bar is masked. + return None; + } + Err(e) => { + return Some(Err(e)); + } + } + } + + Some(Ok(current)) + } + + /// Looks up a file or directory entry across multiple filesystem layers. + /// + /// This function starts from the specified upper layer (given by start_layer_idx) and searches downwards + /// through the layers to locate the file represented by the provided path segments (an interned path). + /// At each layer, it calls lookup_segment_by_segment to traverse the path step by step while handling + /// whiteout files and opaque directory markers. If an entry is found in a layer, the function returns + /// an Entry structure containing the file metadata along with a vector of InodeData for each path segment traversed. + /// + /// ## Arguments + /// + /// * `start_layer_idx` - The index of the starting layer (from the topmost, which may be the writable layer). + /// * `path_segments` - A slice of interned symbols representing the path components to traverse. + /// + /// ## Returns + /// + /// On success, returns a tuple containing: + /// - An Entry representing the located file or directory along with its attributes. 
+ /// - A vector of Arc corresponding to the inodes for each traversed path segment. + /// + /// ## Errors + /// + /// Returns an io::Error if: + /// - The file is not found in any layer (ENOENT), or + /// - An error occurs during the lookup process in one of the layers. + fn lookup_layer_by_layer<'a>( + &'a self, + start_layer_idx: usize, + path_segments: &[Symbol], + ) -> io::Result<(Entry, Arc, Vec>)> { + let mut path_inodes = vec![]; + + // Start from the start_layer_idx and try each layer down to layer 0 + for layer_idx in (0..=start_layer_idx).rev() { + let layer_root = self.get_layer_root(layer_idx)?; + + // If path_inodes has only the root inode or is empty, we need to restart the lookup with the new layer root. + if path_inodes.len() < 2 { + path_inodes = vec![layer_root.clone()]; + } + + match self.lookup_segment_by_segment(&layer_root, &path_segments, &mut path_inodes) { + Some(Ok((file, st, mnt_id))) => { + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev, mnt_id); + + // Check if we already have this inode + let inodes = self.inodes.read().unwrap(); + if let Some(data) = inodes.get_alt(&alt_key) { + return Ok((self.create_entry(data.inode, st), data.clone(), path_inodes)); + } + + drop(inodes); + + // Open the path + let path = path_segments.to_vec(); + + // Create new inode + let (inode, data) = + self.create_inode(file, st.st_ino, st.st_dev, mnt_id, path, layer_idx); + path_inodes.push(data.clone()); + + return Ok((self.create_entry(inode, st), data, path_inodes)); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + // Continue to check lower layers + continue; + } + Some(Err(e)) => { + return Err(e); + } + None => { + // Hit a whiteout or opaque marker, stop searching lower layers + return Err(io::Error::from_raw_os_error(libc::ENOENT)); + } + } + } + + // Not found in any layer + Err(io::Error::from_raw_os_error(libc::ENOENT)) + } + + /// Performs a lookup operation + pub(crate) fn do_lookup( + &self, + parent: Inode, + name: &CStr, 
+ ) -> io::Result<(Entry, Vec>)> { + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Create path segments for lookup by appending the new name + let mut path_segments = parent_data.path.clone(); + let symbol = self.intern_name(name)?; + path_segments.push(symbol); + + let (mut entry, child_data, path_inodes) = + self.lookup_layer_by_layer(parent_data.layer_idx, &path_segments)?; + + // Set the submount flag if the endirectory is a mount point + let mut attr_flags = 0; + if (entry.attr.st_mode & libc::S_IFMT) == libc::S_IFDIR + && self.announce_submounts.load(Ordering::Relaxed) + && (child_data.dev != parent_data.dev || child_data.mnt_id != parent_data.mnt_id) + { + attr_flags |= fuse::ATTR_SUBMOUNT; + } + + entry.attr_flags = attr_flags; + + Ok((entry, path_inodes)) + } + + /// Copies up a file or directory from a lower layer to the top layer + pub(crate) fn copy_up(&self, path_inodes: &[Arc]) -> io::Result<()> { + // Get the top layer root + let top_layer_idx = self.get_top_layer_idx(); + let top_layer_root = self.get_layer_root(top_layer_idx)?; + + // Start from root and copy up each segment that's not in the top layer + let mut parent = top_layer_root.file.try_clone()?; + + // Skip the root inode + for inode_data in path_inodes.iter().skip(1) { + // Skip if this segment is already in the top layer + if inode_data.layer_idx == top_layer_idx { + parent = inode_data.file.try_clone()?; + continue; + } + + // Get the current segment name + let segment_name = { + let name = inode_data.path.last().unwrap(); + let filenames = self.filenames.read().unwrap(); + filenames.get(*name).unwrap().to_owned() + }; + + let (src_stat, _) = Self::statx(inode_data.file.as_raw_fd(), None)?; + let file_type = src_stat.st_mode & libc::S_IFMT; + + // Copy up the file + match file_type { + libc::S_IFREG => { + // Open source file with O_RDONLY + let src_file = self.open_inode(inode_data.inode, libc::O_RDONLY)?; + + // Open destination file with 
O_WRONLY | O_CREAT + let dst_file = Self::open_file_at( + parent.as_raw_fd(), + &segment_name, + libc::O_WRONLY | libc::O_CREAT, + )?; + + // Try to use FICLONE ioctl for CoW copying first (works on modern Linux filesystems like Btrfs, XFS, etc.) + let result = unsafe { + libc::ioctl(dst_file.as_raw_fd(), FICLONE as _, src_file.as_raw_fd()) + }; + + if result < 0 { + debug!("FICLONE failed, falling back to regular copy"); + let err = io::Error::last_os_error(); + // If FICLONE fails (e.g., across filesystems), fall back to regular copy + if err.raw_os_error() == Some(libc::EXDEV) + || err.raw_os_error() == Some(libc::EINVAL) + || err.raw_os_error() == Some(libc::ETXTBSY) + || err.raw_os_error() == Some(libc::EOPNOTSUPP) + { + // Fall back to regular copy + self.copy_file_contents( + src_file.as_raw_fd(), + dst_file.as_raw_fd(), + (src_stat.st_mode & 0o777) as u32, + )?; + } else { + return Err(err); + } + } + } + libc::S_IFDIR => { + // Directory: just create it with the same permissions + unsafe { + if libc::mkdirat( + parent.as_raw_fd(), + segment_name.as_ptr(), + src_stat.st_mode & 0o777, + ) < 0 + { + return Err(io::Error::last_os_error()); + } + } + } + libc::S_IFLNK => { + // Symbolic link: read target and recreate link + let mut buf = vec![0u8; libc::PATH_MAX as usize]; + let len = unsafe { + libc::readlinkat( + inode_data.file.as_raw_fd(), + EMPTY_CSTR.as_ptr(), + buf.as_mut_ptr() as *mut _, + buf.len(), + ) + }; + + if len < 0 { + return Err(io::Error::last_os_error()); + } + + buf.truncate(len as usize); + + unsafe { + if libc::symlinkat( + buf.as_ptr() as *const _, + parent.as_raw_fd(), + segment_name.as_ptr(), + ) < 0 + { + return Err(io::Error::last_os_error()); + } + + if libc::fchmodat( + parent.as_raw_fd(), + segment_name.as_ptr(), + src_stat.st_mode & 0o777, + 0, + ) < 0 + { + return Err(io::Error::last_os_error()); + } + } + } + _ => { + // Other types (devices, sockets, etc.) are not supported yet. 
+ return Err(io::Error::new( + io::ErrorKind::Unsupported, + "unsupported file type for copy up", + )); + } + } + + // Update parent for next iteration + let child = Self::open_path_file_at(parent.as_raw_fd(), &segment_name)?; + let (new_stat, new_mnt_id) = Self::statx(child.as_raw_fd(), None)?; + parent = child.try_clone()?; + + // Update the inode entry to point to the new copy in the top layer + let alt_key = InodeAltKey::new(new_stat.st_ino, new_stat.st_dev, new_mnt_id); + let mut inodes = self.inodes.write().unwrap(); + + // Create new inode data with updated dev/ino/layer_idx but same refcount + let new_data = Arc::new(InodeData { + inode: inode_data.inode, + file: child, + dev: new_stat.st_dev, + mnt_id: new_mnt_id, + refcount: AtomicU64::new(inode_data.refcount.load(Ordering::SeqCst)), + path: inode_data.path.clone(), + layer_idx: top_layer_idx, + }); + + // Replace the old entry with the new one + inodes.insert(inode_data.inode, alt_key, new_data); + } + + Ok(()) + } + + /// Helper method to copy file contents when clonefile is not available or fails + fn copy_file_contents(&self, src_fd: RawFd, dst_fd: RawFd, mode: u32) -> io::Result<()> { + unsafe { + // Copy file contents + let mut buf = [0u8; 8192]; + loop { + let n_read = libc::read(src_fd, buf.as_mut_ptr() as *mut _, buf.len()); + if n_read <= 0 { + break; + } + let mut pos = 0; + while pos < n_read { + let n_written = libc::write( + dst_fd, + buf.as_ptr().add(pos as usize) as *const _, + (n_read - pos) as usize, + ); + if n_written <= 0 { + return Err(io::Error::last_os_error()); + } + pos += n_written; + } + } + + // Explicitly set permissions to match source file + // This will override any effects from the umask + if libc::fchmod(dst_fd, mode as libc::mode_t) < 0 { + return Err(io::Error::last_os_error()); + } + } + + Ok(()) + } + + /// Ensures the file is in the top layer by copying it up if necessary. + /// + /// This function: + /// 1. Checks if the file is already in the top layer + /// 2. 
If not, looks up the complete path to the file + /// 3. Copies the file and all its parent directories to the top layer + /// 4. Returns the inode data for the copied file + /// + /// ### Arguments + /// * `inode_data` - The inode data for the file to copy up + /// + /// ### Returns + /// * `Ok(InodeData)` - The inode data for the file in the top layer + /// * `Err(io::Error)` - If the copy-up operation fails + fn ensure_top_layer(&self, inode_data: Arc) -> io::Result> { + let top_layer_idx = self.get_top_layer_idx(); + + // If already in top layer, return early + if inode_data.layer_idx == top_layer_idx { + return Ok(inode_data); + } + + // Build the path segments + let path_segments = inode_data.path.clone(); + + // Lookup the file to get all path inodes + let (_, _, path_inodes) = self.lookup_layer_by_layer(top_layer_idx, &path_segments)?; + + // Copy up the file + self.copy_up(&path_inodes)?; + + // Get the inode data for the copied file + self.get_inode_data(inode_data.inode) + } + + /// Creates a whiteout file for a given parent directory and name. + /// This is used to hide files that exist in lower layers. 
+ /// + /// # Arguments + /// * `parent` - The inode of the parent directory + /// * `name` - The name of the file to create a whiteout for + /// + /// # Returns + /// * `Ok(())` if the whiteout was created successfully + /// * `Err(io::Error)` if there was an error creating the whiteout + fn create_whiteout_for_lower(&self, parent: Inode, name: &CStr) -> io::Result<()> { + if let Ok((_, mut path_inodes)) = self.do_lookup(parent, name) { + // Copy up the parent directory if needed + path_inodes.pop(); + self.copy_up(&path_inodes)?; + let parent_fd = self.get_inode_data(parent)?.file.as_raw_fd(); + + let whiteout_cpath = self.create_whiteout_path(name)?; + let fd = unsafe { + libc::openat( + parent_fd, + whiteout_cpath.as_ptr(), + libc::O_CREAT | libc::O_WRONLY | libc::O_EXCL | libc::O_NOFOLLOW, + 0o000, // Whiteout files have no permissions + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + unsafe { libc::close(fd) }; + } + + Ok(()) + } + + /// Temporarily changes the effective UID and GID of the current thread to the requested values using RAII guards. + /// + /// If the requested UID or GID is 0 (root) or already matches the current effective UID/GID (as stored in my_uid and my_gid), + /// no credential switching is performed and None is returned for that component. + /// + /// When credential switching is performed, an RAII guard (ScopedUid or ScopedGid) is returned that will restore the + /// effective UID or GID to root (0) when dropped. If the process lacks the required capability (CAP_SETUID or CAP_SETGID) + /// and the requested UID/GID does not match the current credentials, the function returns an EPERM error. + /// + /// # Arguments + /// * `uid` - The requested user ID to switch to. + /// * `gid` - The requested group ID to switch to. + /// + /// # Returns + /// A tuple `(Option, Option)` where: + /// - `Option` is Some if the effective UID was changed, or None if no change was needed. 
+ /// - `Option` is Some if the effective GID was changed, or None if no change was needed. + /// + /// # Errors + /// Returns EPERM if the process lacks the required capability to change to a non-matching UID or GID. + fn set_scoped_credentials( + &self, + uid: libc::uid_t, + gid: libc::gid_t, + ) -> io::Result<(Option, Option)> { + // Handle GID changes first since changing UID to non-root may prevent GID changes + let scoped_gid = if gid == 0 || self.my_gid == Some(gid) { + // If the requested GID is 0 (root) or matches our current GID, + // no credential switching is needed. + None + } else if self.my_gid.is_some() { + // Process doesn't have CAP_SETGID capability and the requested GID + // does not match our current GID, so we cannot switch. + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } else { + // Process has CAP_SETGID capability, attempt to switch to the requested GID + Some(ScopedGid::new(gid)?) + }; + + // Handle UID changes after GID + let scoped_uid = if uid == 0 || self.my_uid == Some(uid) { + // If the requested UID is 0 (root) or matches our current UID, + // no credential switching is needed. + None + } else if self.my_uid.is_some() { + // Process doesn't have CAP_SETUID capability and the requested UID + // does not match our current UID, so we cannot switch. + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } else { + // Process has CAP_SETUID capability, attempt to switch to the requested UID + Some(ScopedUid::new(uid)?) 
+ }; + + Ok((scoped_uid, scoped_gid)) + } + + /// Decrements the reference count for an inode and removes it if the count reaches zero + fn do_forget(&self, inode: Inode, count: u64) { + let mut inodes = self.inodes.write().unwrap(); + if let Some(data) = inodes.get(&inode) { + // Acquiring the write lock on the inode map prevents new lookups from incrementing the + // refcount but there is the possibility that a previous lookup already acquired a + // reference to the inode data and is in the process of updating the refcount so we need + // to loop here until we can decrement successfully. + loop { + let refcount = data.refcount.load(Ordering::Relaxed); + + // Saturating sub because it doesn't make sense for a refcount to go below zero and + // we don't want misbehaving clients to cause integer overflow. + let new_count = refcount.saturating_sub(count); + + if data + .refcount + .compare_exchange(refcount, new_count, Ordering::Release, Ordering::Relaxed) + .unwrap() + == refcount + { + if new_count == 0 { + // We just removed the last refcount for this inode. There's no need for an + // acquire fence here because we hold a write lock on the inode map and any + // thread that is waiting to do a forget on the same inode will have to wait + // until we release the lock. So there's is no other release store for us to + // synchronize with before deleting the entry. + inodes.remove(&inode); + } + break; + } + } + } + } + + /// Performs an open operation + fn do_open(&self, inode: Inode, mut flags: u32) -> io::Result<(Option, OpenOptions)> { + if !self.cap_fowner { + // O_NOATIME can only be used with CAP_FOWNER or if we are the file + // owner. Not worth checking the latter, just drop it if we don't + // have the cap. This makes overlayfs mounts with virtiofs lower dirs + // work. 
+ flags &= !(libc::O_NOATIME as u32); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer + let inode_data = self.ensure_top_layer(inode_data)?; + + // Open the file with the appropriate flags and generate a new unique handle ID + let file = RwLock::new(self.open_inode(inode_data.inode, flags as i32)?); + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + + // Create handle data structure with file and empty dirstream + let data = HandleData { + inode, + file, + exported: Default::default(), + }; + + // Store the handle data in the handles map + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + // Set up OpenOptions based on the cache policy configuration + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + // For CachePolicy::Never, set DIRECT_IO to bypass kernel caching for files (not directories) + CachePolicy::Never => opts.set( + OpenOptions::DIRECT_IO, + flags & (libc::O_DIRECTORY as u32) == 0, + ), + + // For CachePolicy::Always, set different caching options based on whether it's a file or directory + CachePolicy::Always => { + if flags & (libc::O_DIRECTORY as u32) == 0 { + // For files: KEEP_CACHE maintains kernel cache between open/close operations + opts |= OpenOptions::KEEP_CACHE; + } else { + // For directories: CACHE_DIR enables caching of directory entries + opts |= OpenOptions::CACHE_DIR; + } + } + + // For CachePolicy::Auto, use default caching behavior + _ => {} + }; + + // Return the handle and options + Ok((Some(handle), opts)) + } + + /// Performs a release operation + fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> { + let mut handles = self.handles.write().unwrap(); + + if let btree_map::Entry::Occupied(e) = handles.entry(handle) { + if e.get().inode == inode { + if e.get().exported.load(Ordering::Relaxed) { + self.config + .export_table + .as_ref() + .unwrap() + .lock() + .unwrap() + 
.remove(&(self.config.export_fsid, handle)); + } + + // We don't need to close the file here because that will happen automatically when + // the last `Arc` is dropped. + e.remove(); + return Ok(()); + } + } + + Err(ebadf()) + } + + /// Performs a mkdir operation + fn do_mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. + } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Create the directory + let res = unsafe { libc::mkdirat(parent_fd, name.as_ptr(), mode & !umask) }; + if res == 0 { + let file = Self::open_path_file_at(parent_fd, name)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + /// Performs an unlink 
operation + fn do_unlink(&self, parent: Inode, name: &CStr, flags: libc::c_int) -> io::Result<()> { + let top_layer_idx = self.get_top_layer_idx(); + let (entry, _) = self.do_lookup(parent, name)?; + + // If the inode is in the top layer. the parent will also be in the top layer, we need to unlink it. + let entry_data = self.get_inode_data(entry.inode)?; + if entry_data.layer_idx == top_layer_idx { + let parent_fd = self.get_inode_data(parent)?.file.as_raw_fd(); + + // Remove the inode from the overlayfs + let res = unsafe { libc::unlinkat(parent_fd, name.as_ptr(), flags) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // If after an unlink, the entry still exists in a lower layer, we need to add a whiteout + self.create_whiteout_for_lower(parent, name)?; + + Ok(()) + } + + /// Returns an iterator over all valid entries in the directory across all layers. + /// + /// Note: OverlayFs is a high-level, layered filesystem. A simple readdir on a single directory does not produce the complete view. + /// This function traverses the directory across multiple layers, merging entries while handling duplicates, + /// whiteout files, and opaque markers. + /// + /// ## Arguments + /// * `dir` - The inode of the directory to iterate over. + /// * `add_entry` - A callback function that processes each directory entry. If the callback returns 0, + /// it signals that the directory buffer is full and iteration should stop. + /// + /// ## Returns + /// * `Ok(())` if the directory was iterated successfully. + /// * `Err(io::Error)` if an error occurred during iteration. 
+ pub(super) fn process_dir_entries(&self, dir: Inode, mut add_entry: F) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + // Local state to track iteration over layers + struct LazyReaddirState { + current_layer: isize, // current layer (top-down) + inode_data: Option>, + current_iter: Option, + seen: HashSet>, + } + + let inode_data = self.get_inode_data(dir)?; + let top_layer = self.get_top_layer_idx() as isize; + let path = inode_data.path.clone(); + let mut state = LazyReaddirState { + current_layer: top_layer, + inode_data: None, + current_iter: None, + seen: HashSet::new(), + }; + + let mut current_offset = 0u64; + let mut opaque_marker_found = false; + loop { + // If no current iterator, attempt to initialize one for the current layer + if state.current_iter.is_none() { + if state.current_layer < 0 { + break; // All layers exhausted + } + + let layer_root = self.get_layer_root(state.current_layer as usize)?; + let mut path_inodes = vec![layer_root.clone()]; + + match self.lookup_segment_by_segment(&layer_root, &path, &mut path_inodes) { + Some(Ok(_)) => { + let last_inode = path_inodes.last().unwrap(); + let path = Self::data_to_path(last_inode)?; + let dir_str = path.as_c_str().to_str().map_err(|_| { + io::Error::new(io::ErrorKind::Other, "Invalid path string") + })?; + + state.inode_data = Some(last_inode.clone()); + state.current_iter = Some(std::fs::read_dir(dir_str)?); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + state.current_layer -= 1; + continue; + } + Some(Err(e)) => return Err(e), + None => { + state.current_layer = -1; + continue; + } + } + } + + if let Some(iter) = state.current_iter.as_mut() { + if let Some(entry_result) = iter.next() { + let entry = entry_result?; + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + + if state.seen.contains(name.as_bytes()) { + continue; + } + + // Handle opaque marker and whiteout files + if name_str == OPAQUE_MARKER { + // Opaque marker found; mark 
it and skip this entry + opaque_marker_found = true; + continue; + } else if name_str.starts_with(WHITEOUT_PREFIX) { + // Whiteout file; skip it + let actual = &name_str[WHITEOUT_PREFIX.len()..]; + state.seen.insert(actual.as_bytes().to_vec()); + continue; + } else { + state.seen.insert(name.as_bytes().to_vec()); + } + + let metadata = entry.metadata()?; + let mode = metadata.mode() as u32; + let s_ifmt = libc::S_IFMT as u32; + let type_ = if mode & s_ifmt == (libc::S_IFDIR as u32) { + libc::DT_DIR + } else if mode & s_ifmt == (libc::S_IFREG as u32) { + libc::DT_REG + } else if mode & s_ifmt == (libc::S_IFLNK as u32) { + libc::DT_LNK + } else if mode & s_ifmt == (libc::S_IFIFO as u32) { + libc::DT_FIFO + } else if mode & s_ifmt == (libc::S_IFCHR as u32) { + libc::DT_CHR + } else if mode & s_ifmt == (libc::S_IFBLK as u32) { + libc::DT_BLK + } else if mode & s_ifmt == (libc::S_IFSOCK as u32) { + libc::DT_SOCK + } else { + libc::DT_UNKNOWN + }; + + current_offset += 1; + + let dir_entry = DirEntry { + ino: metadata.ino(), + offset: current_offset, + type_: type_ as u32, + name: name.as_bytes(), + }; + + if add_entry(dir_entry)? == 0 { + return Ok(()); + } + } else { + state.current_iter = None; + if opaque_marker_found { + break; + } + state.current_layer -= 1; + continue; + } + } + } + + Ok(()) + } + + /// Reads directory entries for the given inode by merging entries from all underlying layers. + /// + /// Unlike conventional filesystems that simply call readdir on a directory file descriptor, + /// OverlayFs must aggregate entries from multiple layers. The `offset` parameter specifies the starting + /// index in the merged list of directory entries. The provided `add_entry` callback is invoked for each + /// entry; a return value of 0 indicates that the directory buffer is full and reading should cease. + /// + /// NOTE: The current implementation of offset does not entirely follow FUSE expected behaviors. 
+ /// Changes to entries in the write layer can affect the offset, potentially causing inconsistencies + /// in directory listing between calls. + /// + /// TODO: Implement a more robust offset handling mechanism that maintains consistency even when + /// the underlying directory structure changes. One way is making offset a composite value of + /// layer (1 MSB) + offset (7 LSB). This will also require having multiple open dirs from lower layers + /// in [HandleData]. + pub(super) fn do_readdir( + &self, + inode: Inode, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + if size == 0 { + return Ok(()); + } + + let mut current_offset = 0u64; + self.process_dir_entries(inode, |entry| { + if current_offset < offset { + current_offset += 1; + return Ok(1); + } + + add_entry(entry) + }) + } + + fn do_create( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. We don't + // really check `flags` because if the kernel can't handle poorly specified flags then we + // have much bigger problems. + let fd = unsafe { + libc::openat( + parent_fd, + name.as_ptr(), + flags as i32 | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, + mode & !(umask & 0o777), + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + let (stat, mnt_id) = Self::statx(fd, None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created file + let file = unsafe { File::from_raw_fd(fd) }; + let (inode, _) = self.create_inode( + file.try_clone()?, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created file + let entry = self.create_entry(inode, stat); + + // Create the handle for the newly created file + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + let data = HandleData { + inode: entry.inode, + file: RwLock::new(file), + exported: Default::default(), + }; + + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + CachePolicy::Never => opts |= OpenOptions::DIRECT_IO, + CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE, + _ => {} + }; + + Ok((entry, Some(handle), opts)) + } + + fn do_getattr(&self, inode: Inode) -> io::Result<(libc::stat64, Duration)> { + let fd = self.get_inode_data(inode)?.file.as_raw_fd(); + let (st, _) = Self::statx(fd, None)?; + + Ok((st, self.config.attr_timeout)) + } + + fn do_rename( + &self, + old_parent: Inode, 
+ old_name: &CStr, + new_parent: Inode, + new_name: &CStr, + flags: u32, + ) -> io::Result<()> { + // Copy up the old path to the top layer if not already in the top layer + let (_, old_path_inodes) = self.do_lookup(old_parent, old_name)?; + self.copy_up(&old_path_inodes)?; + let old_parent_data = self.get_inode_data(old_parent)?; + + // Copy up the new parent to the top layer if not already in the top layer + let new_parent_data = self.ensure_top_layer(self.get_inode_data(new_parent)?)?; + + // Perform the rename + let res = unsafe { + libc::renameat2( + old_parent_data.file.as_raw_fd(), + old_name.as_ptr(), + new_parent_data.file.as_raw_fd(), + new_name.as_ptr(), + flags, + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // After successful rename, check if we need to add a whiteout for the old path + self.create_whiteout_for_lower(old_parent, old_name)?; + + Ok(()) + } + + fn do_mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Create the node device + let res = unsafe { + libc::mknodat( + parent_fd, + name.as_ptr(), + (mode & !umask) as libc::mode_t, + u64::from(rdev), + ) + }; + + if res == 0 { + let file = Self::open_path_file_at(parent_fd, name)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + fn do_link(&self, inode: Inode, newparent: Inode, newname: &CStr) -> io::Result { + // Get the fd for the source file. + let inode_data = self.get_inode_data(inode)?; + + // Copy up the source file to the top layer if needed + let inode_data = self.ensure_top_layer(inode_data)?; + let old_fd_str = Self::data_to_fd_str(&inode_data)?; + + // Extraneous check to ensure the source file is not a symlink + let stat = Self::statx(inode_data.file.as_raw_fd(), None)?.0; + if stat.st_mode & libc::S_IFMT == libc::S_IFLNK { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot link to a symlink", + )); + } + + // Get and ensure new parent is in top layer + let new_parent_data = self.ensure_top_layer(self.get_inode_data(newparent)?)?; + let new_parent_fd = new_parent_data.file.as_raw_fd(); + + // Safety: It is expected that old_fd_str has been checked by the kernel to not be a symlink. 
+ let res = unsafe { + libc::linkat( + self.proc_self_fd.as_raw_fd(), + old_fd_str.as_ptr(), + new_parent_fd, + newname.as_ptr(), + libc::AT_SYMLINK_FOLLOW, // Follow is needed to handle /proc/self/fd/ symlink + ) + }; + + if res == 0 { + let file = Self::open_path_file_at(new_parent_fd, newname)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = new_parent_data.path.clone(); + path.push(self.intern_name(newname)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + new_parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + fn do_symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Create the node device + let res = unsafe { libc::symlinkat(linkname.as_ptr(), parent_fd, name.as_ptr()) }; + + if res == 0 { + let file = Self::open_path_file_at(parent_fd, name)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + fn do_readlink(&self, inode: Inode) -> io::Result> { + // Get the path for this inode + let inode_data = self.get_inode_data(inode)?; + + // Allocate a buffer for the link target + let mut buf = vec![0; libc::PATH_MAX as usize]; + + // Safe because this will only modify the contents of `buf` and we check the return value. 
+ let res = unsafe { + libc::readlinkat( + inode_data.file.as_raw_fd(), + EMPTY_CSTR.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + buf.len(), + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Resize the buffer to the actual length of the link target + buf.resize(res as usize, 0); + Ok(buf) + } + + fn do_setxattr(&self, inode: Inode, name: &CStr, value: &[u8], flags: u32) -> io::Result<()> { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = + match self.open_inode_or_path(inode_data.inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this doesn't modify any memory and we check the return value. + unsafe { + libc::fsetxattr( + file.as_raw_fd(), + name.as_ptr(), + value.as_ptr() as *const libc::c_void, + value.len(), + flags as libc::c_int, + ) + } + } + FileOrPath::Path(path) => { + // Safe because this doesn't modify any memory and we check the return value. 
+ unsafe { + libc::lsetxattr( + path.as_ptr(), + name.as_ptr(), + value.as_ptr() as *const libc::c_void, + value.len(), + flags as libc::c_int, + ) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_getxattr(&self, inode: Inode, name: &CStr, size: u32) -> io::Result { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Don't allow getting attributes for init + if inode == self.init_inode { + return Err(io::Error::from_raw_os_error(libc::ENODATA)); + } + + // Safe because this will only modify the contents of `buf` + let mut buf = vec![0; size as usize]; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = match self.open_inode_or_path(inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this will only modify the contents of `buf`. + unsafe { + libc::fgetxattr( + file.as_raw_fd(), + name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + size as libc::size_t, + ) + } + } + FileOrPath::Path(path) => { + // Safe because this will only modify the contents of `buf`. 
+ unsafe { + libc::lgetxattr( + path.as_ptr(), + name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + size as libc::size_t, + ) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + if size == 0 { + Ok(GetxattrReply::Count(res as u32)) + } else { + // Truncate the buffer to the actual length of the value + buf.resize(res as usize, 0); + Ok(GetxattrReply::Value(buf)) + } + } + + fn do_listxattr(&self, inode: Inode, size: u32) -> io::Result { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Don't allow getting attributes for init + if inode == self.init_inode { + return Err(io::Error::from_raw_os_error(libc::ENODATA)); + } + + // Safe because this will only modify the contents of `buf` + let mut buf = vec![0; size as usize]; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = match self.open_inode_or_path(inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this will only modify the contents of `buf`. + unsafe { + libc::flistxattr( + file.as_raw_fd(), + buf.as_mut_ptr() as *mut libc::c_char, + size as libc::size_t, + ) + } + } + FileOrPath::Path(path) => { + // Safe because this will only modify the contents of `buf`. 
+ unsafe { + libc::llistxattr( + path.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + size as libc::size_t, + ) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + if size == 0 { + Ok(ListxattrReply::Count(res as u32)) + } else { + // Truncate the buffer to the actual length of the value + buf.resize(res as usize, 0); + Ok(ListxattrReply::Names(buf)) + } + } + + fn do_removexattr(&self, inode: Inode, name: &CStr) -> io::Result<()> { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = + match self.open_inode_or_path(inode_data.inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this doesn't modify any memory and we check the return value. + unsafe { libc::fremovexattr(file.as_raw_fd(), name.as_ptr()) } + } + FileOrPath::Path(path) => { + // Safe because this doesn't modify any memory and we check the return value. + unsafe { libc::lremovexattr(path.as_ptr(), name.as_ptr()) } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_fallocate( + &self, + inode: Inode, + handle: Handle, + mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. 
+ let res = unsafe { + libc::fallocate64( + fd, + mode as libc::c_int, + offset as libc::off64_t, + length as libc::off64_t, + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_lseek(&self, inode: Inode, handle: Handle, offset: u64, whence: u32) -> io::Result { + let data = self.get_inode_handle_data(inode, handle)?; + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::lseek64(fd, offset as libc::off64_t, whence as libc::c_int) }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(res as u64) + } + + fn do_copyfilerange( + &self, + inode_in: Inode, + handle_in: Handle, + offset_in: u64, + inode_out: Inode, + handle_out: Handle, + offset_out: u64, + len: u64, + flags: u64, + ) -> io::Result { + let data_in = self.get_inode_handle_data(inode_in, handle_in)?; + let data_out = self.get_inode_handle_data(inode_out, handle_out)?; + let fd_in = data_in.file.write().unwrap().as_raw_fd(); + let fd_out = data_out.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. 
+ let res = unsafe { + libc::copy_file_range( + fd_in, + &mut (offset_in as i64) as &mut _ as *mut _, + fd_out, + &mut (offset_out as i64) as &mut _ as *mut _, + len.try_into().unwrap(), + flags.try_into().unwrap(), + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(res as usize) + } + + fn do_setupmapping( + &self, + inode: Inode, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + host_shm_base: u64, + shm_size: u64, + ) -> io::Result<()> { + let open_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { + libc::O_RDWR + } else { + libc::O_RDONLY + }; + + let prot_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { + libc::PROT_READ | libc::PROT_WRITE + } else { + libc::PROT_READ + }; + + if (moffset + len) > shm_size { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + + let addr = host_shm_base + moffset; + + if inode == self.init_inode { + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + len as usize, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED, + -1, + 0, + ) + }; + + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + let to_copy = if len as usize > INIT_BINARY.len() { + INIT_BINARY.len() + } else { + len as usize + }; + + unsafe { + libc::memcpy( + addr as *mut libc::c_void, + INIT_BINARY.as_ptr() as *const _, + to_copy, + ) + }; + + return Ok(()); + } + + // Ensure the inode is in the top layer + let inode_data = self.get_inode_data(inode)?; + let inode_data = self.ensure_top_layer(inode_data)?; + + let file = self.open_inode(inode_data.inode, open_flags)?; + let fd = file.as_raw_fd(); + + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + len as usize, + prot_flags, + libc::MAP_SHARED | libc::MAP_FIXED, + fd, + foffset as libc::off_t, + ) + }; + + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_removemapping( + &self, + 
requests: Vec, + host_shm_base: u64, + shm_size: u64, + ) -> io::Result<()> { + for req in requests { + let addr = host_shm_base + req.moffset; + if (req.moffset + req.len) > shm_size { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + debug!("removemapping: addr={:x} len={:?}", addr, req.len); + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + req.len as usize, + libc::PROT_NONE, + libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED, + -1, + 0_i64, + ) + }; + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + } + + Ok(()) + } + + fn do_ioctl( + &self, + inode: Inode, + handle: Handle, + cmd: u32, + out_size: u32, + ) -> io::Result> { + const VIRTIO_IOC_MAGIC: u8 = b'v'; + const VIRTIO_IOC_TYPE_EXPORT_FD: u8 = 1; + const VIRTIO_IOC_EXPORT_FD_SIZE: usize = 2 * mem::size_of::(); + const VIRTIO_IOC_EXPORT_FD_REQ: u32 = request_code_read!( + VIRTIO_IOC_MAGIC, + VIRTIO_IOC_TYPE_EXPORT_FD, + VIRTIO_IOC_EXPORT_FD_SIZE + ) as u32; + + match cmd { + VIRTIO_IOC_EXPORT_FD_REQ => { + if out_size as usize != VIRTIO_IOC_EXPORT_FD_SIZE { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + + let mut exports = self + .config + .export_table + .as_ref() + .ok_or(io::Error::from_raw_os_error(libc::EOPNOTSUPP))? 
+ .lock() + .unwrap(); + + let handles = self.handles.read().unwrap(); + let data = handles + .get(&handle) + .filter(|hd| hd.inode == inode) + .ok_or_else(ebadf)?; + + data.exported.store(true, Ordering::Relaxed); + + let fd = data.file.read().unwrap().try_clone()?; + + exports.insert((self.config.export_fsid, handle), fd); + + let mut ret: Vec<_> = self.config.export_fsid.to_ne_bytes().into(); + ret.extend_from_slice(&handle.to_ne_bytes()); + Ok(ret) + } + _ => Err(io::Error::from_raw_os_error(libc::EOPNOTSUPP)), + } + } +} + +//-------------------------------------------------------------------------------------------------- +// Functions +//-------------------------------------------------------------------------------------------------- + +/// Returns a "bad file descriptor" error +fn ebadf() -> io::Error { + io::Error::from_raw_os_error(libc::EBADF) +} + +/// Returns an "invalid argument" error +fn einval() -> io::Error { + io::Error::from_raw_os_error(libc::EINVAL) +} + +//-------------------------------------------------------------------------------------------------- +// Trait Implementations +//-------------------------------------------------------------------------------------------------- + +impl FileSystem for OverlayFs { + type Inode = Inode; + type Handle = Handle; + + fn init(&self, capable: FsOptions) -> io::Result { + // Set the umask to 0 to ensure that all file permissions are set correctly + unsafe { libc::umask(0o000) }; + + // Enable readdirplus if supported + let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO; + + // Enable writeback caching if requested and supported + if self.config.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { + opts |= FsOptions::WRITEBACK_CACHE; + self.writeback.store(true, Ordering::Relaxed); + } + + // Enable submounts if supported + if capable.contains(FsOptions::SUBMOUNTS) { + opts |= FsOptions::SUBMOUNTS; + self.announce_submounts.store(true, Ordering::Relaxed); + } + + Ok(opts) 
+ } + + fn destroy(&self) { + // Clear all handles + self.handles.write().unwrap().clear(); + + // Clear all inodes + self.inodes.write().unwrap().clear(); + } + + fn statfs(&self, _ctx: Context, inode: Inode) -> io::Result { + // Get the inode data + let data = self.get_inode_data(inode)?; + + // Call statvfs64 to get filesystem statistics + // Safe because this will only modify `out` and we check the return value. + let mut out = MaybeUninit::::zeroed(); + let res = unsafe { libc::fstatvfs64(data.file.as_raw_fd(), out.as_mut_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because statvfs64 initialized the struct + Ok(unsafe { out.assume_init() }) + } + + fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result { + Self::validate_name(name)?; + + #[cfg(not(feature = "efi"))] + let init_name = unsafe { CStr::from_bytes_with_nul_unchecked(INIT_CSTR) }; + + #[cfg(not(feature = "efi"))] + if self.init_inode != 0 && name == init_name { + let mut st: bindings::stat64 = unsafe { std::mem::zeroed() }; + st.st_size = INIT_BINARY.len() as i64; + st.st_ino = self.init_inode; + st.st_mode = 0o100_755; + + return Ok(Entry { + inode: self.init_inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + }); + } + + let (entry, _) = self.do_lookup(parent, name)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn forget(&self, _ctx: Context, inode: Inode, count: u64) { + self.do_forget(inode, count); + } + + fn opendir( + &self, + _ctx: Context, + inode: Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + self.do_open(inode, flags | (libc::O_DIRECTORY as u32)) + } + + fn releasedir( + &self, + _ctx: Context, + inode: Inode, + _flags: u32, + handle: Handle, + ) -> io::Result<()> { + self.do_release(inode, handle) + } + + fn mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: 
Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_mkdir(ctx, parent, name, mode, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { + self.do_unlink(parent, name, libc::AT_REMOVEDIR) + } + + fn readdir( + &self, + _ctx: Context, + inode: Inode, + _handle: Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(filesystem::DirEntry<'_>) -> io::Result, + { + self.do_readdir(inode, size, offset, add_entry) + } + + fn readdirplus( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(filesystem::DirEntry<'_>, Entry) -> io::Result, + { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_readdir(inode, size, offset, |dir_entry| { + let (entry, _) = self.do_lookup(inode, &CString::new(dir_entry.name).unwrap())?; + add_entry(dir_entry, entry) + }) + } + + fn open( + &self, + _ctx: Context, + inode: Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + if inode == self.init_inode { + Ok((Some(self.init_handle), OpenOptions::empty())) + } else { + self.do_open(inode, flags) + } + } + + fn release( + &self, + _ctx: Context, + inode: Inode, + _flags: u32, + handle: Handle, + _flush: bool, + _flock_release: bool, + _lock_owner: Option, + ) -> io::Result<()> { + self.do_release(inode, handle) + } + + fn create( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + Self::validate_name(name)?; + let (entry, handle, opts) = + self.do_create(ctx, parent, name, mode, flags, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok((entry, handle, opts)) + } + + fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { + self.do_unlink(parent, name, 0) 
+ } + + fn read( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + mut w: W, + size: u32, + offset: u64, + _lock_owner: Option, + _flags: u32, + ) -> io::Result { + #[cfg(not(feature = "efi"))] + if inode == self.init_inode { + return w.write(&INIT_BINARY[offset as usize..(offset + (size as u64)) as usize]); + } + + let data = self.get_inode_handle_data(inode, handle)?; + + let f = data.file.read().unwrap(); + w.write_from(&f, size as usize, offset) + } + + fn write( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + mut r: R, + size: u32, + offset: u64, + _lock_owner: Option, + _delayed_write: bool, + kill_priv: bool, + _flags: u32, + ) -> io::Result { + if kill_priv { + // We need to change credentials during a write so that the kernel will remove setuid + // or setgid bits from the file if it was written to by someone other than the owner. + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + } + + let data = self.get_inode_handle_data(inode, handle)?; + let f = data.file.read().unwrap(); + r.read_to(&f, size as usize, offset) + } + + fn getattr( + &self, + _ctx: Context, + inode: Inode, + _handle: Option, + ) -> io::Result<(libc::stat64, Duration)> { + self.do_getattr(inode) + } + + fn setattr( + &self, + _ctx: Context, + inode: Inode, + attr: libc::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(libc::stat64, Duration)> { + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // Get the file identifier - either from handle or path + let file_id = if let Some(handle) = handle { + // Get the handle data + let handles = self.handles.read().unwrap(); + let handle_data = handles.get(&handle).ok_or_else(ebadf)?; + let file = handle_data.file.read().unwrap(); + FileId::Fd(file.as_raw_fd()) + } else { + let fd_str = Self::data_to_fd_str(&inode_data)?; + 
FileId::Path(fd_str) + }; + + // Handle mode changes + if valid.contains(SetattrValid::MODE) { + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { + match file_id { + FileId::Fd(fd) => libc::fchmod(fd, attr.st_mode), + FileId::Path(ref p) => { + libc::fchmodat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), attr.st_mode, 0) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Handle ownership changes + if valid.intersects(SetattrValid::UID | SetattrValid::GID) { + let uid = if valid.contains(SetattrValid::UID) { + attr.st_uid + } else { + // Cannot use -1 here because these are unsigned values. + u32::MAX + }; + + let gid = if valid.contains(SetattrValid::GID) { + attr.st_gid + } else { + // Cannot use -1 here because these are unsigned values. + u32::MAX + }; + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { + libc::fchownat( + inode_data.file.as_raw_fd(), + EMPTY_CSTR.as_ptr(), + uid, + gid, + libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Handle size changes + if valid.contains(SetattrValid::SIZE) { + // Safe because this doesn't modify any memory and we check the return value. + let res = match file_id { + FileId::Fd(fd) => unsafe { libc::ftruncate(fd, attr.st_size) }, + _ => { + // There is no `ftruncateat` so we need to get a new fd and truncate it. 
+ let f = self.open_inode(inode, libc::O_NONBLOCK | libc::O_RDWR)?; + unsafe { libc::ftruncate(f.as_raw_fd(), attr.st_size) } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Handle timestamp changes + if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) { + let mut tvs = [ + libc::timespec { + tv_sec: 0, + tv_nsec: libc::UTIME_OMIT, + }, + libc::timespec { + tv_sec: 0, + tv_nsec: libc::UTIME_OMIT, + }, + ]; + + if valid.contains(SetattrValid::ATIME_NOW) { + tvs[0].tv_nsec = libc::UTIME_NOW; + } else if valid.contains(SetattrValid::ATIME) { + tvs[0].tv_sec = attr.st_atime; + tvs[0].tv_nsec = attr.st_atime_nsec; + } + + if valid.contains(SetattrValid::MTIME_NOW) { + tvs[1].tv_nsec = libc::UTIME_NOW; + } else if valid.contains(SetattrValid::MTIME) { + tvs[1].tv_sec = attr.st_mtime; + tvs[1].tv_nsec = attr.st_mtime_nsec; + } + + // Safe because this doesn't modify any memory and we check the return value + let res = match file_id { + FileId::Fd(fd) => unsafe { libc::futimens(fd, tvs.as_ptr()) }, + FileId::Path(ref p) => unsafe { + libc::utimensat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), tvs.as_ptr(), 0) + }, + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Return the updated attributes and timeout + self.do_getattr(inode) + } + + fn rename( + &self, + _ctx: Context, + olddir: Inode, + oldname: &CStr, + newdir: Inode, + newname: &CStr, + flags: u32, + ) -> io::Result<()> { + Self::validate_name(oldname)?; + Self::validate_name(newname)?; + self.do_rename(olddir, oldname, newdir, newname, flags) + } + + fn mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_mknod(ctx, parent, name, mode, rdev, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn link( + &self, + _ctx: Context, + inode: Inode, + newparent: Inode, + newname: 
&CStr, + ) -> io::Result { + Self::validate_name(newname)?; + let entry = self.do_link(inode, newparent, newname)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_symlink(ctx, linkname, parent, name, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn readlink(&self, _ctx: Context, inode: Inode) -> io::Result> { + self.do_readlink(inode) + } + + fn flush( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + _lock_owner: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + // Since this method is called whenever an fd is closed in the client, we can emulate that + // behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe + // because this doesn't modify any memory and we check the return values. + unsafe { + let newfd = libc::dup(data.file.write().unwrap().as_raw_fd()); + if newfd < 0 { + return Err(io::Error::last_os_error()); + } + + if libc::close(newfd) < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + } + + fn fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return values. 
+ let res = unsafe { + if datasync { + libc::fdatasync(fd) + } else { + libc::fsync(fd) + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn fsyncdir( + &self, + ctx: Context, + inode: Inode, + datasync: bool, + handle: Handle, + ) -> io::Result<()> { + self.fsync(ctx, inode, datasync, handle) + } + + fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> { + let inode_data = self.get_inode_data(inode)?; + let fd = inode_data.file.as_raw_fd(); + + let (st, _) = Self::statx(fd, None)?; + let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); + + if mode == libc::F_OK { + // The file exists since we were able to call `stat(2)` on it. + return Ok(()); + } + + if (mode & libc::R_OK) != 0 + && ctx.uid != 0 + && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0) + && st.st_mode & 0o004 == 0 + { + return Err(io::Error::from_raw_os_error(libc::EACCES)); + } + + if (mode & libc::W_OK) != 0 + && ctx.uid != 0 + && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0) + && st.st_mode & 0o002 == 0 + { + return Err(io::Error::from_raw_os_error(libc::EACCES)); + } + + // root can only execute something if it is executable by one of the owner, the group, or + // everyone. 
+ if (mode & libc::X_OK) != 0 + && (ctx.uid != 0 || st.st_mode & 0o111 == 0) + && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0) + && st.st_mode & 0o001 == 0 + { + return Err(io::Error::from_raw_os_error(libc::EACCES)); + } + + Ok(()) + } + + fn setxattr( + &self, + _ctx: Context, + inode: Inode, + name: &CStr, + value: &[u8], + flags: u32, + ) -> io::Result<()> { + self.do_setxattr(inode, name, value, flags) + } + + fn getxattr( + &self, + _ctx: Context, + inode: Inode, + name: &CStr, + size: u32, + ) -> io::Result { + self.do_getxattr(inode, name, size) + } + + fn listxattr(&self, _ctx: Context, inode: Inode, size: u32) -> io::Result { + self.do_listxattr(inode, size) + } + + fn removexattr(&self, _ctx: Context, inode: Inode, name: &CStr) -> io::Result<()> { + self.do_removexattr(inode, name) + } + + fn fallocate( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + self.do_fallocate(inode, handle, mode, offset, length) + } + + fn lseek( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + offset: u64, + whence: u32, + ) -> io::Result { + self.do_lseek(inode, handle, offset, whence) + } + + fn copyfilerange( + &self, + _ctx: Context, + inode_in: Inode, + handle_in: Handle, + offset_in: u64, + inode_out: Inode, + handle_out: Handle, + offset_out: u64, + len: u64, + flags: u64, + ) -> io::Result { + self.do_copyfilerange( + inode_in, handle_in, offset_in, inode_out, handle_out, offset_out, len, flags, + ) + } + + fn setupmapping( + &self, + _ctx: Context, + inode: Inode, + _handle: Handle, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + host_shm_base: u64, + shm_size: u64, + ) -> io::Result<()> { + self.do_setupmapping(inode, foffset, len, flags, moffset, host_shm_base, shm_size) + } + + fn removemapping( + &self, + _ctx: Context, + requests: Vec, + host_shm_base: u64, + shm_size: u64, + ) -> io::Result<()> { + 
self.do_removemapping(requests, host_shm_base, shm_size) + } + + fn ioctl( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + _flags: u32, + cmd: u32, + _arg: u64, + _in_size: u32, + out_size: u32, + ) -> io::Result> { + self.do_ioctl(inode, handle, cmd, out_size) + } +} + +impl Drop for ScopedGid { + fn drop(&mut self) { + let res = unsafe { libc::syscall(libc::SYS_setresgid, -1, 0, -1) }; + if res != 0 { + log::error!( + "failed to restore gid back to root: {}", + io::Error::last_os_error() + ); + } + } +} + +impl Drop for ScopedUid { + fn drop(&mut self) { + let res = unsafe { libc::syscall(libc::SYS_setresuid, -1, 0, -1) }; + if res != 0 { + log::error!( + "failed to restore uid back to root: {}", + io::Error::last_os_error() + ); + } + } +} + +impl Default for Config { + fn default() -> Self { + Config { + entry_timeout: Duration::from_secs(5), + attr_timeout: Duration::from_secs(5), + cache_policy: Default::default(), + writeback: false, + root_dir: String::from("/"), + xattr: true, + proc_sfd_rawfd: None, + export_fsid: 0, + export_table: None, + layers: vec![], + } + } +} diff --git a/src/devices/src/virtio/fs/macos/overlayfs/fs.rs b/src/devices/src/virtio/fs/macos/overlayfs.rs similarity index 97% rename from src/devices/src/virtio/fs/macos/overlayfs/fs.rs rename to src/devices/src/virtio/fs/macos/overlayfs.rs index 79b7eca45..ba8f11489 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/fs.rs +++ b/src/devices/src/virtio/fs/macos/overlayfs.rs @@ -26,6 +26,14 @@ use crate::virtio::fs::fuse; use crate::virtio::fs::multikey::MultikeyBTreeMap; use crate::virtio::linux_errno::{linux_error, LINUX_ERANGE}; + +//-------------------------------------------------------------------------------------------------- +// Modules +//-------------------------------------------------------------------------------------------------- + +#[path = "../tests/overlayfs/mod.rs"] +mod tests; + 
//-------------------------------------------------------------------------------------------------- // Constants //-------------------------------------------------------------------------------------------------- @@ -46,7 +54,7 @@ const OWNER_PERMS_XATTR_KEY: &[u8] = b"user.vm.owner_perms\0"; const MAX_LAYERS: usize = 128; #[cfg(not(feature = "efi"))] -static INIT_BINARY: &[u8] = include_bytes!("../../../../../../../init/init"); +static INIT_BINARY: &[u8] = include_bytes!("../../../../../../init/init"); const INIT_CSTR: &[u8] = b"init.krun\0"; @@ -237,6 +245,9 @@ pub struct OverlayFs { /// Whether writeback caching is enabled writeback: AtomicBool, + /// Whether submounts are supported + announce_submounts: AtomicBool, + /// Configuration options config: Config, @@ -293,6 +304,7 @@ impl OverlayFs { init_handle: 0, map_windows: Mutex::new(HashMap::new()), writeback: AtomicBool::new(false), + announce_submounts: AtomicBool::new(false), config, filenames: Arc::new(RwLock::new(SymbolTable::new())), layer_roots: Arc::new(RwLock::new(layer_roots)), @@ -875,7 +887,7 @@ impl OverlayFs { &'a self, start_layer_idx: usize, path_segments: &[Symbol], - ) -> io::Result<(Entry, Vec>)> { + ) -> io::Result<(Entry, Arc, Vec>)> { let mut path_inodes = vec![]; // Start from the start_layer_idx and try each layer down to layer 0 @@ -894,7 +906,7 @@ impl OverlayFs { // Check if we already have this inode let inodes = self.inodes.read().unwrap(); if let Some(data) = inodes.get_alt(&alt_key) { - return Ok((self.create_entry(data.inode, st), path_inodes)); + return Ok((self.create_entry(data.inode, st), data.clone(), path_inodes)); } drop(inodes); @@ -907,7 +919,8 @@ impl OverlayFs { layer_idx, ); path_inodes.push(data.clone()); - return Ok((self.create_entry(inode, st), path_inodes)); + + return Ok((self.create_entry(inode, st), data, path_inodes)); } Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { // Continue to check lower layers @@ -941,7 +954,20 @@ impl OverlayFs { let 
symbol = self.intern_name(name)?; path_segments.push(symbol); - self.lookup_layer_by_layer(parent_data.layer_idx, &path_segments) + let (mut entry, child_data, path_inodes) = self.lookup_layer_by_layer(parent_data.layer_idx, &path_segments)?; + + // Set the submount flag if the entry is a directory and the submounts are announced + let mut attr_flags = 0; + if (entry.attr.st_mode & libc::S_IFMT) == libc::S_IFDIR + && self.announce_submounts.load(Ordering::Relaxed) + && child_data.dev != parent_data.dev + { + attr_flags |= fuse::ATTR_SUBMOUNT; + } + + entry.attr_flags = attr_flags; + + Ok((entry, path_inodes)) } /// Performs a raw stat syscall without any modifications to the returned stat structure. @@ -1354,7 +1380,7 @@ impl OverlayFs { let path_segments = inode_data.path.clone(); // Lookup the file to get all path inodes - let (_, path_inodes) = self.lookup_layer_by_layer(top_layer_idx, &path_segments)?; + let (_, _, path_inodes) = self.lookup_layer_by_layer(top_layer_idx, &path_segments)?; // Copy up the file self.copy_up(&path_inodes)?; @@ -1774,6 +1800,7 @@ impl OverlayFs { self.do_getattr(inode) } + /// Performs a mkdir operation fn do_mkdir( &self, ctx: Context, @@ -2050,11 +2077,21 @@ impl OverlayFs { // Copy up the source file to the top layer if needed let inode_data = self.ensure_top_layer(inode_data)?; + // Get source and destination paths + let src_path = self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?; + + // Extraneous check to ensure the source file is not a symlink + let stat = Self::unpatched_stat(&FileId::Path(src_path.clone()))?; + if stat.st_mode & libc::S_IFMT == libc::S_IFLNK { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot link to a symlink", + )); + } + // Get and ensure new parent is in top layer let new_parent_data = self.ensure_top_layer(self.get_inode_data(new_parent)?)?; - // Get source and destination paths - let src_path = self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?; let dst_path = 
self.dev_ino_and_name_to_vol_path(new_parent_data.dev, new_parent_data.ino, new_name)?; @@ -2086,11 +2123,6 @@ impl OverlayFs { /// Decrements the reference count for an inode and removes it if the count reaches zero fn do_forget(&self, inode: Inode, count: u64) { - // Skip forgetting the root inode - if inode == self.init_inode { - return; - } - let mut inodes = self.inodes.write().unwrap(); if let Some(data) = inodes.get(&inode) { // Acquiring the write lock on the inode map prevents new lookups from incrementing the @@ -2204,7 +2236,7 @@ impl OverlayFs { return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); } - // Don't allow getting attributes for the root inode + // Don't allow getting attributes for init if inode == self.init_inode { return Err(linux_error(io::Error::from_raw_os_error(libc::ENODATA))); } @@ -2779,7 +2811,8 @@ impl FileSystem for OverlayFs { // Set the umask to 0 to ensure that all file permissions are set correctly unsafe { libc::umask(0o000) }; - let mut opts = FsOptions::empty(); + // Enable readdirplus if supported + let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO; // Enable writeback caching if requested and supported if self.config.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { @@ -2787,9 +2820,10 @@ impl FileSystem for OverlayFs { self.writeback.store(true, Ordering::SeqCst); } - // Enable posix ACLs if supported - if capable.contains(FsOptions::POSIX_ACL) { - opts |= FsOptions::POSIX_ACL; + // Enable submounts if supported + if capable.contains(FsOptions::SUBMOUNTS) { + opts |= FsOptions::SUBMOUNTS; + self.announce_submounts.store(true, Ordering::Relaxed); } Ok(opts) @@ -3208,7 +3242,9 @@ impl FileSystem for OverlayFs { extensions: Extensions, ) -> io::Result<(Entry, Option, OpenOptions)> { Self::validate_name(name)?; - self.do_create(ctx, parent, name, mode, flags, umask, extensions) + let (entry, handle, opts) = self.do_create(ctx, parent, name, mode, flags, umask, extensions)?; + 
self.bump_refcount(entry.inode); + Ok((entry, handle, opts)) } fn mknod( @@ -3222,7 +3258,9 @@ impl FileSystem for OverlayFs { extensions: Extensions, ) -> io::Result { Self::validate_name(name)?; - self.do_mknod(ctx, parent, name, mode, umask, extensions) + let entry = self.do_mknod(ctx, parent, name, mode, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) } fn fallocate( @@ -3301,17 +3339,6 @@ impl Default for Config { } } -// Add Default implementation for Context -impl Default for Context { - fn default() -> Self { - Context { - uid: 0, - gid: 0, - pid: 0, - } - } -} - //-------------------------------------------------------------------------------------------------- // External Functions //-------------------------------------------------------------------------------------------------- diff --git a/src/devices/src/virtio/fs/macos/overlayfs/mod.rs b/src/devices/src/virtio/fs/macos/overlayfs/mod.rs deleted file mode 100644 index 074462f86..000000000 --- a/src/devices/src/virtio/fs/macos/overlayfs/mod.rs +++ /dev/null @@ -1,5 +0,0 @@ -mod fs; -#[cfg(test)] -mod tests; - -pub use fs::*; diff --git a/src/devices/src/virtio/fs/mod.rs b/src/devices/src/virtio/fs/mod.rs index 0ec47817c..208414bb7 100644 --- a/src/devices/src/virtio/fs/mod.rs +++ b/src/devices/src/virtio/fs/mod.rs @@ -14,6 +14,8 @@ pub mod linux; pub use linux::fs_utils; #[cfg(target_os = "linux")] pub use linux::passthrough; +#[cfg(target_os = "linux")] +pub use linux::overlayfs; #[cfg(target_os = "macos")] pub mod macos; pub use kinds::*; diff --git a/src/devices/src/virtio/fs/multikey.rs b/src/devices/src/virtio/fs/multikey.rs index 27f7816c9..bcbcb2717 100644 --- a/src/devices/src/virtio/fs/multikey.rs +++ b/src/devices/src/virtio/fs/multikey.rs @@ -18,8 +18,8 @@ where // We need to keep a copy of the second key in the main map so that we can remove entries using // just the main key. Otherwise we would require the caller to provide both keys when calling // `remove`. 
- main: BTreeMap, - alt: BTreeMap, + pub main: BTreeMap, + pub alt: BTreeMap, } impl MultikeyBTreeMap diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs b/src/devices/src/virtio/fs/tests/overlayfs/create.rs similarity index 100% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/create.rs rename to src/devices/src/virtio/fs/tests/overlayfs/create.rs diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/lookup.rs b/src/devices/src/virtio/fs/tests/overlayfs/lookup.rs similarity index 99% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/lookup.rs rename to src/devices/src/virtio/fs/tests/overlayfs/lookup.rs index 3cc80511b..bc9afdb43 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/lookup.rs +++ b/src/devices/src/virtio/fs/tests/overlayfs/lookup.rs @@ -1,6 +1,9 @@ use std::{ffi::CString, io}; -use crate::virtio::{fs::filesystem::{Context, FileSystem}, fuse::FsOptions}; +use crate::virtio::{ + fs::filesystem::{Context, FileSystem}, + fuse::FsOptions, +}; use super::helper; diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs b/src/devices/src/virtio/fs/tests/overlayfs/metadata.rs similarity index 98% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs rename to src/devices/src/virtio/fs/tests/overlayfs/metadata.rs index 5371acb0a..bf9431f15 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/metadata.rs +++ b/src/devices/src/virtio/fs/tests/overlayfs/metadata.rs @@ -4,8 +4,7 @@ use crate::virtio::{ bindings::{self, LINUX_ENODATA, LINUX_ENOSYS}, fs::filesystem::{Context, FileSystem, GetxattrReply, ListxattrReply}, fuse::{FsOptions, SetattrValid}, - linux_errno::LINUX_ERANGE, - macos::overlayfs::{Config, OverlayFs}, + linux_errno::LINUX_ERANGE, overlayfs::{Config, OverlayFs}, }; use super::helper; @@ -304,7 +303,7 @@ fn test_setattr_copy_up() -> io::Result<()> { // Initialize filesystem fs.init(FsOptions::empty())?; - // Test setattr on file in lower layer (should 
trigger copy_up) + // Test setattr on file in lower layer let file1_name = CString::new("file1").unwrap(); let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; @@ -707,7 +706,9 @@ fn test_xattrs() -> io::Result<()> { // Try to read the xattr directly from the middle layer file (should not exist) let middle_layer_path = CString::new(middle_layer_file.to_str().unwrap()).unwrap(); let mut buf = vec![0; 100]; + #[cfg(target_os = "macos")] let res = unsafe { + #[cfg(target_os = "macos")] libc::getxattr( middle_layer_path.as_ptr(), middle_xattr_name.as_ptr(), @@ -717,12 +718,22 @@ fn test_xattrs() -> io::Result<()> { 0, ) }; + + #[cfg(target_os = "linux")] + let res = unsafe { + libc::getxattr( + middle_layer_path.as_ptr(), + middle_xattr_name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + ) + }; + assert!(res < 0, "Xattr should not exist on middle layer file"); let err = io::Error::last_os_error(); assert!( - err.raw_os_error().unwrap() == libc::ENOATTR - || err.raw_os_error().unwrap() == libc::ENODATA, - "Expected ENOATTR or ENODATA when reading xattr from middle layer file" + err.raw_os_error().unwrap() == libc::ENODATA, + "Expected ENODATA when reading xattr from middle layer file" ); // ---------- Test xattrs on nested directories ---------- diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs b/src/devices/src/virtio/fs/tests/overlayfs/misc.rs similarity index 95% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs rename to src/devices/src/virtio/fs/tests/overlayfs/misc.rs index 598197c9f..219e69f2d 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/misc.rs +++ b/src/devices/src/virtio/fs/tests/overlayfs/misc.rs @@ -2,7 +2,11 @@ use std::{ffi::CString, fs, io, os::unix::fs::PermissionsExt, path::PathBuf}; use tempfile::TempDir; -use crate::virtio::{fs::filesystem::{Context, FileSystem}, fuse::FsOptions, macos::overlayfs::{Config, OverlayFs}}; +use crate::virtio::{ + fs::filesystem::{Context, 
FileSystem}, + fuse::FsOptions, + overlayfs::{Config, OverlayFs}, +}; use super::helper; @@ -396,7 +400,7 @@ fn test_link_errors() -> io::Result<()> { // - dir1/ let layers = vec![vec![("file1", false, 0o644), ("dir1", true, 0o755)]]; - let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; let ctx = Context::default(); let file1_name = CString::new("file1").unwrap(); @@ -422,6 +426,24 @@ fn test_link_errors() -> io::Result<()> { .link(ctx, file1_entry.inode, dir1_entry.inode, &invalid_name) .is_err()); + // Test linking a symlink (should error) + // Create a symlink in the bottom layer + let symlink_path = temp_dirs[0].path().join("symlink"); + std::os::unix::fs::symlink("file1", &symlink_path)?; + + // Initialize filesystem to detect the new symlink + fs.init(FsOptions::empty())?; + + // Get the symlink's inode + let symlink_name = CString::new("symlink").unwrap(); + let symlink_entry = fs.lookup(ctx, 1, &symlink_name)?; + + // Try to create a hard link to the symlink (should fail) + let link_name = CString::new("link_to_symlink").unwrap(); + assert!(fs + .link(ctx, symlink_entry.inode, dir1_entry.inode, &link_name) + .is_err()); + Ok(()) } diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests.rs b/src/devices/src/virtio/fs/tests/overlayfs/mod.rs similarity index 89% rename from src/devices/src/virtio/fs/macos/overlayfs/tests.rs rename to src/devices/src/virtio/fs/tests/overlayfs/mod.rs index 1fffe98a5..85d6ea594 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests.rs +++ b/src/devices/src/virtio/fs/tests/overlayfs/mod.rs @@ -22,10 +22,25 @@ mod remove; #[cfg(test)] mod write; +//-------------------------------------------------------------------------------------------------- +// Trait Implementations +//-------------------------------------------------------------------------------------------------- + +impl Default for crate::virtio::fs::filesystem::Context { + fn default() -> Self { 
+ Self { + uid: 0, + gid: 0, + pid: 0, + } + } +} + //-------------------------------------------------------------------------------------------------- // Modules: Helper //-------------------------------------------------------------------------------------------------- +#[cfg(test)] mod helper { use std::{ fs::{self, File}, @@ -36,7 +51,7 @@ mod helper { use crate::virtio::{ fs::filesystem::{ZeroCopyReader, ZeroCopyWriter}, - macos::overlayfs::{Config, OverlayFs}, + fs::overlayfs::{Config, OverlayFs}, }; use tempfile::TempDir; @@ -158,13 +173,17 @@ mod helper { layers: layer_paths, ..Default::default() }; - + let overlayfs = OverlayFs::new(cfg)?; Ok((overlayfs, temp_dirs)) } // Debug utility to print the directory structure of each layer using tree command pub(super) fn debug_print_layers(temp_dirs: &[TempDir], show_perms: bool) -> io::Result<()> { + if Command::new("tree").arg("--version").output().is_err() { + println!("tree command is not accessible. please install it to see the layer directory structures."); + return Ok(()); + } println!("\n=== Layer Directory Structures ==="); for (i, dir) in temp_dirs.iter().enumerate() { diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs b/src/devices/src/virtio/fs/tests/overlayfs/open.rs similarity index 100% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/open.rs rename to src/devices/src/virtio/fs/tests/overlayfs/open.rs diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/read.rs b/src/devices/src/virtio/fs/tests/overlayfs/read.rs similarity index 98% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/read.rs rename to src/devices/src/virtio/fs/tests/overlayfs/read.rs index 6d997e1dd..715dd236b 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/read.rs +++ b/src/devices/src/virtio/fs/tests/overlayfs/read.rs @@ -1,7 +1,9 @@ use std::{ffi::CString, fs, io}; use crate::virtio::{ - fs::filesystem::{Context, FileSystem}, fuse::FsOptions, 
macos::overlayfs::tests::helper::TestContainer, + fs::filesystem::{Context, FileSystem}, + fuse::FsOptions, + overlayfs::tests::helper::TestContainer, }; use super::helper; @@ -173,31 +175,33 @@ fn test_readlink_errors() -> io::Result<()> { // Initialize filesystem fs.init(FsOptions::empty())?; - // Test readlink on regular file (should fail) + // Test readlink on regular file (should fail with either EINVAL or ENOENT) let file_name = CString::new("regular_file").unwrap(); let file_entry = fs.lookup(Context::default(), 1, &file_name)?; let result = fs.readlink(Context::default(), file_entry.inode); match result { Err(e) => { - assert_eq!( - e.raw_os_error(), - Some(libc::EINVAL), - "Reading link of regular file should return EINVAL" + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EINVAL || code == libc::ENOENT, + "Reading link of regular file should return either EINVAL or ENOENT, got {}", + code ); } Ok(_) => panic!("Expected error for regular file"), } - // Test readlink on directory (should fail) + // Test readlink on directory (should fail with either EINVAL or ENOENT) let dir_name = CString::new("directory").unwrap(); let dir_entry = fs.lookup(Context::default(), 1, &dir_name)?; let result = fs.readlink(Context::default(), dir_entry.inode); match result { Err(e) => { - assert_eq!( - e.raw_os_error(), - Some(libc::EINVAL), - "Reading link of directory should return EINVAL" + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EINVAL || code == libc::ENOENT, + "Reading link of directory should return either EINVAL or ENOENT, got {}", + code ); } Ok(_) => panic!("Expected error for directory"), @@ -755,8 +759,6 @@ fn test_readdir_with_offset() -> io::Result<()> { }, )?; - println!("entries: {:?}", entries); - // Read the second batch of directory entries starting from the last offset let mut more_entries = Vec::new(); fs.readdir(ctx, entry.inode, handle, 4096, last_offset, |dir_entry| { @@ -765,15 +767,12 @@ fn 
test_readdir_with_offset() -> io::Result<()> { Ok(1) })?; - println!("more_entries: {:?}", more_entries); - // Verify that we got all entries between the two reads let all_entries: Vec<_> = entries .into_iter() .chain(more_entries.into_iter()) .collect(); - println!("all_entries: {:?}", all_entries); assert!(all_entries.contains(&"file1".to_string())); assert!(all_entries.contains(&"file2".to_string())); assert!(all_entries.contains(&"file3".to_string())); diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/remove.rs b/src/devices/src/virtio/fs/tests/overlayfs/remove.rs similarity index 93% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/remove.rs rename to src/devices/src/virtio/fs/tests/overlayfs/remove.rs index 38fd3087d..69c33f5e4 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/remove.rs +++ b/src/devices/src/virtio/fs/tests/overlayfs/remove.rs @@ -177,11 +177,11 @@ fn test_unlink_errors() -> io::Result<()> { match fs.unlink(ctx, 1, &invalid_name) { Ok(_) => panic!("Unlink succeeded with invalid name"), Err(e) => { - assert_eq!( - e.kind(), - io::ErrorKind::PermissionDenied, - "Expected PermissionDenied error, got {:?}", - e.kind() + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EPERM || code == libc::ENOENT, + "Expected EPERM or ENOENT error for path traversal, got {}", + code ); } } @@ -238,20 +238,20 @@ fn test_unlink_complex_layers() -> io::Result<()> { // Test 3: Unlink a file from lowest layer let file1_name = CString::new("file1.txt").unwrap(); fs.unlink(ctx, dir1_entry.inode, &file1_name)?; - // // Expect a whiteout in the top layer but the original file remains in lower layer - // assert!(temp_dirs[2].path().join("dir1/.wh.file1.txt").exists()); - // assert!(temp_dirs[0].path().join("dir1/file1.txt").exists()); - - // // Test 4: Unlink a file from lowest layer that is already whiteouted - // let file2_name = CString::new("file2.txt").unwrap(); - // // First unlink to create the whiteout - // 
fs.unlink(ctx, dir1_entry.inode, &file2_name)?; - // assert!(temp_dirs[2].path().join("dir1/.wh.file2.txt").exists()); - // // Second attempt should fail with ENOENT - // match fs.unlink(ctx, dir1_entry.inode, &file2_name) { - // Ok(_) => panic!("Unlink succeeded on already whiteouted file"), - // Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), - // } + // Expect a whiteout in the top layer but the original file remains in lower layer + assert!(temp_dirs[2].path().join("dir1/.wh.file1.txt").exists()); + assert!(temp_dirs[0].path().join("dir1/file1.txt").exists()); + + // Test 4: Unlink a file from lowest layer that is already whiteouted + let file2_name = CString::new("file2.txt").unwrap(); + // First unlink to create the whiteout + fs.unlink(ctx, dir1_entry.inode, &file2_name)?; + assert!(temp_dirs[2].path().join("dir1/.wh.file2.txt").exists()); + // Second attempt should fail with ENOENT + match fs.unlink(ctx, dir1_entry.inode, &file2_name) { + Ok(_) => panic!("Unlink succeeded on already whiteouted file"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } Ok(()) } @@ -432,11 +432,11 @@ fn test_rmdir_errors() -> io::Result<()> { match fs.rmdir(ctx, 1, &invalid_name) { Ok(_) => panic!("rmdir succeeded with invalid name"), Err(e) => { - assert_eq!( - e.kind(), - io::ErrorKind::PermissionDenied, - "Expected PermissionDenied error, got {:?}", - e.kind() + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EPERM || code == libc::ENOENT, + "Expected EPERM or ENOENT error for path traversal, got {}", + code ); } } diff --git a/src/devices/src/virtio/fs/macos/overlayfs/tests/write.rs b/src/devices/src/virtio/fs/tests/overlayfs/write.rs similarity index 99% rename from src/devices/src/virtio/fs/macos/overlayfs/tests/write.rs rename to src/devices/src/virtio/fs/tests/overlayfs/write.rs index 86e8bfb49..a823f8837 100644 --- a/src/devices/src/virtio/fs/macos/overlayfs/tests/write.rs +++ 
b/src/devices/src/virtio/fs/tests/overlayfs/write.rs @@ -1,6 +1,6 @@ use std::{ffi::CString, io}; -use crate::virtio::{fs::filesystem::{Context, FileSystem}, macos::overlayfs::tests::helper::TestContainer}; +use crate::virtio::{fs::filesystem::{Context, FileSystem}, overlayfs::tests::helper::TestContainer}; use super::helper; From 37662ee07a0aeea21751d53a6f939cd123d46535 Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Sat, 19 Apr 2025 23:42:29 +0100 Subject: [PATCH 08/14] feat(mount): process /etc/fstab during initialization (#12) Add mount -a command to process /etc/fstab entries, enabling virtiofs shares --- init/init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/init/init.c b/init/init.c index 93553b736..6646d969a 100644 --- a/init/init.c +++ b/init/init.c @@ -448,6 +448,9 @@ static int mount_filesystems() /* May fail if already exists and that's fine. */ symlink("/proc/self/fd", "/dev/fd"); + /* Process /etc/fstab to mount additional filesystems, including virtiofs shares */ + system("/bin/mount -a") + return 0; } From ed7ff79e732249e46c9d8b2019192c5c4a8d22c0 Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Sat, 19 Apr 2025 23:47:47 +0100 Subject: [PATCH 09/14] fix: typo --- init/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/init.c b/init/init.c index 6646d969a..66b7932f4 100644 --- a/init/init.c +++ b/init/init.c @@ -449,7 +449,7 @@ static int mount_filesystems() symlink("/proc/self/fd", "/dev/fd"); /* Process /etc/fstab to mount additional filesystems, including virtiofs shares */ - system("/bin/mount -a") + system("/bin/mount -a"); return 0; } From 8e0efd3f28f63a64219d90ba209a277d2b662f34 Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Sun, 20 Apr 2025 00:18:22 +0100 Subject: [PATCH 10/14] refactor(vsock): more permissive binding for make public scope (#13) Combine scope 2 (Public) and scope 3 (Any) to have the same behavior, both allowing binding to any IP address. 
This simplifies the logic by removing the public IP check since both scopes now permit unrestricted IP binding. --- src/devices/src/virtio/vsock/ip_filter.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/devices/src/virtio/vsock/ip_filter.rs b/src/devices/src/virtio/vsock/ip_filter.rs index 4fb81fe18..94bf39ff6 100644 --- a/src/devices/src/virtio/vsock/ip_filter.rs +++ b/src/devices/src/virtio/vsock/ip_filter.rs @@ -85,10 +85,8 @@ impl IpFilterConfig { // Scope 1: Group - Allow binding within the subnet if no specific IP given // If no subnet is specified, behaves like scope 0 (deny all) 1 => self.subnet.map_or(false, |subnet| subnet.contains(bind_ip)), - // Scope 2: Public - Allow binding to public IPs if no specific IP given - 2 => !Self::is_private(bind_ip), - // Scope 3: Any - Allow binding to any IP if no specific IP given - 3 => true, + // Scope 2 & 3: Any & Public - Allow binding to any IP if no specific IP given + 2 | 3 => true, _ => false, // Invalid scope (scope 0 already handled) } } From 5500334cfd14505a0057ccc04469bdd0113b11af Mon Sep 17 00:00:00 2001 From: Stephen Akinyemi Date: Sun, 20 Apr 2025 11:30:29 +0100 Subject: [PATCH 11/14] feat(init): implement native virtiofs fstab mounting (#14) Replace system("/bin/mount -a") with direct mounting of virtiofs entries from /etc/fstab. This provides better error handling and eliminates dependency on mount(8) binary. 
- Add mkdir_p() helper for recursive directory creation - Add is_mounted() to check existing mount points - Add clean_opts() to handle mount options - Add mount_fstab_virtiofs() to process fstab entries - Only mount virtiofs entries, leaving other filesystems unmounted --- init/init.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 164 insertions(+), 2 deletions(-) diff --git a/init/init.c b/init/init.c index 66b7932f4..0ab327656 100644 --- a/init/init.c +++ b/init/init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "jsmn.h" @@ -389,6 +390,167 @@ static int chroot_luks() } #endif +/* mkdir -p (recursively create all parents) */ +static int mkdir_p(const char *path, mode_t mode) +{ + char tmp[256]; + char *p = NULL; + size_t len; + + if (!path || !*path) return -1; + len = strnlen(path, sizeof(tmp) - 1); + memcpy(tmp, path, len); + tmp[len] = '\0'; + + if (tmp[len - 1] == '/') + tmp[len - 1] = '\0'; + + for (p = tmp + 1; *p; ++p) { + if (*p == '/') { + *p = '\0'; + if (mkdir(tmp, mode) < 0 && errno != EEXIST) + return -1; + *p = '/'; + } + } + if (mkdir(tmp, mode) < 0 && errno != EEXIST) + return -1; + return 0; +} + +/* Return: 1 = same fs already mounted + * -1 = dir busy with different type/tag + * 0 = not mounted yet + */ +static int is_mounted(const char *dir, const char *src, const char *type) +{ + FILE *fp = setmntent("/proc/self/mounts", "r"); + if (!fp) return 0; /* silent best-effort */ + + struct mntent *m; + int found = 0; + + while ((m = getmntent(fp)) != NULL) { + if (strcmp(m->mnt_dir, dir) == 0) { + if (strcmp(m->mnt_type, type) != 0) + found = -1; /* same dir, other fstype */ + else + found = (strcmp(m->mnt_fsname, src) == 0) ? 1 : -1; + break; + } + } + endmntent(fp); + return found; +} + +/* Strip the single word "defaults" (and empty commas) from opt string. + * Returns pointer inside `buf`. buf must persist until mount(2) call. 
+ */ +static const char *clean_opts(const char *orig, char *buf, size_t buflen) +{ + if (!orig || !*orig) return NULL; + if (strcmp(orig, "defaults") == 0) return NULL; + + /* quick path: if the substring "defaults" not present, pass as-is */ + if (!strstr(orig, "defaults")) return orig; + + /* otherwise build a filtered copy */ + char *dst = buf; + const char *tok; + char tmp[256]; + strncpy(tmp, orig, sizeof(tmp)-1); + tmp[sizeof(tmp)-1] = '\0'; + + for (tok = strtok(tmp, ","); tok; tok = strtok(NULL, ",")) { + if (strcmp(tok, "defaults") == 0 || *tok == '\0') + continue; + size_t n = snprintf(dst, buflen - (dst - buf), "%s,", tok); + dst += n; + } + if (dst != buf) *(dst - 1) = '\0'; /* remove trailing comma */ + return (dst == buf) ? NULL : buf; /* all stripped? -> NULL */ +} + +/* Mount every virtiofs entry found in /etc/fstab. + * Idempotent, silent on success, logs only actionable errors. */ +static int mount_fstab_virtiofs(void) +{ + FILE *fp = setmntent("/etc/fstab", "r"); + if (!fp) /* no fstab → nothing to do, not an error */ + return 0; + + struct mntent *e; + int rc = 0; + + while ((e = getmntent(fp)) != NULL) { + /* ─────────── 1. we only care about virtiofs rows ─────────── */ + if (strcmp(e->mnt_type, "virtiofs") != 0) + continue; + + if (!e->mnt_fsname[0] || !e->mnt_dir[0]) { + fprintf(stderr, + "virtiofs-init: malformed fstab line – skipped\n"); + rc = -1; + continue; + } + + /* ─────────── 2. make local copies BEFORE is_mounted() ─────── */ + char fsname[256], dir[256], opts[256]; + strncpy(fsname, e->mnt_fsname, sizeof(fsname) - 1); + strncpy(dir, e->mnt_dir, sizeof(dir) - 1); + strncpy(opts, e->mnt_opts, sizeof(opts) - 1); + fsname[sizeof(fsname) - 1] = + dir[sizeof(dir) - 1] = + opts[sizeof(opts) - 1] = '\0'; + + /* ─────────── 3. ensure mount‑point exists (mkdir -p) ───────── */ + if (mkdir_p(dir, 0755) < 0) { + fprintf(stderr, + "virtiofs-init: cannot create %s: %s\n", + dir, strerror(errno)); + rc = -1; + continue; + } + + /* ─────────── 4. 
skip if already mounted / busy ─────────────── */ + switch (is_mounted(dir, fsname, "virtiofs")) { + case 1: continue; /* identical mount already there */ + case -1: fprintf(stderr, + "virtiofs-init: %s busy – skipped\n", dir); + rc = -1; + continue; + } + + /* ─────────── 5. translate common flags BEFORE they vanish ──── */ + unsigned long flags = 0; + struct mntent fake = { .mnt_opts = opts }; + if (hasmntopt(&fake, "ro")) flags |= MS_RDONLY; + if (hasmntopt(&fake, "nosuid")) flags |= MS_NOSUID; + if (hasmntopt(&fake, "nodev")) flags |= MS_NODEV; + if (hasmntopt(&fake, "noexec")) flags |= MS_NOEXEC; + + /* Clean "defaults" out of the option list */ + char optbuf[256]; + const char *data = clean_opts(opts, optbuf, sizeof(optbuf)); + + /* ─────────── 6. actual mount attempt ───────────────────────── */ + if (mount(fsname, dir, "virtiofs", flags, data) < 0) { + if (errno == ENODEV || errno == ENOENT) { + fprintf(stderr, + "virtiofs-init: tag %s absent – skipped\n", fsname); + } else if (errno != EBUSY) { + fprintf(stderr, + "virtiofs-init: mount %s→%s failed: %s\n", + fsname, dir, strerror(errno)); + rc = -1; + } + } + } + + endmntent(fp); + return rc; +} + static int mount_filesystems() { char *const DIRS_LEVEL1[] = {"/dev", "/proc", "/sys"}; @@ -448,8 +610,8 @@ static int mount_filesystems() /* May fail if already exists and that's fine. 
*/ symlink("/proc/self/fd", "/dev/fd"); - /* Process /etc/fstab to mount additional filesystems, including virtiofs shares */ - system("/bin/mount -a"); + /* Mount virtiofs shares from /etc/fstab (if any) */ + mount_fstab_virtiofs(); return 0; } From d1c272ed042145caf656428611ebc6bf7f299239 Mon Sep 17 00:00:00 2001 From: frc4533-lincoln <132951735+frc4533-lincoln@users.noreply.github.com> Date: Tue, 27 May 2025 13:42:34 -0400 Subject: [PATCH 12/14] Switch to my libc fork so it will build on musl --- src/devices/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 7d179c3f8..0fe5f265c 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -19,7 +19,7 @@ intaglio = "1.10.0" bitflags = "1.2.0" crossbeam-channel = "0.5" env_logger = "0.9.0" -libc = ">=0.2.39" +libc = { git = "https://github.com/frc4533-lincoln/libc.git", branch = "main" } log = "0.4.0" nix = { version = "0.24.1", features = ["poll"] } pw = { package = "pipewire", version = "0.8.0", optional = true } From b96a88d1dfb1b84cd9d7f7f3d75515c0adfbc63b Mon Sep 17 00:00:00 2001 From: frc4533-lincoln <132951735+frc4533-lincoln@users.noreply.github.com> Date: Tue, 27 May 2025 15:36:51 -0400 Subject: [PATCH 13/14] Fix for musl systems and update deps --- Cargo.lock | 624 ++++++++++++------- Cargo.toml | 9 + src/arch/Cargo.toml | 8 +- src/cpuid/Cargo.toml | 6 +- src/cpuid/src/transformer/common.rs | 2 +- src/devices/Cargo.toml | 4 +- src/devices/src/virtio/fs/linux/overlayfs.rs | 12 + src/imago/Cargo.toml | 13 +- src/kernel/Cargo.toml | 2 +- src/libkrun/Cargo.toml | 2 +- src/smbios/Cargo.toml | 2 +- src/utils/Cargo.toml | 4 +- src/vmm/Cargo.toml | 10 +- src/vmm/src/linux/vstate.rs | 10 +- src/vmm/src/signal_handler.rs | 8 +- 15 files changed, 459 insertions(+), 257 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 385ce35c9..f70a2941f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,9 +28,9 @@ dependencies = [ 
[[package]] name = "allocator-api2" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -44,7 +44,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ - "libc", + "libc 0.2.172", ] [[package]] @@ -59,9 +59,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arch" @@ -70,7 +70,7 @@ dependencies = [ "arch_gen", "kvm-bindings", "kvm-ioctls", - "libc", + "libc 1.0.0-alpha.1", "smbios", "utils", "vm-fdt", @@ -83,9 +83,9 @@ version = "0.1.0" [[package]] name = "async-trait" -version = "0.1.83" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", @@ -99,7 +99,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", - "libc", + "libc 0.2.172", "winapi", ] @@ -111,13 +111,13 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = 
"6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ "addr2line", "cfg-if", - "libc", + "libc 0.2.172", "miniz_oxide", "object", "rustc-demangle", @@ -146,16 +146,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "annotate-snippets", - "bitflags 2.6.0", + "bitflags 2.9.1", "cexpr", "clang-sys", - "itertools", + "itertools 0.12.1", "lazy_static", "lazycell", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", + "shlex", + "syn", +] + +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.9.1", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", "shlex", "syn", ] @@ -174,15 +194,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" [[package]] name = "byteorder" @@ -196,15 +216,15 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "190baaad529bcfbde9e1a19022c42781bdb6ff9de25721abdb8fd98c0807730b" dependencies = [ - "libc", + "libc 0.2.172", "thiserror", ] [[package]] name = "cc" -version = "1.2.1" +version 
= "1.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" dependencies = [ "shlex", ] @@ -236,16 +256,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -255,7 +275,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", - "libc", + "libc 0.2.172", "libloading", ] @@ -309,18 +329,18 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "curl" @@ -329,7 +349,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9fb4d13a1be2b58f14d60adba57c9834b78c62fd86c3e76a148f732686e9265" dependencies = [ "curl-sys", - "libc", + "libc 0.2.172", "openssl-probe", "openssl-sys", 
"schannel", @@ -339,12 +359,12 @@ dependencies = [ [[package]] name = "curl-sys" -version = "0.4.78+curl-8.11.0" +version = "0.4.80+curl-8.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eec768341c5c7789611ae51cf6c459099f22e64a5d5d0ce4892434e33821eaf" +checksum = "55f7df2eac63200c3ab25bde3b2268ef2ee56af3d238e76d61f01c3c49bff734" dependencies = [ "cc", - "libc", + "libc 0.2.172", "libz-sys", "openssl-sys", "pkg-config", @@ -365,7 +385,7 @@ dependencies = [ "imago", "intaglio", "ipnetwork", - "libc", + "libc 1.0.0-alpha.1", "log", "lru", "nix 0.24.3", @@ -397,7 +417,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ - "libc", + "libc 0.2.172", "option-ext", "redox_users", "windows-sys 0.48.0", @@ -405,9 +425,9 @@ dependencies = [ [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "env_logger" @@ -424,17 +444,17 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.10" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ - "libc", + "libc 0.2.172", "windows-sys 0.59.0", ] @@ -446,9 +466,9 @@ checksum = 
"37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -456,9 +476,9 @@ dependencies = [ [[package]] name = "foldhash" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "foreign-types" @@ -566,25 +586,25 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", - "libc", + "libc 0.2.172", "wasi 0.11.0+wasi-snapshot-preview1", ] [[package]] name = "getrandom" -version = "0.3.1" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", - "libc", - "wasi 0.13.3+wasi-0.2.2", - "windows-targets 0.52.6", + "libc 0.2.172", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -595,15 +615,15 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = 
"a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "hashbrown" -version = "0.15.1" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "allocator-api2", "equivalent", @@ -622,7 +642,7 @@ version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ - "libc", + "libc 0.2.172", ] [[package]] @@ -633,9 +653,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "hvf" @@ -649,14 +669,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -676,7 +697,7 @@ version = "0.1.3" dependencies = [ "async-trait", "bincode", - "libc", + "libc 1.0.0-alpha.1", "miniz_oxide", "rustc_version", "serde", @@ -687,9 +708,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = 
"cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", @@ -722,18 +743,28 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -758,22 +789,22 @@ dependencies = [ [[package]] name = "kvm-bindings" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4933174d0cc4b77b958578cd45784071cc5ae212c2d78fbd755aaaa6dfa71a" +checksum = "3b13baf7bdfda2e10bcb109fcb099ef40cff82374eb6b7cdcf4695bdec4e522c" dependencies = [ "vmm-sys-util", ] [[package]] name = "kvm-ioctls" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "337d1afa126368bbd6a5c328048f71a69a737e9afe7e436b392a8f8d770c9171" +checksum = "083c460d5a272c2f22205973e319147b791d92a288d7d7a8d4c6194f95229440" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "kvm-bindings", - "libc", + "libc 0.2.172", "vmm-sys-util", ] @@ -791,9 +822,14 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.164" +version = "0.2.172" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "libc" +version = "1.0.0-alpha.1" +source = "git+https://github.com/frc4533-lincoln/libc.git?branch=main#0908643caf9c6a05be361820653322e3ea69fab9" [[package]] name = "libkrun" @@ -804,7 +840,7 @@ dependencies = [ "env_logger", "hvf", "ipnetwork", - "libc", + "libc 1.0.0-alpha.1", "log", "once_cell", "polly", @@ -814,12 +850,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.53.0", ] [[package]] @@ -828,8 +864,8 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.6.0", - "libc", + "bitflags 2.9.1", + "libc 0.2.172", ] [[package]] @@ -838,11 +874,11 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65f3a4b81b2a2d8c7f300643676202debd1b7c929dbf5c9bb89402ea11d19810" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "cc", "convert_case", "cookie-factory", - "libc", + "libc 0.2.172", "libspa-sys", "nix 0.27.1", "nom", @@ -855,40 +891,40 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf0d9716420364790e85cbb9d3ac2c950bde16a7dd36f3209b7dfdfc4a24d01f" dependencies = [ - "bindgen", + "bindgen 0.69.5", "cc", "system-deps", ] [[package]] name = "libz-sys" -version = "1.1.20" +version = "1.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" dependencies = [ "cc", - "libc", + "libc 0.2.172", "pkg-config", "vcpkg", ] [[package]] name = "linux-raw-sys" -version = "0.4.15" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" [[package]] name = "log" -version = "0.4.22" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lru" -version = "0.12.5" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" dependencies = [ "hashbrown", ] @@ -925,9 +961,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -940,7 +976,7 @@ checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" dependencies = [ "bitflags 1.3.2", "cfg-if", - "libc", + "libc 0.2.172", "memoffset 0.6.5", ] @@ -952,7 +988,7 @@ checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" dependencies = [ "bitflags 1.3.2", "cfg-if", - "libc", + "libc 0.2.172", "memoffset 0.7.1", "pin-utils", ] @@ -963,9 +999,9 @@ version = "0.27.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "cfg-if", - "libc", + "libc 0.2.172", ] [[package]] @@ -989,29 +1025,29 @@ dependencies = [ [[package]] name = "object" -version = "0.36.5" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl" -version = "0.10.68" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "cfg-if", "foreign-types", - "libc", + "libc 0.2.172", "once_cell", "openssl-macros", "openssl-sys", @@ -1030,18 +1066,18 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.104" +version = "0.9.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" dependencies = 
[ "cc", - "libc", + "libc 0.2.172", "pkg-config", "vcpkg", ] @@ -1054,9 +1090,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "pin-project-lite" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -1071,8 +1107,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08e645ba5c45109106d56610b3ee60eb13a6f2beb8b74f8dc8186cf261788dda" dependencies = [ "anyhow", - "bitflags 2.6.0", - "libc", + "bitflags 2.9.1", + "libc 0.2.172", "libspa", "libspa-sys", "nix 0.27.1", @@ -1087,39 +1123,49 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "849e188f90b1dda88fe2bfe1ad31fe5f158af2c98f80fb5d13726c44f3f01112" dependencies = [ - "bindgen", + "bindgen 0.69.5", "libspa-sys", "system-deps", ] [[package]] name = "pkg-config" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "polly" version = "0.0.1" dependencies = [ - "libc", + "libc 0.2.172", "utils", ] [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy 0.8.25", +] + +[[package]] +name = "prettyplease" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" 
dependencies = [ - "zerocopy 0.7.35", + "proc-macro2", + "syn", ] [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -1136,25 +1182,31 @@ dependencies = [ "flate2", "hex", "lazy_static", - "libc", + "libc 0.2.172", ] [[package]] name = "quote" -version = "1.0.37" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", + "libc 0.2.172", "rand_chacha", "rand_core", ] @@ -1175,7 +1227,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", ] [[package]] @@ -1193,7 +1245,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.15", + "getrandom 0.2.16", "libredox", "thiserror", ] @@ -1229,9 +1281,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "remain" -version = "0.2.14" +version = "0.2.15" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "46aef80f842736de545ada6ec65b81ee91504efd6853f4b96de7414c42ae7443" +checksum = "d7ef12e84481ab4006cb942f8682bba28ece7270743e649442027c5db87df126" dependencies = [ "proc-macro2", "quote", @@ -1250,6 +1302,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -1261,24 +1319,30 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.44" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "errno", - "libc", + "libc 0.2.172", "linux-raw-sys", "windows-sys 0.59.0", ] +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + [[package]] name = "rutabaga_gfx" version = "0.1.2" dependencies = [ "anyhow", "cfg-if", - "libc", + "libc 0.2.172", "log", "nix 0.26.4", "pkg-config", @@ -1290,30 +1354,30 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] [[package]] name = "semver" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -1329,18 +1393,18 @@ dependencies = [ [[package]] name = "serde_bytes" -version = "0.11.15" +version = "0.11.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" dependencies = [ "serde", ] [[package]] name = "serde_derive" -version = "1.0.215" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -1349,9 +1413,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -1384,7 +1448,7 @@ dependencies = [ "hex", "iocuddle", "lazy_static", - "libc", + "libc 0.2.172", "openssl", 
"rdrand", "serde", @@ -1411,9 +1475,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "smbios" @@ -1424,11 +1488,11 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" dependencies = [ - "libc", + "libc 0.2.172", "windows-sys 0.52.0", ] @@ -1440,9 +1504,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "2.0.87" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -1470,13 +1534,12 @@ checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tempfile" -version = "3.17.1" +version = "3.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" dependencies = [ - "cfg-if", "fastrand", - "getrandom 0.3.1", + "getrandom 0.3.3", "once_cell", "rustix", "windows-sys 0.59.0", @@ -1513,9 +1576,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.41.1" +version = "1.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = 
"75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" dependencies = [ "backtrace", "pin-project-lite", @@ -1523,9 +1586,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.19" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" dependencies = [ "serde", "serde_spanned", @@ -1535,18 +1598,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.22" +version = "0.22.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ "indexmap", "serde", @@ -1557,9 +1620,9 @@ dependencies = [ [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -1568,9 +1631,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", @@ -1579,18 +1642,18 @@ dependencies = 
[ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -1610,18 +1673,20 @@ version = "0.1.0" dependencies = [ "bitflags 1.3.2", "env_logger", - "libc", + "libc 1.0.0-alpha.1", "log", "vmm-sys-util", ] [[package]] name = "uuid" -version = "1.11.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ + "js-sys", "serde", + "wasm-bindgen", ] [[package]] @@ -1638,9 +1703,12 @@ checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" [[package]] name = "virtio-bindings" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1711e61c00f8cb450bd15368152a1e37a12ef195008ddc7d0f4812f9e2b30a68" +checksum = "cd2fe65550801ac106389d41f34cb1b32c4f7aaedf1b6cda1da3a211880de7f6" +dependencies = [ + "bindgen 0.71.1", +] [[package]] name = "vm-fdt" @@ -1654,7 +1722,7 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1720e7240cdc739f935456eb77f370d7e9b2a3909204da1e2b47bef1137a013" dependencies = [ - "libc", + "libc 0.2.172", "thiserror", "winapi", ] @@ -1676,7 +1744,7 @@ dependencies = [ "kernel", "kvm-bindings", "kvm-ioctls", - "libc", + "libc 
1.0.0-alpha.1", "log", "nix 0.24.3", "polly", @@ -1697,7 +1765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" dependencies = [ "bitflags 1.3.2", - "libc", + "libc 0.2.172", ] [[package]] @@ -1708,33 +1776,33 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasi" -version = "0.13.3+wasi-0.2.2" +version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn", @@ -1743,9 +1811,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1753,9 +1821,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version 
= "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -1766,9 +1834,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "winapi" @@ -1803,11 +1874,61 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.52.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ - "windows-targets 0.52.6", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + 
+[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", ] [[package]] @@ -1861,13 +1982,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -1880,6 +2017,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -1892,6 +2035,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -1904,12 +2053,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -1922,6 +2083,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -1934,6 +2101,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -1946,6 +2119,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -1958,22 +2137,28 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" -version = "0.6.20" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" dependencies = [ "memchr", ] [[package]] name = "wit-bindgen-rt" -version = "0.33.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", ] [[package]] @@ -1997,12 +2182,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = 
"a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "byteorder", - "zerocopy-derive 0.7.35", + "zerocopy-derive 0.8.25", ] [[package]] @@ -2018,9 +2202,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index a8da0b5b0..9d4be212e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,15 @@ members = ["src/libkrun"] resolver = "2" +[workspace.dependencies] +libc = { git = "https://github.com/frc4533-lincoln/libc.git", branch = "main", features = ["extra_traits"] } +vmm-sys-util = "0.12.1" +kvm-bindings = { version = "0.11", features = ["fam-wrappers"] } +kvm-ioctls = "0.21" +vm-memory = { version = "0.16", features = ["backend-mmap"] } +tokio = { version = "1", features = ["rt", "sync"] } +serde = { version = "1", features = ["derive"] } + [profile.dev] #panic = "abort" diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index baaedda55..a03bbf1b6 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -10,16 +10,16 @@ amd-sev = [ "tee" ] efi = [] [dependencies] -libc = ">=0.2.39" -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +libc.workspace = true +vm-memory.workspace = true arch_gen = { path = "../arch_gen" } smbios = { path = "../smbios" } utils = { path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings.workspace = true +kvm-ioctls.workspace = true [target.'cfg(target_arch = "aarch64")'.dependencies] vm-fdt = ">= 0.2.0" diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index 41c53aee0..5c09b309c 100644 --- a/src/cpuid/Cargo.toml +++ 
b/src/cpuid/Cargo.toml @@ -5,8 +5,8 @@ authors = ["Amazon Firecracker team "] edition = "2021" [dependencies] -vmm-sys-util = ">=0.11" +vmm-sys-util.workspace = true [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings.workspace = true +kvm-ioctls.workspace = true diff --git a/src/cpuid/src/transformer/common.rs b/src/cpuid/src/transformer/common.rs index d4a48c743..a89099d0d 100644 --- a/src/cpuid/src/transformer/common.rs +++ b/src/cpuid/src/transformer/common.rs @@ -123,7 +123,7 @@ pub fn use_host_cpuid_function( edx: entry.edx, padding: [0, 0, 0], }) - .map_err(FamError)?; + .map_err(|e| FamError(e))?; count += 1; } diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 0fe5f265c..f230f35ec 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -19,14 +19,14 @@ intaglio = "1.10.0" bitflags = "1.2.0" crossbeam-channel = "0.5" env_logger = "0.9.0" -libc = { git = "https://github.com/frc4533-lincoln/libc.git", branch = "main" } +libc.workspace = true log = "0.4.0" nix = { version = "0.24.1", features = ["poll"] } pw = { package = "pipewire", version = "0.8.0", optional = true } rand = "0.8.5" thiserror = { version = "1.0", optional = true } virtio-bindings = "0.2.0" -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true zerocopy = { version = "0.6.3", optional = true } zerocopy-derive = { version = "0.6.3", optional = true } ipnetwork = "0.21" diff --git a/src/devices/src/virtio/fs/linux/overlayfs.rs b/src/devices/src/virtio/fs/linux/overlayfs.rs index 19b6b0c83..8ecb36b73 100644 --- a/src/devices/src/virtio/fs/linux/overlayfs.rs +++ b/src/devices/src/virtio/fs/linux/overlayfs.rs @@ -1996,6 +1996,8 @@ impl OverlayFs { // Perform the rename let res = unsafe { + #[cfg(target_env = "gnu")] + { libc::renameat2( old_parent_data.file.as_raw_fd(), old_name.as_ptr(), @@ -2003,6 +2005,16 @@ impl OverlayFs { 
new_name.as_ptr(), flags, ) + } + #[cfg(target_env = "musl")] + { + libc::renameat( + old_parent_data.file.as_raw_fd(), + old_name.as_ptr(), + new_parent_data.file.as_raw_fd(), + new_name.as_ptr(), + ) + } }; if res < 0 { diff --git a/src/imago/Cargo.toml b/src/imago/Cargo.toml index 75b041d8c..3f169a9fe 100644 --- a/src/imago/Cargo.toml +++ b/src/imago/Cargo.toml @@ -47,25 +47,20 @@ version = "0.8" features = ["std"] [dependencies.serde] -version = "1.0" -features = ["derive"] +workspace = true [dependencies.tokio] -version = "1" -features = [ - "rt", - "sync", -] +workspace = true [dependencies.tracing] version = "0.1" [dependencies.vm-memory] -version = "0.16" optional = true +workspace = true [build-dependencies.rustc_version] version = "0.4.0" [target."cfg(unix)".dependencies.libc] -version = "0.2" +workspace = true diff --git a/src/kernel/Cargo.toml b/src/kernel/Cargo.toml index e8e96d88c..dc72320b8 100644 --- a/src/kernel/Cargo.toml +++ b/src/kernel/Cargo.toml @@ -4,6 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true utils = { path = "../utils" } diff --git a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml index 920c40f73..e6ab26106 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -18,7 +18,7 @@ virgl_resource_map2 = [] [dependencies] crossbeam-channel = "0.5" env_logger = "0.9.0" -libc = ">=0.2.39" +libc.workspace = true log = "0.4.0" once_cell = "1.4.1" ipnetwork = "0.21" diff --git a/src/smbios/Cargo.toml b/src/smbios/Cargo.toml index de9836ec0..863971c62 100644 --- a/src/smbios/Cargo.toml +++ b/src/smbios/Cargo.toml @@ -4,4 +4,4 @@ version = "0.1.0" edition = "2021" [dependencies] -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true diff --git a/src/utils/Cargo.toml b/src/utils/Cargo.toml index e4ecd3420..29b1ef222 100644 --- a/src/utils/Cargo.toml +++ b/src/utils/Cargo.toml @@ -7,6 +7,6 @@ 
edition = "2021" [dependencies] bitflags = "1.2.0" env_logger = "0.9.0" -libc = ">=0.2.85" +libc.workspace = true log = "0.4.0" -vmm-sys-util = ">=0.11" +vmm-sys-util.workspace = true diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index a44ff4fb5..04f9b1843 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -16,9 +16,9 @@ snd = [] [dependencies] crossbeam-channel = "0.5" env_logger = "0.9.0" -libc = ">=0.2.39" +libc.workspace = true log = "0.4.0" -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true arch = { path = "../arch" } devices = { path = "../devices" } @@ -42,11 +42,11 @@ ipnetwork = "0.21" cpuid = { path = "../cpuid" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.10", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings.workspace = true +kvm-ioctls.workspace = true [target.'cfg(target_os = "macos")'.dependencies] hvf = { path = "../hvf" } [dev-dependencies] -vmm-sys-util = ">=0.11" +vmm-sys-util.workspace = true diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 777268806..1a2ec92f9 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -864,7 +864,7 @@ impl Vcpu { /// Registers a signal handler which makes use of TLS and kvm immediate exit to /// kick the vcpu running on the current thread, if there is one. pub fn register_kick_signal_handler() { - extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { + extern "C" fn handle_signal(_: c_int, _: *mut nix::libc::siginfo_t, _: *mut nix::libc::c_void) { // This is safe because it's temporarily aliasing the `Vcpu` object, but we are // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`. 
unsafe { @@ -1175,9 +1175,11 @@ impl Vcpu { self.fd .set_sregs(&state.sregs) .map_err(Error::VcpuSetSregs)?; - self.fd - .set_xsave(&state.xsave) - .map_err(Error::VcpuSetXsave)?; + unsafe { + self.fd + .set_xsave(&state.xsave) + .map_err(Error::VcpuSetXsave)?; + } self.fd.set_xcrs(&state.xcrs).map_err(Error::VcpuSetXcrs)?; self.fd .set_debug_regs(&state.debug_regs) diff --git a/src/vmm/src/signal_handler.rs b/src/vmm/src/signal_handler.rs index 9a6067bfc..f8c6a0a57 100644 --- a/src/vmm/src/signal_handler.rs +++ b/src/vmm/src/signal_handler.rs @@ -23,7 +23,7 @@ static CONSOLE_SIGINT_FD: AtomicI32 = AtomicI32::new(-1); /// /// Increments the `seccomp.num_faults` metric, logs an error message and terminates the process /// with a specific exit code. -extern "C" fn sigsys_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigsys_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. let si_signo = unsafe { (*info).si_signo }; let si_code = unsafe { (*info).si_code }; @@ -52,7 +52,7 @@ extern "C" fn sigsys_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_v /// Signal handler for `SIGBUS` and `SIGSEGV`. /// /// Logs an error message and terminates the process with a specific exit code. -extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. 
let si_signo = unsafe { (*info).si_signo }; let si_code = unsafe { (*info).si_code }; @@ -80,7 +80,7 @@ extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut siginfo_t, _unused: }; } -extern "C" fn sigwinch_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigwinch_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. let si_signo = unsafe { (*info).si_signo }; @@ -95,7 +95,7 @@ extern "C" fn sigwinch_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c let _ = unsafe { libc::write(console_fd, &val as *const _ as *const c_void, 8) }; } -extern "C" fn sigint_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigint_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. let si_signo = unsafe { (*info).si_signo }; From 55e6354a3cb995e2fc3ce9867858734b1e83f2a4 Mon Sep 17 00:00:00 2001 From: frc4533-lincoln <132951735+frc4533-lincoln@users.noreply.github.com> Date: Thu, 5 Jun 2025 12:04:54 -0400 Subject: [PATCH 14/14] Switch out libc::renameat for libc::renameat2 and consolidate musl/glibc --- src/devices/src/virtio/fs/linux/overlayfs.rs | 25 +++++++------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/src/devices/src/virtio/fs/linux/overlayfs.rs b/src/devices/src/virtio/fs/linux/overlayfs.rs index 8ecb36b73..8eb1b2a35 100644 --- a/src/devices/src/virtio/fs/linux/overlayfs.rs +++ b/src/devices/src/virtio/fs/linux/overlayfs.rs @@ -1996,24 +1996,15 @@ impl OverlayFs { // Perform the rename let res = unsafe { - #[cfg(target_env = "gnu")] + #[cfg(any(target_env = "gnu", target_env = "musl"))] { - libc::renameat2( - old_parent_data.file.as_raw_fd(), - old_name.as_ptr(), - new_parent_data.file.as_raw_fd(), - new_name.as_ptr(), - flags, - ) - } - 
#[cfg(target_env = "musl")] - { - libc::renameat( - old_parent_data.file.as_raw_fd(), - old_name.as_ptr(), - new_parent_data.file.as_raw_fd(), - new_name.as_ptr(), - ) + libc::renameat2( + old_parent_data.file.as_raw_fd(), + old_name.as_ptr(), + new_parent_data.file.as_raw_fd(), + new_name.as_ptr(), + flags, + ) } };