diff --git a/.gitignore b/.gitignore index aa03a7cab..7837b63a7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ __pycache__ /libkrun.pc init/init examples/chroot_vm +ignore/ diff --git a/Cargo.lock b/Cargo.lock index 42e60ad7c..f70a2941f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -28,9 +28,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -44,7 +44,7 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ - "libc", + "libc 0.2.172", ] [[package]] @@ -59,9 +59,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" [[package]] name = "arch" @@ -70,7 +70,7 @@ dependencies = [ "arch_gen", "kvm-bindings", "kvm-ioctls", - "libc", + "libc 1.0.0-alpha.1", "smbios", "utils", "vm-fdt", @@ -83,9 +83,9 @@ version = "0.1.0" [[package]] name = "async-trait" -version = "0.1.83" +version = "0.1.88" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" +checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", @@ -99,7 +99,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", - "libc", + "libc 0.2.172", "winapi", ] @@ 
-111,13 +111,13 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "backtrace" -version = "0.3.74" +version = "0.3.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" +checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002" dependencies = [ "addr2line", "cfg-if", - "libc", + "libc 0.2.172", "miniz_oxide", "object", "rustc-demangle", @@ -146,16 +146,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088" dependencies = [ "annotate-snippets", - "bitflags 2.6.0", + "bitflags 2.9.1", "cexpr", "clang-sys", - "itertools", + "itertools 0.12.1", "lazy_static", "lazycell", "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 1.1.0", + "shlex", + "syn", +] + +[[package]] +name = "bindgen" +version = "0.71.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" +dependencies = [ + "bitflags 2.9.1", + "cexpr", + "clang-sys", + "itertools 0.13.0", + "log", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash 2.1.1", "shlex", "syn", ] @@ -174,15 +194,15 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.6.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "bumpalo" -version = "3.16.0" +version = "3.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" +checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" 
[[package]] name = "byteorder" @@ -196,15 +216,15 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "190baaad529bcfbde9e1a19022c42781bdb6ff9de25721abdb8fd98c0807730b" dependencies = [ - "libc", + "libc 0.2.172", "thiserror", ] [[package]] name = "cc" -version = "1.2.1" +version = "1.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" dependencies = [ "shlex", ] @@ -236,16 +256,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.38" +version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-targets 0.52.6", + "windows-link", ] [[package]] @@ -255,7 +275,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" dependencies = [ "glob", - "libc", + "libc 0.2.172", "libloading", ] @@ -309,18 +329,18 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.13" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33480d6946193aa8033910124896ca395333cae7e2d1113d1fef6c3272217df2" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" -version = "0.8.20" +version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" +checksum = 
"d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "curl" @@ -329,7 +349,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9fb4d13a1be2b58f14d60adba57c9834b78c62fd86c3e76a148f732686e9265" dependencies = [ "curl-sys", - "libc", + "libc 0.2.172", "openssl-probe", "openssl-sys", "schannel", @@ -339,12 +359,12 @@ dependencies = [ [[package]] name = "curl-sys" -version = "0.4.78+curl-8.11.0" +version = "0.4.80+curl-8.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eec768341c5c7789611ae51cf6c459099f22e64a5d5d0ce4892434e33821eaf" +checksum = "55f7df2eac63200c3ab25bde3b2268ef2ee56af3d238e76d61f01c3c49bff734" dependencies = [ "cc", - "libc", + "libc 0.2.172", "libz-sys", "openssl-sys", "pkg-config", @@ -363,7 +383,9 @@ dependencies = [ "env_logger", "hvf", "imago", - "libc", + "intaglio", + "ipnetwork", + "libc 1.0.0-alpha.1", "log", "lru", "nix 0.24.3", @@ -371,6 +393,7 @@ dependencies = [ "polly", "rand", "rutabaga_gfx", + "tempfile", "thiserror", "utils", "virtio-bindings", @@ -394,7 +417,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ - "libc", + "libc 0.2.172", "option-ext", "redox_users", "windows-sys 0.48.0", @@ -402,9 +425,9 @@ dependencies = [ [[package]] name = "either" -version = "1.13.0" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "env_logger" @@ -421,15 +444,31 @@ dependencies = [ [[package]] name = "equivalent" -version = "1.0.1" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name 
= "errno" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +dependencies = [ + "libc 0.2.172", + "windows-sys 0.59.0", +] + +[[package]] +name = "fastrand" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "flate2" -version = "1.0.35" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" +checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" dependencies = [ "crc32fast", "miniz_oxide", @@ -437,9 +476,9 @@ dependencies = [ [[package]] name = "foldhash" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "foreign-types" @@ -547,13 +586,25 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.15" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc 0.2.172", + "wasi 0.11.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", - "libc", - "wasi", + "libc 0.2.172", + "r-efi", + "wasi 0.14.2+wasi-0.2.4", ] [[package]] @@ -564,15 +615,15 @@ checksum = 
"07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "glob" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "hashbrown" -version = "0.15.1" +version = "0.15.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" dependencies = [ "allocator-api2", "equivalent", @@ -591,7 +642,7 @@ version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ - "libc", + "libc 0.2.172", ] [[package]] @@ -602,9 +653,9 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "hvf" @@ -618,14 +669,15 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.61" +version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", + "log", "wasm-bindgen", "windows-core", ] @@ -645,7 +697,7 @@ version = "0.1.3" dependencies = [ "async-trait", "bincode", - "libc", + "libc 1.0.0-alpha.1", "miniz_oxide", "rustc_version", "serde", @@ 
-656,20 +708,32 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", "hashbrown", ] +[[package]] +name = "intaglio" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7785e397d45f5a00bd35df6c293518c240c321b734b15a02718aa21103de1ce9" + [[package]] name = "iocuddle" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8972d5be69940353d5347a1344cb375d9b457d6809b428b05bb1ca2fb9ce007" +[[package]] +name = "ipnetwork" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf370abdafd54d13e54a620e8c3e1145f28e46cc9d704bc6d94414559df41763" + [[package]] name = "itertools" version = "0.12.1" @@ -679,18 +743,28 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -715,22 +789,22 @@ dependencies = [ [[package]] name = "kvm-bindings" 
-version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4933174d0cc4b77b958578cd45784071cc5ae212c2d78fbd755aaaa6dfa71a" +checksum = "3b13baf7bdfda2e10bcb109fcb099ef40cff82374eb6b7cdcf4695bdec4e522c" dependencies = [ "vmm-sys-util", ] [[package]] name = "kvm-ioctls" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "337d1afa126368bbd6a5c328048f71a69a737e9afe7e436b392a8f8d770c9171" +checksum = "083c460d5a272c2f22205973e319147b791d92a288d7d7a8d4c6194f95229440" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "kvm-bindings", - "libc", + "libc 0.2.172", "vmm-sys-util", ] @@ -748,9 +822,14 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.164" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" + +[[package]] +name = "libc" +version = "1.0.0-alpha.1" +source = "git+https://github.com/frc4533-lincoln/libc.git?branch=main#0908643caf9c6a05be361820653322e3ea69fab9" [[package]] name = "libkrun" @@ -760,7 +839,8 @@ dependencies = [ "devices", "env_logger", "hvf", - "libc", + "ipnetwork", + "libc 1.0.0-alpha.1", "log", "once_cell", "polly", @@ -770,12 +850,12 @@ dependencies = [ [[package]] name = "libloading" -version = "0.8.5" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.53.0", ] [[package]] @@ -784,8 +864,8 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.6.0", - "libc", + "bitflags 2.9.1", + "libc 0.2.172", ] [[package]] @@ -794,11 +874,11 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65f3a4b81b2a2d8c7f300643676202debd1b7c929dbf5c9bb89402ea11d19810" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "cc", "convert_case", "cookie-factory", - "libc", + "libc 0.2.172", "libspa-sys", "nix 0.27.1", "nom", @@ -811,34 +891,40 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf0d9716420364790e85cbb9d3ac2c950bde16a7dd36f3209b7dfdfc4a24d01f" dependencies = [ - "bindgen", + "bindgen 0.69.5", "cc", "system-deps", ] [[package]] name = "libz-sys" -version = "1.1.20" +version = "1.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2d16453e800a8cf6dd2fc3eb4bc99b786a9b90c663b8559a5b1a041bf89e472" +checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" dependencies = [ "cc", - "libc", + "libc 0.2.172", "pkg-config", "vcpkg", ] +[[package]] +name = "linux-raw-sys" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" + [[package]] name = "log" -version = "0.4.22" +version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "lru" -version = "0.12.5" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" dependencies = [ "hashbrown", ] @@ -875,9 +961,9 @@ checksum = 
"68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.8.0" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" +checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" dependencies = [ "adler2", ] @@ -890,7 +976,7 @@ checksum = "fa52e972a9a719cecb6864fb88568781eb706bac2cd1d4f04a648542dbf78069" dependencies = [ "bitflags 1.3.2", "cfg-if", - "libc", + "libc 0.2.172", "memoffset 0.6.5", ] @@ -902,7 +988,7 @@ checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" dependencies = [ "bitflags 1.3.2", "cfg-if", - "libc", + "libc 0.2.172", "memoffset 0.7.1", "pin-utils", ] @@ -913,9 +999,9 @@ version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "cfg-if", - "libc", + "libc 0.2.172", ] [[package]] @@ -939,29 +1025,29 @@ dependencies = [ [[package]] name = "object" -version = "0.36.5" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ "memchr", ] [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "openssl" -version = "0.10.68" +version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +checksum = 
"fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.6.0", + "bitflags 2.9.1", "cfg-if", "foreign-types", - "libc", + "libc 0.2.172", "once_cell", "openssl-macros", "openssl-sys", @@ -980,18 +1066,18 @@ dependencies = [ [[package]] name = "openssl-probe" -version = "0.1.5" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" [[package]] name = "openssl-sys" -version = "0.9.104" +version = "0.9.108" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +checksum = "e145e1651e858e820e4860f7b9c5e169bc1d8ce1c86043be79fa7b7634821847" dependencies = [ "cc", - "libc", + "libc 0.2.172", "pkg-config", "vcpkg", ] @@ -1004,9 +1090,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "pin-project-lite" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" +checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" [[package]] name = "pin-utils" @@ -1021,8 +1107,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08e645ba5c45109106d56610b3ee60eb13a6f2beb8b74f8dc8186cf261788dda" dependencies = [ "anyhow", - "bitflags 2.6.0", - "libc", + "bitflags 2.9.1", + "libc 0.2.172", "libspa", "libspa-sys", "nix 0.27.1", @@ -1037,39 +1123,49 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "849e188f90b1dda88fe2bfe1ad31fe5f158af2c98f80fb5d13726c44f3f01112" dependencies = [ - "bindgen", + "bindgen 0.69.5", "libspa-sys", "system-deps", ] [[package]] name = "pkg-config" -version = "0.3.31" +version = 
"0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "polly" version = "0.0.1" dependencies = [ - "libc", + "libc 0.2.172", "utils", ] [[package]] name = "ppv-lite86" -version = "0.2.20" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy 0.8.25", +] + +[[package]] +name = "prettyplease" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +checksum = "664ec5419c51e34154eec046ebcba56312d5a2fc3b09a06da188e1ad21afadf6" dependencies = [ - "zerocopy 0.7.35", + "proc-macro2", + "syn", ] [[package]] name = "proc-macro2" -version = "1.0.89" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f139b0662de085916d1fb67d2b4169d1addddda1919e696f3252b740b629986e" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -1086,25 +1182,31 @@ dependencies = [ "flate2", "hex", "lazy_static", - "libc", + "libc 0.2.172", ] [[package]] name = "quote" -version = "1.0.37" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "rand" version = "0.8.5" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "libc", + "libc 0.2.172", "rand_chacha", "rand_core", ] @@ -1125,7 +1227,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom", + "getrandom 0.2.16", ] [[package]] @@ -1143,7 +1245,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom", + "getrandom 0.2.16", "libredox", "thiserror", ] @@ -1179,9 +1281,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "remain" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46aef80f842736de545ada6ec65b81ee91504efd6853f4b96de7414c42ae7443" +checksum = "d7ef12e84481ab4006cb942f8682bba28ece7270743e649442027c5db87df126" dependencies = [ "proc-macro2", "quote", @@ -1200,6 +1302,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.1" @@ -1209,13 +1317,32 @@ dependencies = [ "semver", ] +[[package]] +name = "rustix" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" +dependencies = [ + "bitflags 2.9.1", + "errno", + "libc 0.2.172", + "linux-raw-sys", + "windows-sys 0.59.0", +] + +[[package]] +name = "rustversion" +version 
= "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + [[package]] name = "rutabaga_gfx" version = "0.1.2" dependencies = [ "anyhow", "cfg-if", - "libc", + "libc 0.2.172", "log", "nix 0.26.4", "pkg-config", @@ -1227,30 +1354,30 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] [[package]] name = "semver" -version = "1.0.23" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" +checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" [[package]] name = "serde" -version = "1.0.215" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] @@ -1266,18 +1393,18 @@ dependencies = [ [[package]] name = "serde_bytes" -version = "0.11.15" +version = "0.11.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "387cc504cb06bb40a96c8e04e951fe01854cf6bc921053c954e4a606d9675c6a" +checksum = "8437fd221bde2d4ca316d61b90e337e9e702b3820b87d63caa9ba6c02bd06d96" dependencies = [ "serde", ] [[package]] name = "serde_derive" 
-version = "1.0.215" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -1286,9 +1413,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.133" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -1321,7 +1448,7 @@ dependencies = [ "hex", "iocuddle", "lazy_static", - "libc", + "libc 0.2.172", "openssl", "rdrand", "serde", @@ -1348,9 +1475,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "smbios" @@ -1361,11 +1488,11 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" dependencies = [ - "libc", + "libc 0.2.172", "windows-sys 0.52.0", ] @@ -1377,9 +1504,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "2.0.87" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", 
@@ -1405,6 +1532,19 @@ version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +[[package]] +name = "tempfile" +version = "3.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -1436,9 +1576,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.41.1" +version = "1.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "75ef51a33ef1da925cea3e4eb122833cb377c61439ca401b770f54902b806779" dependencies = [ "backtrace", "pin-project-lite", @@ -1446,9 +1586,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.19" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e" +checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" dependencies = [ "serde", "serde_spanned", @@ -1458,18 +1598,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.22" +version = "0.22.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" dependencies = [ "indexmap", "serde", @@ 
-1480,9 +1620,9 @@ dependencies = [ [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -1491,9 +1631,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", @@ -1502,18 +1642,18 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", ] [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unicode-segmentation" @@ -1533,18 +1673,20 @@ version = "0.1.0" dependencies = [ "bitflags 1.3.2", "env_logger", - "libc", + "libc 1.0.0-alpha.1", "log", "vmm-sys-util", ] [[package]] name = "uuid" -version = "1.11.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ + "js-sys", "serde", + "wasm-bindgen", ] [[package]] @@ -1561,9 
+1703,12 @@ checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b" [[package]] name = "virtio-bindings" -version = "0.2.4" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1711e61c00f8cb450bd15368152a1e37a12ef195008ddc7d0f4812f9e2b30a68" +checksum = "cd2fe65550801ac106389d41f34cb1b32c4f7aaedf1b6cda1da3a211880de7f6" +dependencies = [ + "bindgen 0.71.1", +] [[package]] name = "vm-fdt" @@ -1577,7 +1722,7 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1720e7240cdc739f935456eb77f370d7e9b2a3909204da1e2b47bef1137a013" dependencies = [ - "libc", + "libc 0.2.172", "thiserror", "winapi", ] @@ -1594,11 +1739,12 @@ dependencies = [ "devices", "env_logger", "hvf", + "ipnetwork", "kbs-types", "kernel", "kvm-bindings", "kvm-ioctls", - "libc", + "libc 1.0.0-alpha.1", "log", "nix 0.24.3", "polly", @@ -1619,7 +1765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d1435039746e20da4f8d507a72ee1b916f7b4b05af7a91c093d2c6561934ede" dependencies = [ "bitflags 1.3.2", - "libc", + "libc 0.2.172", ] [[package]] @@ -1628,26 +1774,35 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", + "rustversion", "wasm-bindgen-macro", ] 
[[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", - "once_cell", "proc-macro2", "quote", "syn", @@ -1656,9 +1811,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1666,9 +1821,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", @@ -1679,9 +1834,12 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +dependencies = [ + "unicode-ident", +] [[package]] name = "winapi" @@ -1716,11 +1874,61 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-core" -version = "0.52.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" 
dependencies = [ - "windows-targets 0.52.6", + "windows-implement", + "windows-interface", + "windows-link", + "windows-result", + "windows-strings", +] + +[[package]] +name = "windows-implement" +version = "0.60.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-interface" +version = "0.59.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "windows-link" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" + +[[package]] +name = "windows-result" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +dependencies = [ + "windows-link", +] + +[[package]] +name = "windows-strings" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +dependencies = [ + "windows-link", ] [[package]] @@ -1774,13 +1982,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + 
"windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -1793,6 +2017,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -1805,6 +2035,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -1817,12 +2053,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = 
"windows_i686_msvc" version = "0.48.5" @@ -1835,6 +2083,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -1847,6 +2101,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -1859,6 +2119,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -1871,15 +2137,30 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" -version = "0.6.20" +version = "0.7.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"36c1fec1a2bb5866f07c25f68c26e565c4c200aebb96d7e55710c19d3e8ac49b" +checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" dependencies = [ "memchr", ] +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags 2.9.1", +] + [[package]] name = "yansi-term" version = "0.1.2" @@ -1901,12 +2182,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" dependencies = [ - "byteorder", - "zerocopy-derive 0.7.35", + "zerocopy-derive 0.8.25", ] [[package]] @@ -1922,9 +2202,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.7.35" +version = "0.8.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index a8da0b5b0..9d4be212e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,6 +2,15 @@ members = ["src/libkrun"] resolver = "2" +[workspace.dependencies] +libc = { git = "https://github.com/frc4533-lincoln/libc.git", branch = "main", features = ["extra_traits"] } +vmm-sys-util = "0.12.1" +kvm-bindings = { version = "0.11", features = ["fam-wrappers"] } +kvm-ioctls = "0.21" +vm-memory = { version = "0.16", features = ["backend-mmap"] } +tokio = { version = "1", features = ["rt", "sync"] } +serde = { version = "1", features = ["derive"] } + [profile.dev] #panic = "abort" diff --git a/Makefile b/Makefile index 36f980747..a323d2a86 100644 --- a/Makefile +++ b/Makefile @@ 
-83,8 +83,13 @@ debug: $(LIBRARY_DEBUG_$(OS)) libkrun.pc ifeq ($(BUILD_INIT),1) INIT_BINARY = init/init $(INIT_BINARY): $(INIT_SRC) +ifeq ($(OS),Darwin) + @echo "Building on macOS, using ./build_on_krunvm.sh" + ./build_on_krunvm.sh +else gcc -O2 -static -Wall $(INIT_DEFS) -o $@ $(INIT_SRC) $(INIT_DEFS) endif +endif $(LIBRARY_RELEASE_$(OS)): $(INIT_BINARY) cargo build --release $(FEATURE_FLAGS) diff --git a/build_on_krunvm.sh b/build_on_krunvm.sh old mode 100644 new mode 100755 diff --git a/examples/Makefile b/examples/Makefile index 8c163059e..34c204f2d 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -2,9 +2,9 @@ ARCH = $(shell uname -m) OS = $(shell uname -s) LDFLAGS_x86_64_Linux = -lkrun LDFLAGS_aarch64_Linux = -lkrun -LDFLAGS_arm64_Darwin = -L/opt/homebrew/lib -lkrun +LDFLAGS_arm64_Darwin = -L/usr/local/lib -lkrun LDFLAGS_sev = -lkrun-sev -LDFLAGS_efi = -L/opt/homebrew/lib -lkrun-efi +LDFLAGS_efi = -L/usr/local/lib -lkrun-efi CFLAGS = -O2 -g -I../include ROOTFS_DISTRO := fedora ROOTFS_DIR = rootfs_$(ROOTFS_DISTRO) diff --git a/include/libkrun.h b/include/libkrun.h index efd3127a6..ffc1c45e7 100644 --- a/include/libkrun.h +++ b/include/libkrun.h @@ -59,9 +59,34 @@ int32_t krun_set_vm_config(uint32_t ctx_id, uint8_t num_vcpus, uint32_t ram_mib) * * Returns: * Zero on success or a negative error number on failure. + * Documented errors: + * -EEXIST when a root device is already set + * + * Notes: + * This function is mutually exclusive with krun_set_overlayfs_root. */ int32_t krun_set_root(uint32_t ctx_id, const char *root_path); +/** + * Sets up an OverlayFS to be used as root for the microVM. Not available in libkrun-SEV. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "root_layers" - an array of string pointers to filesystem paths representing + * the layers to be used for the OverlayFS. The array must be + * NULL-terminated and contain at least one layer. + * + * Returns: + * Zero on success or a negative error number on failure. 
+ * Documented errors: + * -EINVAL when no layers are provided + * -EEXIST when a root device is already set + * + * Notes: + * This function is mutually exclusive with krun_set_root. + */ +int32_t krun_set_overlayfs_root(uint32_t ctx_id, const char *const root_layers[]); + /** * DEPRECATED. Use krun_add_disk instead. * @@ -293,6 +318,35 @@ int32_t krun_set_net_mac(uint32_t ctx_id, uint8_t *const c_mac); */ int32_t krun_set_port_map(uint32_t ctx_id, const char *const port_map[]); +/** + * Configures the static IP, subnet, and scope for the TSI network backend. + * + * Arguments: + * "ctx_id" - the configuration context ID. + * "c_ip" - an optional null-terminated string representing the guest's static IPv4 address. + * "c_subnet" - an optional null-terminated string representing the guest's subnet in CIDR notation (e.g., "192.168.1.0/24"). + * If scope is 1 and subnet is not provided, all connections will be blocked. + * "scope" - an integer specifying the scope (0-3): + * 0: None - Block all IP communication + * 1: Group - Allow within subnet (if specified; otherwise, block all like scope 0) + * 2: Public - Allow public IPs + * 3: Any - Allow any IP + * + * Returns: + * Zero on success or a negative error number on failure. + * Documented errors: + * -EINVAL if scope value is > 3 or IP/subnet strings are invalid. + * -ENOTSUP if the network mode is not TSI. + * + * Notes: + * This function is only effective when the default TSI network backend is used (i.e., neither + * krun_set_passt_fd nor krun_set_gvproxy_path has been called). + */ +int32_t krun_set_tsi_scope(uint32_t ctx_id, + const char *c_ip, + const char *c_subnet, + uint8_t scope); + /* Flags for virglrenderer. Copied from virglrenderer bindings. 
*/ #define VIRGLRENDERER_USE_EGL 1 << 0 #define VIRGLRENDERER_THREAD_SYNC 1 << 1 diff --git a/init/init.c b/init/init.c index 93553b736..0ab327656 100644 --- a/init/init.c +++ b/init/init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "jsmn.h" @@ -389,6 +390,167 @@ static int chroot_luks() } #endif +/* mkdir -p (recursively create all parents) */ +static int mkdir_p(const char *path, mode_t mode) +{ + char tmp[256]; + char *p = NULL; + size_t len; + + if (!path || !*path) return -1; + len = strnlen(path, sizeof(tmp) - 1); + memcpy(tmp, path, len); + tmp[len] = '\0'; + + if (tmp[len - 1] == '/') + tmp[len - 1] = '\0'; + + for (p = tmp + 1; *p; ++p) { + if (*p == '/') { + *p = '\0'; + if (mkdir(tmp, mode) < 0 && errno != EEXIST) + return -1; + *p = '/'; + } + } + if (mkdir(tmp, mode) < 0 && errno != EEXIST) + return -1; + return 0; +} + +/* Return: 1 = same fs already mounted + * -1 = dir busy with different type/tag + * 0 = not mounted yet + */ +static int is_mounted(const char *dir, const char *src, const char *type) +{ + FILE *fp = setmntent("/proc/self/mounts", "r"); + if (!fp) return 0; /* silent best-effort */ + + struct mntent *m; + int found = 0; + + while ((m = getmntent(fp)) != NULL) { + if (strcmp(m->mnt_dir, dir) == 0) { + if (strcmp(m->mnt_type, type) != 0) + found = -1; /* same dir, other fstype */ + else + found = (strcmp(m->mnt_fsname, src) == 0) ? 1 : -1; + break; + } + } + endmntent(fp); + return found; +} + +/* Strip the single word "defaults" (and empty commas) from opt string. + * Returns pointer inside `buf`. buf must persist until mount(2) call. 
+ */ +static const char *clean_opts(const char *orig, char *buf, size_t buflen) +{ + if (!orig || !*orig) return NULL; + if (strcmp(orig, "defaults") == 0) return NULL; + + /* quick path: if the substring "defaults" not present, pass as-is */ + if (!strstr(orig, "defaults")) return orig; + + /* otherwise build a filtered copy */ + char *dst = buf; + const char *tok; + char tmp[256]; + strncpy(tmp, orig, sizeof(tmp)-1); + tmp[sizeof(tmp)-1] = '\0'; + + for (tok = strtok(tmp, ","); tok; tok = strtok(NULL, ",")) { + if (strcmp(tok, "defaults") == 0 || *tok == '\0') + continue; + size_t n = snprintf(dst, buflen - (dst - buf), "%s,", tok); + dst += n; + } + if (dst != buf) *(dst - 1) = '\0'; /* remove trailing comma */ + return (dst == buf) ? NULL : buf; /* all stripped? -> NULL */ +} + +/* Mount every virtiofs entry found in /etc/fstab. + * Idempotent, silent on success, logs only actionable errors. */ +static int mount_fstab_virtiofs(void) +{ + FILE *fp = setmntent("/etc/fstab", "r"); + if (!fp) /* no fstab → nothing to do, not an error */ + return 0; + + struct mntent *e; + int rc = 0; + + while ((e = getmntent(fp)) != NULL) { + /* ─────────── 1. we only care about virtiofs rows ─────────── */ + if (strcmp(e->mnt_type, "virtiofs") != 0) + continue; + + if (!e->mnt_fsname[0] || !e->mnt_dir[0]) { + fprintf(stderr, + "virtiofs-init: malformed fstab line – skipped\n"); + rc = -1; + continue; + } + + /* ─────────── 2. make local copies BEFORE is_mounted() ─────── */ + char fsname[256], dir[256], opts[256]; + strncpy(fsname, e->mnt_fsname, sizeof(fsname) - 1); + strncpy(dir, e->mnt_dir, sizeof(dir) - 1); + strncpy(opts, e->mnt_opts, sizeof(opts) - 1); + fsname[sizeof(fsname) - 1] = + dir[sizeof(dir) - 1] = + opts[sizeof(opts) - 1] = '\0'; + + /* ─────────── 3. ensure mount‑point exists (mkdir -p) ───────── */ + if (mkdir_p(dir, 0755) < 0) { + fprintf(stderr, + "virtiofs-init: cannot create %s: %s\n", + dir, strerror(errno)); + rc = -1; + continue; + } + + /* ─────────── 4. 
skip if already mounted / busy ─────────────── */ + switch (is_mounted(dir, fsname, "virtiofs")) { + case 1: continue; /* identical mount already there */ + case -1: fprintf(stderr, + "virtiofs-init: %s busy – skipped\n", dir); + rc = -1; + continue; + } + + /* ─────────── 5. translate common flags BEFORE they vanish ──── */ + unsigned long flags = 0; + struct mntent fake = { .mnt_opts = opts }; + if (hasmntopt(&fake, "ro")) flags |= MS_RDONLY; + if (hasmntopt(&fake, "nosuid")) flags |= MS_NOSUID; + if (hasmntopt(&fake, "nodev")) flags |= MS_NODEV; + if (hasmntopt(&fake, "noexec")) flags |= MS_NOEXEC; + + /* Clean "defaults" out of the option list */ + char optbuf[256]; + const char *data = clean_opts(opts, optbuf, sizeof(optbuf)); + + /* ─────────── 6. actual mount attempt ───────────────────────── */ + if (mount(fsname, dir, "virtiofs", flags, data) < 0) { + if (errno == ENODEV || errno == ENOENT) { + fprintf(stderr, + "virtiofs-init: tag %s absent – skipped\n", fsname); + } else if (errno != EBUSY) { + fprintf(stderr, + "virtiofs-init: mount %s→%s failed: %s\n", + fsname, dir, strerror(errno)); + rc = -1; + } + } + } + + endmntent(fp); + return rc; +} + static int mount_filesystems() { char *const DIRS_LEVEL1[] = {"/dev", "/proc", "/sys"}; @@ -448,6 +610,9 @@ static int mount_filesystems() /* May fail if already exists and that's fine. 
*/ symlink("/proc/self/fd", "/dev/fd"); + /* Mount virtiofs shares from /etc/fstab (if any) */ + mount_fstab_virtiofs(); + return 0; } diff --git a/src/arch/Cargo.toml b/src/arch/Cargo.toml index baaedda55..a03bbf1b6 100644 --- a/src/arch/Cargo.toml +++ b/src/arch/Cargo.toml @@ -10,16 +10,16 @@ amd-sev = [ "tee" ] efi = [] [dependencies] -libc = ">=0.2.39" -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +libc.workspace = true +vm-memory.workspace = true arch_gen = { path = "../arch_gen" } smbios = { path = "../smbios" } utils = { path = "../utils" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings.workspace = true +kvm-ioctls.workspace = true [target.'cfg(target_arch = "aarch64")'.dependencies] vm-fdt = ">= 0.2.0" diff --git a/src/cpuid/Cargo.toml b/src/cpuid/Cargo.toml index 41c53aee0..5c09b309c 100644 --- a/src/cpuid/Cargo.toml +++ b/src/cpuid/Cargo.toml @@ -5,8 +5,8 @@ authors = ["Amazon Firecracker team "] edition = "2021" [dependencies] -vmm-sys-util = ">=0.11" +vmm-sys-util.workspace = true [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.8", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings.workspace = true +kvm-ioctls.workspace = true diff --git a/src/cpuid/src/transformer/common.rs b/src/cpuid/src/transformer/common.rs index d4a48c743..a89099d0d 100644 --- a/src/cpuid/src/transformer/common.rs +++ b/src/cpuid/src/transformer/common.rs @@ -123,7 +123,7 @@ pub fn use_host_cpuid_function( edx: entry.edx, padding: [0, 0, 0], }) - .map_err(FamError)?; + .map_err(|e| FamError(e))?; count += 1; } diff --git a/src/devices/Cargo.toml b/src/devices/Cargo.toml index 7e5163467..f230f35ec 100644 --- a/src/devices/Cargo.toml +++ b/src/devices/Cargo.toml @@ -15,24 +15,29 @@ snd = ["pw", "thiserror"] virgl_resource_map2 = [] [dependencies] +intaglio = "1.10.0" bitflags = "1.2.0" crossbeam-channel = "0.5" 
env_logger = "0.9.0" -libc = ">=0.2.39" +libc.workspace = true log = "0.4.0" nix = { version = "0.24.1", features = ["poll"] } pw = { package = "pipewire", version = "0.8.0", optional = true } rand = "0.8.5" thiserror = { version = "1.0", optional = true } virtio-bindings = "0.2.0" -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true zerocopy = { version = "0.6.3", optional = true } zerocopy-derive = { version = "0.6.3", optional = true } +ipnetwork = "0.21" arch = { path = "../arch" } utils = { path = "../utils" } polly = { path = "../polly" } -rutabaga_gfx = { path = "../rutabaga_gfx", features = ["virgl_renderer", "virgl_renderer_next"], optional = true } +rutabaga_gfx = { path = "../rutabaga_gfx", features = [ + "virgl_renderer", + "virgl_renderer_next", +], optional = true } imago = { path = "../imago", features = ["sync-wrappers", "vm-memory"] } @@ -43,3 +48,6 @@ lru = ">=0.9" [target.'cfg(target_os = "linux")'.dependencies] rutabaga_gfx = { path = "../rutabaga_gfx", features = ["x"], optional = true } caps = "0.5.5" + +[dev-dependencies] +tempfile = "3.17.1" diff --git a/src/devices/src/virtio/fs/device.rs b/src/devices/src/virtio/fs/device.rs index 9d7a21e04..53d87a0fb 100644 --- a/src/devices/src/virtio/fs/device.rs +++ b/src/devices/src/virtio/fs/device.rs @@ -15,6 +15,8 @@ use vm_memory::{ByteValued, GuestMemoryMmap}; use super::super::{ ActivateResult, DeviceState, FsError, Queue as VirtQueue, VirtioDevice, VirtioShmRegion, }; +use super::kinds::{FsImplConfig, FsImplShare}; +use super::overlayfs; use super::passthrough; use super::worker::FsWorker; use super::ExportTable; @@ -51,7 +53,7 @@ pub struct Fs { device_state: DeviceState, config: VirtioFsConfig, shm_region: Option, - passthrough_cfg: passthrough::Config, + fs_config: FsImplConfig, worker_thread: Option>, worker_stopfd: EventFd, #[cfg(target_os = "macos")] @@ -61,7 +63,7 @@ pub struct Fs { impl Fs { pub(crate) fn with_queues( fs_id: String, - shared_dir: 
String, + fs_share: FsImplShare, queues: Vec, ) -> super::Result { let mut queue_events = Vec::new(); @@ -76,10 +78,15 @@ impl Fs { let mut config = VirtioFsConfig::default(); config.tag[..tag.len()].copy_from_slice(tag.as_slice()); config.num_request_queues = 1; - - let fs_cfg = passthrough::Config { - root_dir: shared_dir, - ..Default::default() + let fs_config = match fs_share { + FsImplShare::Passthrough(root_dir) => FsImplConfig::Passthrough(passthrough::Config { + root_dir, + ..Default::default() + }), + FsImplShare::Overlayfs(layers) => FsImplConfig::Overlayfs(overlayfs::Config { + layers, + ..Default::default() + }), }; Ok(Fs { @@ -94,7 +101,7 @@ impl Fs { device_state: DeviceState::Inactive, config, shm_region: None, - passthrough_cfg: fs_cfg, + fs_config, worker_thread: None, worker_stopfd: EventFd::new(EFD_NONBLOCK).map_err(FsError::EventFd)?, #[cfg(target_os = "macos")] @@ -102,12 +109,12 @@ impl Fs { }) } - pub fn new(fs_id: String, shared_dir: String) -> super::Result { + pub fn new(fs_id: String, fs_share: FsImplShare) -> super::Result { let queues: Vec = defs::QUEUE_SIZES .iter() .map(|&max_size| VirtQueue::new(max_size)) .collect(); - Self::with_queues(fs_id, shared_dir, queues) + Self::with_queues(fs_id, fs_share, queues) } pub fn id(&self) -> &str { @@ -124,11 +131,20 @@ impl Fs { pub fn set_export_table(&mut self, export_table: ExportTable) -> u64 { static FS_UNIQUE_ID: AtomicU64 = AtomicU64::new(0); + let fsid = FS_UNIQUE_ID.fetch_add(1, Ordering::Relaxed); - self.passthrough_cfg.export_fsid = FS_UNIQUE_ID.fetch_add(1, Ordering::Relaxed); - self.passthrough_cfg.export_table = Some(export_table); + match &mut self.fs_config { + FsImplConfig::Passthrough(cfg) => { + cfg.export_fsid = fsid; + cfg.export_table = Some(export_table); + } + FsImplConfig::Overlayfs(cfg) => { + cfg.export_fsid = fsid; + cfg.export_table = Some(export_table); + } + } - self.passthrough_cfg.export_fsid + fsid } #[cfg(target_os = "macos")] @@ -215,6 +231,7 @@ impl 
VirtioDevice for Fs { .iter() .map(|e| e.try_clone().unwrap()) .collect(); + let worker = FsWorker::new( self.queues.clone(), queue_evts, @@ -224,13 +241,13 @@ impl VirtioDevice for Fs { self.irq_line, mem.clone(), self.shm_region.clone(), - self.passthrough_cfg.clone(), + self.fs_config.clone(), self.worker_stopfd.try_clone().unwrap(), #[cfg(target_os = "macos")] self.map_sender.clone(), ); - self.worker_thread = Some(worker.run()); + self.worker_thread = Some(worker.run()); self.device_state = DeviceState::Activated(mem); Ok(()) } diff --git a/src/devices/src/virtio/fs/filesystem.rs b/src/devices/src/virtio/fs/filesystem.rs index ed1284d8b..b55346cbf 100644 --- a/src/devices/src/virtio/fs/filesystem.rs +++ b/src/devices/src/virtio/fs/filesystem.rs @@ -25,6 +25,7 @@ pub use fuse::RemovemappingOne; pub use fuse::SetattrValid; /// Information about a path in the filesystem. +#[derive(Debug)] pub struct Entry { /// An `Inode` that uniquely identifies this path. During `lookup`, setting this to `0` means a /// negative entry. 
Returning `ENOENT` also means a negative entry but setting this to `0` diff --git a/src/devices/src/virtio/fs/kinds.rs b/src/devices/src/virtio/fs/kinds.rs new file mode 100644 index 000000000..d76da42e2 --- /dev/null +++ b/src/devices/src/virtio/fs/kinds.rs @@ -0,0 +1,651 @@ + + +use std::{ffi::CStr, io, path::PathBuf, time::Duration}; + +#[cfg(target_os = "macos")] +use crossbeam_channel::Sender; +#[cfg(target_os = "macos")] +use hvf::MemoryMapping; + +use crate::virtio::bindings; + +use super::{ + filesystem::{ + Context, DirEntry, Entry, Extensions, FileSystem, GetxattrReply, ListxattrReply, + ZeroCopyReader, ZeroCopyWriter, + }, + fuse::{FsOptions, OpenOptions, RemovemappingOne, SetattrValid}, + overlayfs::{self, OverlayFs}, + passthrough::{self, PassthroughFs}, +}; + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +#[derive(Clone, Debug)] +pub enum FsImplConfig { + Passthrough(passthrough::Config), + Overlayfs(overlayfs::Config), +} + +pub enum FsImpl { + Passthrough(PassthroughFs), + Overlayfs(OverlayFs), +} + +#[derive(Clone, Debug)] +pub enum FsImplShare { + Passthrough(String), + Overlayfs(Vec), +} + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +impl FileSystem for FsImpl { + type Inode = u64; + type Handle = u64; + + fn init(&self, capable: FsOptions) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.init(capable), + FsImpl::Overlayfs(fs) => fs.init(capable), + } + } + + fn destroy(&self) { + match self { + FsImpl::Passthrough(fs) => fs.destroy(), + FsImpl::Overlayfs(fs) => fs.destroy(), + } + } + + fn lookup(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result { + match self { + 
FsImpl::Passthrough(fs) => fs.lookup(ctx, parent, name), + FsImpl::Overlayfs(fs) => fs.lookup(ctx, parent, name), + } + } + + fn forget(&self, ctx: Context, inode: Self::Inode, count: u64) { + match self { + FsImpl::Passthrough(fs) => fs.forget(ctx, inode, count), + FsImpl::Overlayfs(fs) => fs.forget(ctx, inode, count), + } + } + + fn batch_forget(&self, ctx: Context, requests: Vec<(Self::Inode, u64)>) { + match self { + FsImpl::Passthrough(fs) => fs.batch_forget(ctx, requests), + FsImpl::Overlayfs(fs) => fs.batch_forget(ctx, requests), + } + } + + fn getattr( + &self, + ctx: Context, + inode: Self::Inode, + handle: Option, + ) -> io::Result<(bindings::stat64, Duration)> { + match self { + FsImpl::Passthrough(fs) => fs.getattr(ctx, inode, handle), + FsImpl::Overlayfs(fs) => fs.getattr(ctx, inode, handle), + } + } + + fn setattr( + &self, + ctx: Context, + inode: Self::Inode, + attr: bindings::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(bindings::stat64, Duration)> { + match self { + FsImpl::Passthrough(fs) => fs.setattr(ctx, inode, attr, handle, valid), + FsImpl::Overlayfs(fs) => fs.setattr(ctx, inode, attr, handle, valid), + } + } + + fn readlink(&self, ctx: Context, inode: Self::Inode) -> io::Result> { + match self { + FsImpl::Passthrough(fs) => fs.readlink(ctx, inode), + FsImpl::Overlayfs(fs) => fs.readlink(ctx, inode), + } + } + + fn symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Self::Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.symlink(ctx, linkname, parent, name, extensions), + FsImpl::Overlayfs(fs) => fs.symlink(ctx, linkname, parent, name, extensions), + } + } + + #[allow(clippy::too_many_arguments)] + fn mknod( + &self, + ctx: Context, + inode: Self::Inode, + name: &CStr, + mode: u32, + rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.mknod(ctx, inode, name, mode, rdev, umask, 
extensions), + FsImpl::Overlayfs(fs) => fs.mknod(ctx, inode, name, mode, rdev, umask, extensions), + } + } + + fn mkdir( + &self, + ctx: Context, + parent: Self::Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.mkdir(ctx, parent, name, mode, umask, extensions), + FsImpl::Overlayfs(fs) => fs.mkdir(ctx, parent, name, mode, umask, extensions), + } + } + + fn unlink(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.unlink(ctx, parent, name), + FsImpl::Overlayfs(fs) => fs.unlink(ctx, parent, name), + } + } + + fn rmdir(&self, ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.rmdir(ctx, parent, name), + FsImpl::Overlayfs(fs) => fs.rmdir(ctx, parent, name), + } + } + + fn rename( + &self, + ctx: Context, + olddir: Self::Inode, + oldname: &CStr, + newdir: Self::Inode, + newname: &CStr, + flags: u32, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.rename(ctx, olddir, oldname, newdir, newname, flags), + FsImpl::Overlayfs(fs) => fs.rename(ctx, olddir, oldname, newdir, newname, flags), + } + } + + fn link( + &self, + ctx: Context, + inode: Self::Inode, + newparent: Self::Inode, + newname: &CStr, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.link(ctx, inode, newparent, newname), + FsImpl::Overlayfs(fs) => fs.link(ctx, inode, newparent, newname), + } + } + + fn open( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + match self { + FsImpl::Passthrough(fs) => fs.open(ctx, inode, flags), + FsImpl::Overlayfs(fs) => fs.open(ctx, inode, flags), + } + } + + #[allow(clippy::too_many_arguments)] + fn create( + &self, + ctx: Context, + parent: Self::Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, 
Option, OpenOptions)> { + match self { + FsImpl::Passthrough(fs) => fs.create(ctx, parent, name, mode, flags, umask, extensions), + FsImpl::Overlayfs(fs) => fs.create(ctx, parent, name, mode, flags, umask, extensions), + } + } + + #[allow(clippy::too_many_arguments)] + fn read( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + w: W, + size: u32, + offset: u64, + lock_owner: Option, + flags: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => { + fs.read(ctx, inode, handle, w, size, offset, lock_owner, flags) + } + FsImpl::Overlayfs(fs) => { + fs.read(ctx, inode, handle, w, size, offset, lock_owner, flags) + } + } + } + + #[allow(clippy::too_many_arguments)] + fn write( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + r: R, + size: u32, + offset: u64, + lock_owner: Option, + delayed_write: bool, + kill_priv: bool, + flags: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.write( + ctx, + inode, + handle, + r, + size, + offset, + lock_owner, + delayed_write, + kill_priv, + flags, + ), + FsImpl::Overlayfs(fs) => fs.write( + ctx, + inode, + handle, + r, + size, + offset, + lock_owner, + delayed_write, + kill_priv, + flags, + ), + } + } + + fn flush( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + lock_owner: u64, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.flush(ctx, inode, handle, lock_owner), + FsImpl::Overlayfs(fs) => fs.flush(ctx, inode, handle, lock_owner), + } + } + + fn fsync( + &self, + ctx: Context, + inode: Self::Inode, + datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.fsync(ctx, inode, datasync, handle), + FsImpl::Overlayfs(fs) => fs.fsync(ctx, inode, datasync, handle), + } + } + + fn fallocate( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + match self { + 
FsImpl::Passthrough(fs) => fs.fallocate(ctx, inode, handle, mode, offset, length), + FsImpl::Overlayfs(fs) => fs.fallocate(ctx, inode, handle, mode, offset, length), + } + } + + #[allow(clippy::too_many_arguments)] + fn release( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + handle: Self::Handle, + flush: bool, + flock_release: bool, + lock_owner: Option, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => { + fs.release(ctx, inode, flags, handle, flush, flock_release, lock_owner) + } + FsImpl::Overlayfs(fs) => { + fs.release(ctx, inode, flags, handle, flush, flock_release, lock_owner) + } + } + } + + fn statfs(&self, ctx: Context, inode: Self::Inode) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.statfs(ctx, inode), + FsImpl::Overlayfs(fs) => fs.statfs(ctx, inode), + } + } + + fn setxattr( + &self, + ctx: Context, + inode: Self::Inode, + name: &CStr, + value: &[u8], + flags: u32, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setxattr(ctx, inode, name, value, flags), + FsImpl::Overlayfs(fs) => fs.setxattr(ctx, inode, name, value, flags), + } + } + + fn getxattr( + &self, + ctx: Context, + inode: Self::Inode, + name: &CStr, + size: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.getxattr(ctx, inode, name, size), + FsImpl::Overlayfs(fs) => fs.getxattr(ctx, inode, name, size), + } + } + + fn listxattr(&self, ctx: Context, inode: Self::Inode, size: u32) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.listxattr(ctx, inode, size), + FsImpl::Overlayfs(fs) => fs.listxattr(ctx, inode, size), + } + } + + fn removexattr(&self, ctx: Context, inode: Self::Inode, name: &CStr) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.removexattr(ctx, inode, name), + FsImpl::Overlayfs(fs) => fs.removexattr(ctx, inode, name), + } + } + + fn opendir( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + match 
self { + FsImpl::Passthrough(fs) => fs.opendir(ctx, inode, flags), + FsImpl::Overlayfs(fs) => fs.opendir(ctx, inode, flags), + } + } + + fn readdir( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + match self { + FsImpl::Passthrough(fs) => fs.readdir(ctx, inode, handle, size, offset, add_entry), + FsImpl::Overlayfs(fs) => fs.readdir(ctx, inode, handle, size, offset, add_entry), + } + } + + fn readdirplus( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry, Entry) -> io::Result, + { + match self { + FsImpl::Passthrough(fs) => fs.readdirplus(ctx, inode, handle, size, offset, add_entry), + FsImpl::Overlayfs(fs) => fs.readdirplus(ctx, inode, handle, size, offset, add_entry), + } + } + + fn fsyncdir( + &self, + ctx: Context, + inode: Self::Inode, + datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.fsyncdir(ctx, inode, datasync, handle), + FsImpl::Overlayfs(fs) => fs.fsyncdir(ctx, inode, datasync, handle), + } + } + + fn releasedir( + &self, + ctx: Context, + inode: Self::Inode, + flags: u32, + handle: Self::Handle, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.releasedir(ctx, inode, flags, handle), + FsImpl::Overlayfs(fs) => fs.releasedir(ctx, inode, flags, handle), + } + } + + fn access(&self, ctx: Context, inode: Self::Inode, mask: u32) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.access(ctx, inode, mask), + FsImpl::Overlayfs(fs) => fs.access(ctx, inode, mask), + } + } + + fn lseek( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + offset: u64, + whence: u32, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.lseek(ctx, inode, handle, offset, whence), + FsImpl::Overlayfs(fs) 
=> fs.lseek(ctx, inode, handle, offset, whence), + } + } + + #[allow(clippy::too_many_arguments)] + fn copyfilerange( + &self, + ctx: Context, + inode_in: Self::Inode, + handle_in: Self::Handle, + offset_in: u64, + inode_out: Self::Inode, + handle_out: Self::Handle, + offset_out: u64, + len: u64, + flags: u64, + ) -> io::Result { + match self { + FsImpl::Passthrough(fs) => fs.copyfilerange( + ctx, inode_in, handle_in, offset_in, inode_out, handle_out, offset_out, len, flags, + ), + FsImpl::Overlayfs(fs) => fs.copyfilerange( + ctx, inode_in, handle_in, offset_in, inode_out, handle_out, offset_out, len, flags, + ), + } + } + + #[allow(clippy::too_many_arguments)] + fn setupmapping( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + host_shm_base: u64, + shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setupmapping( + ctx, + inode, + handle, + foffset, + len, + flags, + moffset, + host_shm_base, + shm_size, + #[cfg(target_os = "macos")] map_sender, + ), + FsImpl::Overlayfs(fs) => fs.setupmapping( + ctx, + inode, + handle, + foffset, + len, + flags, + moffset, + host_shm_base, + shm_size, + #[cfg(target_os = "macos")] map_sender, + ), + } + } + + fn removemapping( + &self, + ctx: Context, + requests: Vec, + host_shm_base: u64, + shm_size: u64, + #[cfg(target_os = "macos")] map_sender: &Option>, + ) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => { + fs.removemapping(ctx, requests, host_shm_base, shm_size, #[cfg(target_os = "macos")] map_sender) + } + FsImpl::Overlayfs(fs) => { + fs.removemapping(ctx, requests, host_shm_base, shm_size, #[cfg(target_os = "macos")] map_sender) + } + } + } + + #[allow(clippy::too_many_arguments)] + fn ioctl( + &self, + ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + flags: u32, + cmd: u32, + arg: u64, + in_size: u32, + out_size: u32, + ) -> 
io::Result> { + match self { + FsImpl::Passthrough(fs) => { + fs.ioctl(ctx, inode, handle, flags, cmd, arg, in_size, out_size) + } + FsImpl::Overlayfs(fs) => { + fs.ioctl(ctx, inode, handle, flags, cmd, arg, in_size, out_size) + } + } + } + + fn getlk(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.getlk(), + FsImpl::Overlayfs(fs) => fs.getlk(), + } + } + + fn setlk(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setlk(), + FsImpl::Overlayfs(fs) => fs.setlk(), + } + } + + fn setlkw(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.setlkw(), + FsImpl::Overlayfs(fs) => fs.setlkw(), + } + } + + fn bmap(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.bmap(), + FsImpl::Overlayfs(fs) => fs.bmap(), + } + } + + fn poll(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.poll(), + FsImpl::Overlayfs(fs) => fs.poll(), + } + } + + fn notify_reply(&self) -> io::Result<()> { + match self { + FsImpl::Passthrough(fs) => fs.notify_reply(), + FsImpl::Overlayfs(fs) => fs.notify_reply(), + } + } +} diff --git a/src/devices/src/virtio/fs/linux/mod.rs b/src/devices/src/virtio/fs/linux/mod.rs index b8edbc7f9..94cbd09d2 100644 --- a/src/devices/src/virtio/fs/linux/mod.rs +++ b/src/devices/src/virtio/fs/linux/mod.rs @@ -1,2 +1,3 @@ pub mod fs_utils; pub mod passthrough; +pub mod overlayfs; diff --git a/src/devices/src/virtio/fs/linux/overlayfs.rs b/src/devices/src/virtio/fs/linux/overlayfs.rs new file mode 100644 index 000000000..8eb1b2a35 --- /dev/null +++ b/src/devices/src/virtio/fs/linux/overlayfs.rs @@ -0,0 +1,3402 @@ +use std::{ + collections::{btree_map, BTreeMap, HashSet}, + ffi::{CStr, CString}, + fs::File, + io, + mem::{self, MaybeUninit}, + os::{ + fd::{AsRawFd, FromRawFd, RawFd}, + unix::{ffi::OsStrExt, fs::MetadataExt}, + }, + path::PathBuf, + sync::{ + atomic::{AtomicBool, AtomicU64, Ordering}, + Arc, LazyLock, RwLock, + }, + time::Duration, +}; + +use 
caps::{has_cap, CapSet, Capability}; +use intaglio::{cstr::SymbolTable, Symbol}; +use nix::request_code_read; + +use crate::virtio::{ + bindings, + fs::{ + filesystem::{ + self, Context, DirEntry, Entry, ExportTable, Extensions, FileSystem, FsOptions, + GetxattrReply, ListxattrReply, OpenOptions, SetattrValid, ZeroCopyReader, + ZeroCopyWriter, + }, + fuse, + multikey::MultikeyBTreeMap, + }, +}; + +//-------------------------------------------------------------------------------------------------- +// Modules +//-------------------------------------------------------------------------------------------------- + +#[path = "../tests/overlayfs/mod.rs"] +#[cfg(test)] +mod tests; + +//-------------------------------------------------------------------------------------------------- +// Constants +//-------------------------------------------------------------------------------------------------- + +/// The prefix for whiteout files +const WHITEOUT_PREFIX: &str = ".wh."; + +/// The marker for opaque directories +const OPAQUE_MARKER: &str = ".wh..wh..opq"; + +/// Maximum allowed number of layers for the overlay filesystem. 
+const MAX_LAYERS: usize = 128; + +#[cfg(not(feature = "efi"))] +static INIT_BINARY: &[u8] = include_bytes!("../../../../../../init/init"); + +/// The name of the init binary +const INIT_CSTR: &[u8] = b"init.krun\0"; + +/// An empty C string; a `static` so the `LazyLock` is initialized once instead of per use +static EMPTY_CSTR: LazyLock<&CStr> = + LazyLock::new(|| unsafe { CStr::from_bytes_with_nul_unchecked(b"\0") }); + +/// The path of the `/proc/self/fd` directory; a `static` so it is initialized once +static PROC_SELF_FD_CSTR: LazyLock<&CStr> = + LazyLock::new(|| unsafe { CStr::from_bytes_with_nul_unchecked(b"/proc/self/fd\0") }); + +/// FICLONE ioctl for copy-on-write file cloning +/// Defined in Linux's fs.h as _IOW(0x94, 9, int) +const FICLONE: u64 = (0x94 << 8) | 9 | (std::mem::size_of::() as u64) << 16 | 1 << 30; + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +/// Type alias for inode identifiers +type Inode = u64; + +/// Type alias for file handle identifiers +type Handle = u64; + +/// Alternative key for looking up inodes by device and inode number +#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq)] +struct InodeAltKey { + /// The inode number from the host filesystem + ino: libc::ino64_t, + + /// The device ID from the host filesystem + dev: libc::dev_t, + + /// The mount ID from the host filesystem + mnt_id: u64, +} + +/// Data associated with an inode +#[derive(Debug)] +pub(crate) struct InodeData { + /// The inode number in the overlay filesystem + pub(crate) inode: Inode, + + /// The file handle for the inode + pub(crate) file: File, + + /// The device ID from the host filesystem + pub(crate) dev: libc::dev_t, + + /// The mount ID from the host filesystem + pub(crate) mnt_id: u64, + + /// Reference count for this inode from the perspective of [`FileSystem::lookup`] + pub(crate) refcount: AtomicU64, + + /// Path to inode + pub(crate) path: Vec, + + /// The layer
index this inode belongs to + pub(crate) layer_idx: usize, +} + +/// Data associated with an open file handle +#[derive(Debug)] +pub(crate) struct HandleData { + /// The inode this handle refers to + inode: Inode, + + /// The underlying file object + file: RwLock, + + /// Whether the file handle is exported + exported: AtomicBool, +} + +pub(crate) struct ScopedGid; + +pub(crate) struct ScopedUid; + +/// The caching policy that the file system should report to the FUSE client. By default the FUSE +/// protocol uses close-to-open consistency. This means that any cached contents of the file are +/// invalidated the next time that file is opened. +#[derive(Default, Debug, Clone)] +pub enum CachePolicy { + /// The client should never cache file data and all I/O should be directly forwarded to the + /// server. This policy must be selected when file contents may change without the knowledge of + /// the FUSE client (i.e., the file system does not have exclusive access to the directory). + Never, + + /// The client is free to choose when and how to cache file data. This is the default policy and + /// uses close-to-open consistency as described in the enum documentation. + #[default] + Auto, + + /// The client should always cache file data. This means that the FUSE client will not + /// invalidate any cached data that was returned by the file system the last time the file was + /// opened. This policy should only be selected when the file system has exclusive access to the + /// directory. + Always, +} + +/// Configuration options that control the behavior of the file system. +#[derive(Debug, Clone)] +pub struct Config { + /// How long the FUSE client should consider directory entries to be valid. If the contents of a + /// directory can only be modified by the FUSE client (i.e., the file system has exclusive + /// access), then this should be a large value. + /// + /// The default value for this option is 5 seconds. 
+ pub entry_timeout: Duration, + + /// How long the FUSE client should consider file and directory attributes to be valid. If the + /// attributes of a file or directory can only be modified by the FUSE client (i.e., the file + /// system has exclusive access), then this should be set to a large value. + /// + /// The default value for this option is 5 seconds. + pub attr_timeout: Duration, + + /// The caching policy the file system should use. See the documentation of `CachePolicy` for + /// more details. + pub cache_policy: CachePolicy, + + /// Whether the file system should enable writeback caching. This can improve performance as it + /// allows the FUSE client to cache and coalesce multiple writes before sending them to the file + /// system. However, enabling this option can increase the risk of data corruption if the file + /// contents can change without the knowledge of the FUSE client (i.e., the server does **NOT** + /// have exclusive access). Additionally, the file system should have read access to all files + /// in the directory it is serving as the FUSE client may send read requests even for files + /// opened with `O_WRONLY`. + /// + /// Therefore callers should only enable this option when they can guarantee that: 1) the file + /// system has exclusive access to the directory and 2) the file system has read permissions for + /// all files in that directory. + /// + /// The default value for this option is `false`. + pub writeback: bool, + + /// The path of the root directory. + /// + /// The default is `/`. + pub root_dir: String, + + /// Whether the file system should support Extended Attributes (xattr). Enabling this feature may + /// have a significant impact on performance, especially on write parallelism. This is the result + /// of FUSE attempting to remove the special file privileges after each write request. + /// + /// The default value for this option is `false`. + pub xattr: bool, + + /// Optional file descriptor for /proc/self/fd.
Callers can obtain a file descriptor and pass it + /// here, so there's no need to open it in the filesystem implementation. This is especially useful + /// for sandboxing. + /// + /// The default is `None`. + pub proc_sfd_rawfd: Option, + + /// ID of this filesystem to uniquely identify exports. + pub export_fsid: u64, + + /// Table of exported FDs to share with other subsystems. + pub export_table: Option, + + /// Layers to be used for the overlay filesystem + pub layers: Vec, +} + +/// An overlay filesystem implementation that combines multiple layers into a single logical filesystem. +/// +/// This implementation follows standard overlay filesystem concepts, similar to Linux's OverlayFS, +/// while using OCI image specification's layer filesystem changeset format for whiteouts: +/// +/// - Uses OCI-style whiteout files (`.wh.` prefixed files) to mark deleted files in upper layers +/// - Uses OCI-style opaque directory markers (`.wh..wh..opq`) to mask lower layer directories +/// +/// ## Layer Structure +/// +/// The overlay filesystem consists of: +/// - A single top layer (upperdir) that is writable +/// - Zero or more lower layers that are read-only +/// +/// ## Layer Ordering +/// +/// When creating an overlay filesystem, layers are provided in order from lowest to highest: +/// The last layer in the provided sequence becomes the top layer (upperdir), while +/// the others become read-only lower layers.
This matches the OCI specification where: +/// - The top layer (upperdir) handles all modifications +/// - Lower layers provide the base content +/// - Changes in the top layer shadow content in lower layers +/// +/// ## Layer Behavior +/// +/// - All write operations occur in the top layer +/// - When reading, the top layer takes precedence over lower layers +/// - Whiteout files in the top layer hide files from lower layers +/// - Opaque directory markers completely mask lower layer directory contents +/// - It is undefined behavior for whiteouts and their corresponding entries to exist at the same level in the same directory. +/// For example, looking up such entry can result in different behavior depending on which is found first. +/// The filesystem will try to prevent adding whiteout entries directly. +/// +/// TODO: Need to implement entry caching to improve the performance of [`Self::lookup_segment_by_segment`]. +pub struct OverlayFs { + /// Map of inodes by ID and alternative keys. The alternative keys allow looking up inodes by their + /// underlying host filesystem inode number, device ID and mount ID. + inodes: RwLock>>, + + /// Counter for generating the next inode ID. Each new inode gets a unique ID from this counter. + next_inode: AtomicU64, + + /// The initial inode ID (typically 1 for the root directory) + init_inode: u64, + + /// Map of open file handles by ID. Each open file gets a unique handle ID that maps to the + /// underlying file descriptor and associated data. + handles: RwLock>>, + + /// Counter for generating the next handle ID. Each new file handle gets a unique ID from this counter. + next_handle: AtomicU64, + + /// The initial handle ID + init_handle: u64, + + /// File descriptor pointing to the `/proc/self/fd` directory. This is used to convert an fd from + /// `inodes` into one that can go into `handles`. This is accomplished by reading the + /// `/proc/self/fd/{}` symlink. 
+ proc_self_fd: File, + + /// Whether writeback caching is enabled for this directory. This will only be true when + /// `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`. + writeback: AtomicBool, + + /// Whether to announce submounts. When true, the filesystem will report when directories are + /// mount points for other filesystems. + announce_submounts: AtomicBool, + + /// The UID of the process if it doesn't have CAP_SETUID capability, None otherwise. + /// Used to restrict UID changes to privileged processes. + my_uid: Option, + + /// The GID of the process if it doesn't have CAP_SETGID capability, None otherwise. + /// Used to restrict GID changes to privileged processes. + my_gid: Option, + + /// Whether the process has CAP_FOWNER capability. + cap_fowner: bool, + + /// Configuration options for the filesystem + config: Config, + + /// Symbol table for interned filenames to efficiently store and compare path components + filenames: Arc>, + + /// Root inodes for each layer, ordered from bottom to top. The last element is the upperdir + /// (writable layer) while all others are read-only lower layers. 
+ layer_roots: Arc>>, +} + +/// Represents either a file or a path +enum FileOrPath { + /// A file + File(File), + + /// A path + Path(CString), +} + +/// Represents either a file descriptor or a path +enum FileId { + /// A file descriptor + Fd(RawFd), + + /// A path + Path(CString), +} + +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- + +impl ScopedGid { + fn new(gid: libc::gid_t) -> io::Result { + let res = unsafe { libc::syscall(libc::SYS_setresgid, -1, gid, -1) }; + if res != 0 { + return Err(io::Error::last_os_error()); + } + + Ok(Self {}) + } +} + +impl ScopedUid { + fn new(uid: libc::uid_t) -> io::Result { + let res = unsafe { libc::syscall(libc::SYS_setresuid, -1, uid, -1) }; + if res != 0 { + return Err(io::Error::last_os_error()); + } + + Ok(Self {}) + } +} + +impl InodeAltKey { + fn new(ino: libc::ino64_t, dev: libc::dev_t, mnt_id: u64) -> Self { + Self { ino, dev, mnt_id } + } +} + +impl OverlayFs { + /// Creates a new OverlayFs with the given layers + pub fn new(config: Config) -> io::Result { + if config.layers.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "at least one layer must be provided", + )); + } + + if config.layers.len() > MAX_LAYERS { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "maximum overlayfs layer count exceeded", + )); + } + + let mut next_inode = 1; + let mut inodes = MultikeyBTreeMap::new(); + + // Initialize the root inodes for all layers + let layer_roots = Self::init_root_inodes(&config.layers, &mut inodes, &mut next_inode)?; + + // Set the `init.krun` inode + let init_inode = next_inode; + next_inode += 1; + + // Get the file descriptor for /proc/self/fd + let proc_self_fd = if let Some(fd) = config.proc_sfd_rawfd { + fd + } else { + // Safe because this doesn't modify any memory and we check the return value. 
+ let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + PROC_SELF_FD_CSTR.as_ptr(), + libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + fd + }; + + // Get the UID of the process + let my_uid = if has_cap(None, CapSet::Effective, Capability::CAP_SETUID).unwrap_or_default() + { + None + } else { + // SAFETY: This syscall is always safe to call and always succeeds. + Some(unsafe { libc::getuid() }) + }; + + // Get the GID of the process + let my_gid = if has_cap(None, CapSet::Effective, Capability::CAP_SETGID).unwrap_or_default() + { + None + } else { + // SAFETY: This syscall is always safe to call and always succeeds. + Some(unsafe { libc::getgid() }) + }; + + let cap_fowner = + has_cap(None, CapSet::Effective, Capability::CAP_FOWNER).unwrap_or_default(); + + // SAFETY: We just opened this fd or it was provided by our caller. + let proc_self_fd = unsafe { File::from_raw_fd(proc_self_fd) }; + + Ok(OverlayFs { + inodes: RwLock::new(inodes), + next_inode: AtomicU64::new(next_inode), + init_inode, + handles: RwLock::new(BTreeMap::new()), + next_handle: AtomicU64::new(1), + init_handle: 0, + proc_self_fd, + writeback: AtomicBool::new(false), + announce_submounts: AtomicBool::new(false), + my_uid, + my_gid, + cap_fowner, + config, + filenames: Arc::new(RwLock::new(SymbolTable::new())), + layer_roots: Arc::new(RwLock::new(layer_roots)), + }) + } + + /// Initialize root inodes for all layers + /// + /// This function processes layers from top to bottom, creating root inodes for each layer. 
+ /// + /// Parameters: + /// - layers: Slice of paths to the layer roots, ordered from bottom to top + /// - inodes: Mutable reference to the inodes map to populate + /// - next_inode: Mutable reference to the next inode counter + /// + /// Returns: + /// - io::Result> containing the root inodes for each layer + fn init_root_inodes( + layers: &[PathBuf], + inodes: &mut MultikeyBTreeMap>, + next_inode: &mut u64, + ) -> io::Result> { + // Pre-allocate layer_roots with the right size + let mut layer_roots = vec![0; layers.len()]; + + // Process layers from top to bottom + for (i, layer_path) in layers.iter().enumerate().rev() { + let layer_idx = i; // Layer index from bottom to top + + // Convert the layer path to a C string using its raw bytes, so non-UTF-8 paths are preserved + let c_path = CString::new(layer_path.as_os_str().as_bytes())?; + + // Open the directory + let file = Self::open_path_file(&c_path)?; + + // Get statx information + let (st, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + // Create the alt key for this inode + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev, mnt_id); + + // Create the inode data + let inode_id = *next_inode; + *next_inode += 1; + + let inode_data = Arc::new(InodeData { + inode: inode_id, + file, + dev: st.st_dev, + mnt_id, + refcount: AtomicU64::new(1), + path: vec![], + layer_idx, + }); + + // Insert the inode into the map + inodes.insert(inode_id, alt_key, inode_data); + + // Store the root inode for this layer + layer_roots[layer_idx] = inode_id; + } + + Ok(layer_roots) + } + + /// Opens a file without following symlinks. + fn open_file(path: &CStr, flags: i32) -> io::Result { + let fd = unsafe { libc::open(path.as_ptr(), flags | libc::O_NOFOLLOW, 0) }; + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd. + Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Opens a file relative to a parent without following symlinks.
+ fn open_file_at(parent: RawFd, name: &CStr, flags: i32) -> io::Result { + let fd = unsafe { libc::openat(parent, name.as_ptr(), flags | libc::O_NOFOLLOW, 0) }; + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd. + Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Opens a path as an O_PATH file. + fn open_path_file(path: &CStr) -> io::Result { + Self::open_file(path, libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC) + } + + /// Opens a path relative to a parent as an O_PATH file. + fn open_path_file_at(parent: RawFd, name: &CStr) -> io::Result { + Self::open_file_at( + parent, + name, + libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC, + ) + } + + /// Performs a statx syscall without any modifications to the returned stat structure. + fn statx(fd: RawFd, name: Option<&CStr>) -> io::Result<(libc::stat64, u64)> { + let mut stx = MaybeUninit::::zeroed(); + let res = unsafe { + libc::statx( + fd, + name.unwrap_or(&*EMPTY_CSTR).as_ptr(), + libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, + libc::STATX_BASIC_STATS | libc::STATX_MNT_ID, + stx.as_mut_ptr(), + ) + }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because the kernel guarantees that the struct is now fully initialized. + let stx = unsafe { stx.assume_init() }; + + // Unfortunately, we cannot use an initializer to create the stat64 object, + // because it may contain padding and reserved fields (depending on the + // architecture), and it does not implement the Default trait. + // So we take a zeroed struct and set what we can. (Zero in all fields is + // wrong, but safe.) 
+ let mut st = unsafe { MaybeUninit::::zeroed().assume_init() }; + + st.st_dev = libc::makedev(stx.stx_dev_major, stx.stx_dev_minor); + st.st_ino = stx.stx_ino; + st.st_mode = stx.stx_mode as _; + st.st_nlink = stx.stx_nlink as _; + st.st_uid = stx.stx_uid; + st.st_gid = stx.stx_gid; + st.st_rdev = libc::makedev(stx.stx_rdev_major, stx.stx_rdev_minor); + st.st_size = stx.stx_size as _; + st.st_blksize = stx.stx_blksize as _; + st.st_blocks = stx.stx_blocks as _; + st.st_atime = stx.stx_atime.tv_sec; + st.st_atime_nsec = stx.stx_atime.tv_nsec as _; + st.st_mtime = stx.stx_mtime.tv_sec; + st.st_mtime_nsec = stx.stx_mtime.tv_nsec as _; + st.st_ctime = stx.stx_ctime.tv_sec; + st.st_ctime_nsec = stx.stx_ctime.tv_nsec as _; + + Ok((st, stx.stx_mnt_id)) + } + + /// Turns an inode data into a file descriptor string. + fn data_to_fd_str(data: &InodeData) -> io::Result { + let fd = format!("{}", data.file.as_raw_fd()); + CString::new(fd).map_err(|_| einval()) + } + + /// Turns an inode data into a path. + fn data_to_path(data: &InodeData) -> io::Result { + let path = format!("/proc/self/fd/{}", data.file.as_raw_fd()); + CString::new(path).map_err(|_| einval()) + } + + /// Turns an inode into an opened file. + fn open_inode(&self, inode: Inode, mut flags: i32) -> io::Result { + let data = self.get_inode_data(inode)?; + let fd_str = Self::data_to_fd_str(&data)?; + + // When writeback caching is enabled, the kernel may send read requests even if the + // userspace program opened the file write-only. So we need to ensure that we have opened + // the file for reading as well as writing. + let writeback = self.writeback.load(Ordering::Relaxed); + if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY { + flags &= !libc::O_ACCMODE; + flags |= libc::O_RDWR; + } + + // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`. 
+ // However, this breaks atomicity as the file may have changed on disk, invalidating the + // cached copy of the data in the kernel and the offset that the kernel thinks is the end of + // the file. Just allow this for now as it is the user's responsibility to enable writeback + // caching only for directories that are not shared. It also means that we need to clear the + // `O_APPEND` flag. + if writeback && flags & libc::O_APPEND != 0 { + flags &= !libc::O_APPEND; + } + + // If the file is a symlink, just clone existing file. + if data.file.metadata()?.is_symlink() { + return Ok(data.file.try_clone()?); + } + + // Safe because this doesn't modify any memory and we check the return value. We don't + // really check `flags` because if the kernel can't handle poorly specified flags then we + // have much bigger problems. + // + // It is safe to follow here since symlinks are returned early as O_PATH files. + let fd = unsafe { + libc::openat( + self.proc_self_fd.as_raw_fd(), + fd_str.as_ptr(), + flags | libc::O_CLOEXEC & (!libc::O_NOFOLLOW), + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because we just opened this fd. + Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Turns an inode into an opened file or a path. 
+ fn open_inode_or_path(&self, inode: Inode, flags: i32) -> io::Result { + match self.open_inode(inode, flags) { + Ok(file) => Ok(FileOrPath::File(file)), + Err(e) if e.raw_os_error() == Some(libc::ELOOP) => { + let data = self.get_inode_data(inode)?; + let path = Self::data_to_path(&data)?; + Ok(FileOrPath::Path(path)) + } + Err(e) => Err(e), + } + } + + pub fn get_config(&self) -> &Config { + &self.config + } + + pub fn get_filenames(&self) -> &Arc> { + &self.filenames + } + + fn get_layer_root(&self, layer_idx: usize) -> io::Result> { + let layer_roots = self.layer_roots.read().unwrap(); + + // Check if the layer index is valid + if layer_idx >= layer_roots.len() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "layer index out of bounds", + )); + } + + // Get the inode for this layer + let inode = layer_roots[layer_idx]; + if inode == 0 { + return Err(io::Error::new(io::ErrorKind::NotFound, "layer not found")); + } + + // Get the inode data + self.get_inode_data(inode) + } + + /// Creates a new inode and adds it to the inode map + fn create_inode( + &self, + file: File, + ino: libc::ino64_t, + dev: libc::dev_t, + mnt_id: u64, + path: Vec, + layer_idx: usize, + ) -> (Inode, Arc) { + let inode = self.next_inode.fetch_add(1, Ordering::SeqCst); + + let data = Arc::new(InodeData { + inode, + file, + dev, + mnt_id, + refcount: AtomicU64::new(1), + path, + layer_idx, + }); + + let alt_key = InodeAltKey::new(ino, dev, mnt_id); + self.inodes + .write() + .unwrap() + .insert(inode, alt_key, data.clone()); + + (inode, data) + } + + /// Creates an Entry from stat information and inode data + fn create_entry(&self, inode: Inode, st: bindings::stat64) -> Entry { + Entry { + inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + } + } + + fn create_whiteout_path(&self, name: &CStr) -> io::Result { + let name_str = name.to_str().map_err(|_| einval())?; + let whiteout_path = 
format!("{WHITEOUT_PREFIX}{name_str}"); + CString::new(whiteout_path).map_err(|_| einval()) + } + + /// Checks for whiteout file in top layer + fn check_whiteout(&self, parent: RawFd, name: &CStr) -> io::Result { + let whiteout_cpath = self.create_whiteout_path(name)?; + + match Self::statx(parent, Some(&whiteout_cpath)) { + Ok(_) => { + Ok(true) + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + Ok(false) + } + Err(e) => { + Err(e) + } + } + } + + /// Checks for an opaque directory marker in the given parent directory path. + fn check_opaque_marker(&self, parent: RawFd) -> io::Result { + let opaque_cpath = CString::new(OPAQUE_MARKER).map_err(|_| einval())?; + + match Self::statx(parent, Some(&opaque_cpath)) { + Ok(_) => { + Ok(true) + } + Err(e) if e.kind() == io::ErrorKind::NotFound => { + Ok(false) + } + Err(e) => { + Err(e) + } + } + } + + /// Interns a name and returns the corresponding Symbol + fn intern_name(&self, name: &CStr) -> io::Result { + // Clone the name to avoid lifetime issues + let name_to_intern = CString::new(name.to_bytes()).map_err(|_| einval())?; + + // Get a write lock to intern it + let mut filenames = self.filenames.write().unwrap(); + filenames.intern(name_to_intern).map_err(|e| { + io::Error::new( + io::ErrorKind::Other, + format!("Failed to intern filename: {}", e), + ) + }) + } + + /// Gets the InodeData for an inode + pub(super) fn get_inode_data(&self, inode: Inode) -> io::Result> { + self.inodes + .read() + .unwrap() + .get(&inode) + .cloned() + .ok_or_else(ebadf) + } + + /// Gets the HandleData for a handle + pub(super) fn get_inode_handle_data( + &self, + inode: Inode, + handle: Handle, + ) -> io::Result> { + self.handles + .read() + .unwrap() + .get(&handle) + .filter(|hd| hd.inode == inode) + .cloned() + .ok_or_else(ebadf) + } + + fn get_top_layer_idx(&self) -> usize { + self.layer_roots.read().unwrap().len() - 1 + } + + fn bump_refcount(&self, inode: Inode) { + let inodes = self.inodes.write().unwrap(); + let 
inode_data = inodes.get(&inode).unwrap(); + inode_data.refcount.fetch_add(1, Ordering::SeqCst); + } + + /// Validates a name to prevent path traversal attacks and special overlay markers + /// + /// This function checks if a name contains: + /// - Path traversal sequences like ".." + /// - Other potentially dangerous patterns like slashes + /// - Whiteout markers (.wh. prefix) + /// - Opaque directory markers (.wh..wh..opq) + /// + /// Returns: + /// - Ok(()) if the name is safe + /// - Err(io::Error) if the name contains invalid patterns + fn validate_name(name: &CStr) -> io::Result<()> { + let name_bytes = name.to_bytes(); + + // Check for empty name + if name_bytes.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "empty name is not allowed", + )); + } + + // Check for path traversal sequences + if name_bytes == b".." || name_bytes.contains(&b'/') || name_bytes.contains(&b'\\') { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + "path traversal attempt detected", + )); + } + + // Check for null bytes + if name_bytes.contains(&0) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name contains null bytes", + )); + } + + // Convert to str for string pattern matching + let name_str = match std::str::from_utf8(name_bytes) { + Ok(s) => s, + Err(_) => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name contains invalid UTF-8", + )) + } + }; + + // Check for whiteout prefix + if name_str.starts_with(".wh.") { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name cannot start with whiteout prefix", + )); + } + + // Check for opaque marker + if name_str == ".wh..wh..opq" { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "name cannot be an opaque directory marker", + )); + } + + Ok(()) + } + + /// Looks up a path segment by segment in a given layer + /// + /// This function traverses a path one segment at a time within a specific layer, + /// handling whiteouts and opaque 
markers along the way. + /// + /// ### Arguments + /// * `layer_root` - Root inode data for the layer being searched + /// * `path_segments` - Path components to traverse, as interned symbols + /// * `path_inodes` - Vector to store inode data for each path segment traversed + /// + /// # Return Value + /// Returns `Option>` where: + /// - `Some(Ok(stat))` - Successfully found the file/directory and retrieved its stats + /// - `Some(Err(e))` - Encountered an error during lookup that should be propagated: + /// - If error is `NotFound`, caller should try next layer + /// - For any other IO error, caller should stop searching entirely + /// - `None` - Stop searching lower layers because either: + /// - Found a whiteout file for this path (file was deleted in this layer) + /// - Found an opaque directory marker (directory contents are masked in this layer) + /// + /// # Example Return Flow + /// 1. If path exists: `Some(Ok(stat))` + /// 2. If path has whiteout: `None` + /// 3. If path not found: `Some(Err(NotFound))` + /// 4. If directory has opaque marker: `None` + /// 5. If IO error occurs: `Some(Err(io_error))` + /// + /// # Side Effects + /// - Creates inodes for each path segment if they don't already exist + /// - Updates path_inodes with inode data for each segment traversed + /// - Increments reference counts for existing inodes that are reused + /// + /// # Path Resolution + /// For a path like "foo/bar/baz", the function: + /// 1. Starts at layer_root + /// 2. Looks up "foo", checking for whiteouts/opaque markers + /// 3. If "foo" exists, creates/reuses its inode and adds to path_inodes + /// 4. Repeats for "bar" and "baz" + /// 5. 
Returns stats for "baz" if found + fn lookup_segment_by_segment( + &self, + layer_root: &Arc, + path_segments: &[Symbol], + path_inodes: &mut Vec>, + ) -> Option> { + let mut opaque_marker_found = false; + + // Start from layer root + let root_file = match layer_root.file.try_clone() { + Ok(file) => file, + Err(e) => { + return Some(Err(e)); + } + }; + + // Set current. + let mut current = match Self::statx(root_file.as_raw_fd(), None) { + Ok((stat, mnt_id)) => (root_file, stat, mnt_id), + Err(e) => return Some(Err(e)), + }; + + // Traverse each path segment + for (depth, segment) in path_segments.iter().enumerate() { + // Get the current segment name and parent vol path + let filenames = self.filenames.read().unwrap(); + let segment_name = filenames.get(*segment).unwrap(); + + // Check for whiteout at current level + match self.check_whiteout(current.0.as_raw_fd(), segment_name) { + Ok(true) => { + return None; // Found whiteout, stop searching + } + Ok(false) => (), // No whiteout, continue + Err(e) => { + return Some(Err(e)); + } + } + + // Check for opaque marker at current level + match self.check_opaque_marker(current.0.as_raw_fd()) { + Ok(true) => { + opaque_marker_found = true; + } + Ok(false) => (), + Err(e) => { + return Some(Err(e)); + } + } + + let segment_name = segment_name.to_owned(); + + drop(filenames); // Now safe to drop filenames lock + + match Self::statx(current.0.as_raw_fd(), Some(&segment_name)) { + Ok((st, mnt_id)) => { + // Open the current segment + let new_file = + match Self::open_path_file_at(current.0.as_raw_fd(), &segment_name) { + Ok(file) => { + file + } + Err(e) => { + return Some(Err(e)); + } + }; + + // Update parent for next iteration + current = match new_file.try_clone() { + Ok(file) => (file, st, mnt_id), + Err(e) => { + return Some(Err(e)); + } + }; + + // Create or get inode for this path segment + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev, mnt_id); + let inode_data = { + let inodes = self.inodes.read().unwrap(); 
+ if let Some(data) = inodes.get_alt(&alt_key) { + data.clone() + } else { + drop(inodes); // Drop read lock before write lock + + let mut path = path_inodes[depth].path.clone(); + path.push(*segment); + + // Safe because we just opened this fd. + let (_, data) = self.create_inode( + new_file, + st.st_ino, + st.st_dev, + mnt_id, + path, + layer_root.layer_idx, + ); + + data + } + }; + + // Update path_inodes with the current segment's inode data + if (depth + 1) >= path_inodes.len() { + // Haven't seen this depth before, append + path_inodes.push(inode_data); + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound && opaque_marker_found => { + // For example, for a lookup of /foo/bar/baz, where /foo/bar has an opaque marker, + // then if we cannot find /foo/bar/baz in the current layer, we cannot find it + // in any other layer as /foo/bar is masked. + return None; + } + Err(e) => { + return Some(Err(e)); + } + } + } + + Some(Ok(current)) + } + + /// Looks up a file or directory entry across multiple filesystem layers. + /// + /// This function starts from the specified upper layer (given by start_layer_idx) and searches downwards + /// through the layers to locate the file represented by the provided path segments (an interned path). + /// At each layer, it calls lookup_segment_by_segment to traverse the path step by step while handling + /// whiteout files and opaque directory markers. If an entry is found in a layer, the function returns + /// an Entry structure containing the file metadata along with a vector of InodeData for each path segment traversed. + /// + /// ## Arguments + /// + /// * `start_layer_idx` - The index of the starting layer (from the topmost, which may be the writable layer). + /// * `path_segments` - A slice of interned symbols representing the path components to traverse. + /// + /// ## Returns + /// + /// On success, returns a tuple containing: + /// - An Entry representing the located file or directory along with its attributes. 
+ /// - A vector of Arc corresponding to the inodes for each traversed path segment. + /// + /// ## Errors + /// + /// Returns an io::Error if: + /// - The file is not found in any layer (ENOENT), or + /// - An error occurs during the lookup process in one of the layers. + fn lookup_layer_by_layer<'a>( + &'a self, + start_layer_idx: usize, + path_segments: &[Symbol], + ) -> io::Result<(Entry, Arc, Vec>)> { + let mut path_inodes = vec![]; + + // Start from the start_layer_idx and try each layer down to layer 0 + for layer_idx in (0..=start_layer_idx).rev() { + let layer_root = self.get_layer_root(layer_idx)?; + + // If path_inodes has only the root inode or is empty, we need to restart the lookup with the new layer root. + if path_inodes.len() < 2 { + path_inodes = vec![layer_root.clone()]; + } + + match self.lookup_segment_by_segment(&layer_root, &path_segments, &mut path_inodes) { + Some(Ok((file, st, mnt_id))) => { + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev, mnt_id); + + // Check if we already have this inode + let inodes = self.inodes.read().unwrap(); + if let Some(data) = inodes.get_alt(&alt_key) { + return Ok((self.create_entry(data.inode, st), data.clone(), path_inodes)); + } + + drop(inodes); + + // Open the path + let path = path_segments.to_vec(); + + // Create new inode + let (inode, data) = + self.create_inode(file, st.st_ino, st.st_dev, mnt_id, path, layer_idx); + path_inodes.push(data.clone()); + + return Ok((self.create_entry(inode, st), data, path_inodes)); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + // Continue to check lower layers + continue; + } + Some(Err(e)) => { + return Err(e); + } + None => { + // Hit a whiteout or opaque marker, stop searching lower layers + return Err(io::Error::from_raw_os_error(libc::ENOENT)); + } + } + } + + // Not found in any layer + Err(io::Error::from_raw_os_error(libc::ENOENT)) + } + + /// Performs a lookup operation + pub(crate) fn do_lookup( + &self, + parent: Inode, + name: &CStr, 
+ ) -> io::Result<(Entry, Vec>)> { + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Create path segments for lookup by appending the new name + let mut path_segments = parent_data.path.clone(); + let symbol = self.intern_name(name)?; + path_segments.push(symbol); + + let (mut entry, child_data, path_inodes) = + self.lookup_layer_by_layer(parent_data.layer_idx, &path_segments)?; + + // Set the submount flag if the endirectory is a mount point + let mut attr_flags = 0; + if (entry.attr.st_mode & libc::S_IFMT) == libc::S_IFDIR + && self.announce_submounts.load(Ordering::Relaxed) + && (child_data.dev != parent_data.dev || child_data.mnt_id != parent_data.mnt_id) + { + attr_flags |= fuse::ATTR_SUBMOUNT; + } + + entry.attr_flags = attr_flags; + + Ok((entry, path_inodes)) + } + + /// Copies up a file or directory from a lower layer to the top layer + pub(crate) fn copy_up(&self, path_inodes: &[Arc]) -> io::Result<()> { + // Get the top layer root + let top_layer_idx = self.get_top_layer_idx(); + let top_layer_root = self.get_layer_root(top_layer_idx)?; + + // Start from root and copy up each segment that's not in the top layer + let mut parent = top_layer_root.file.try_clone()?; + + // Skip the root inode + for inode_data in path_inodes.iter().skip(1) { + // Skip if this segment is already in the top layer + if inode_data.layer_idx == top_layer_idx { + parent = inode_data.file.try_clone()?; + continue; + } + + // Get the current segment name + let segment_name = { + let name = inode_data.path.last().unwrap(); + let filenames = self.filenames.read().unwrap(); + filenames.get(*name).unwrap().to_owned() + }; + + let (src_stat, _) = Self::statx(inode_data.file.as_raw_fd(), None)?; + let file_type = src_stat.st_mode & libc::S_IFMT; + + // Copy up the file + match file_type { + libc::S_IFREG => { + // Open source file with O_RDONLY + let src_file = self.open_inode(inode_data.inode, libc::O_RDONLY)?; + + // Open destination file with 
O_WRONLY | O_CREAT + let dst_file = Self::open_file_at( + parent.as_raw_fd(), + &segment_name, + libc::O_WRONLY | libc::O_CREAT, + )?; + + // Try to use FICLONE ioctl for CoW copying first (works on modern Linux filesystems like Btrfs, XFS, etc.) + let result = unsafe { + libc::ioctl(dst_file.as_raw_fd(), FICLONE as _, src_file.as_raw_fd()) + }; + + if result < 0 { + debug!("FICLONE failed, falling back to regular copy"); + let err = io::Error::last_os_error(); + // If FICLONE fails (e.g., across filesystems), fall back to regular copy + if err.raw_os_error() == Some(libc::EXDEV) + || err.raw_os_error() == Some(libc::EINVAL) + || err.raw_os_error() == Some(libc::ETXTBSY) + || err.raw_os_error() == Some(libc::EOPNOTSUPP) + { + // Fall back to regular copy + self.copy_file_contents( + src_file.as_raw_fd(), + dst_file.as_raw_fd(), + (src_stat.st_mode & 0o777) as u32, + )?; + } else { + return Err(err); + } + } + } + libc::S_IFDIR => { + // Directory: just create it with the same permissions + unsafe { + if libc::mkdirat( + parent.as_raw_fd(), + segment_name.as_ptr(), + src_stat.st_mode & 0o777, + ) < 0 + { + return Err(io::Error::last_os_error()); + } + } + } + libc::S_IFLNK => { + // Symbolic link: read target and recreate link + let mut buf = vec![0u8; libc::PATH_MAX as usize]; + let len = unsafe { + libc::readlinkat( + inode_data.file.as_raw_fd(), + EMPTY_CSTR.as_ptr(), + buf.as_mut_ptr() as *mut _, + buf.len(), + ) + }; + + if len < 0 { + return Err(io::Error::last_os_error()); + } + + buf.truncate(len as usize); + + unsafe { + if libc::symlinkat( + buf.as_ptr() as *const _, + parent.as_raw_fd(), + segment_name.as_ptr(), + ) < 0 + { + return Err(io::Error::last_os_error()); + } + + if libc::fchmodat( + parent.as_raw_fd(), + segment_name.as_ptr(), + src_stat.st_mode & 0o777, + 0, + ) < 0 + { + return Err(io::Error::last_os_error()); + } + } + } + _ => { + // Other types (devices, sockets, etc.) are not supported yet. 
+ return Err(io::Error::new( + io::ErrorKind::Unsupported, + "unsupported file type for copy up", + )); + } + } + + // Update parent for next iteration + let child = Self::open_path_file_at(parent.as_raw_fd(), &segment_name)?; + let (new_stat, new_mnt_id) = Self::statx(child.as_raw_fd(), None)?; + parent = child.try_clone()?; + + // Update the inode entry to point to the new copy in the top layer + let alt_key = InodeAltKey::new(new_stat.st_ino, new_stat.st_dev, new_mnt_id); + let mut inodes = self.inodes.write().unwrap(); + + // Create new inode data with updated dev/ino/layer_idx but same refcount + let new_data = Arc::new(InodeData { + inode: inode_data.inode, + file: child, + dev: new_stat.st_dev, + mnt_id: new_mnt_id, + refcount: AtomicU64::new(inode_data.refcount.load(Ordering::SeqCst)), + path: inode_data.path.clone(), + layer_idx: top_layer_idx, + }); + + // Replace the old entry with the new one + inodes.insert(inode_data.inode, alt_key, new_data); + } + + Ok(()) + } + + /// Helper method to copy file contents when clonefile is not available or fails + fn copy_file_contents(&self, src_fd: RawFd, dst_fd: RawFd, mode: u32) -> io::Result<()> { + unsafe { + // Copy file contents + let mut buf = [0u8; 8192]; + loop { + let n_read = libc::read(src_fd, buf.as_mut_ptr() as *mut _, buf.len()); + if n_read <= 0 { + break; + } + let mut pos = 0; + while pos < n_read { + let n_written = libc::write( + dst_fd, + buf.as_ptr().add(pos as usize) as *const _, + (n_read - pos) as usize, + ); + if n_written <= 0 { + return Err(io::Error::last_os_error()); + } + pos += n_written; + } + } + + // Explicitly set permissions to match source file + // This will override any effects from the umask + if libc::fchmod(dst_fd, mode as libc::mode_t) < 0 { + return Err(io::Error::last_os_error()); + } + } + + Ok(()) + } + + /// Ensures the file is in the top layer by copying it up if necessary. + /// + /// This function: + /// 1. Checks if the file is already in the top layer + /// 2. 
If not, looks up the complete path to the file + /// 3. Copies the file and all its parent directories to the top layer + /// 4. Returns the inode data for the copied file + /// + /// ### Arguments + /// * `inode_data` - The inode data for the file to copy up + /// + /// ### Returns + /// * `Ok(InodeData)` - The inode data for the file in the top layer + /// * `Err(io::Error)` - If the copy-up operation fails + fn ensure_top_layer(&self, inode_data: Arc) -> io::Result> { + let top_layer_idx = self.get_top_layer_idx(); + + // If already in top layer, return early + if inode_data.layer_idx == top_layer_idx { + return Ok(inode_data); + } + + // Build the path segments + let path_segments = inode_data.path.clone(); + + // Lookup the file to get all path inodes + let (_, _, path_inodes) = self.lookup_layer_by_layer(top_layer_idx, &path_segments)?; + + // Copy up the file + self.copy_up(&path_inodes)?; + + // Get the inode data for the copied file + self.get_inode_data(inode_data.inode) + } + + /// Creates a whiteout file for a given parent directory and name. + /// This is used to hide files that exist in lower layers. 
+ /// + /// # Arguments + /// * `parent` - The inode of the parent directory + /// * `name` - The name of the file to create a whiteout for + /// + /// # Returns + /// * `Ok(())` if the whiteout was created successfully + /// * `Err(io::Error)` if there was an error creating the whiteout + fn create_whiteout_for_lower(&self, parent: Inode, name: &CStr) -> io::Result<()> { + if let Ok((_, mut path_inodes)) = self.do_lookup(parent, name) { + // Copy up the parent directory if needed + path_inodes.pop(); + self.copy_up(&path_inodes)?; + let parent_fd = self.get_inode_data(parent)?.file.as_raw_fd(); + + let whiteout_cpath = self.create_whiteout_path(name)?; + let fd = unsafe { + libc::openat( + parent_fd, + whiteout_cpath.as_ptr(), + libc::O_CREAT | libc::O_WRONLY | libc::O_EXCL | libc::O_NOFOLLOW, + 0o000, // Whiteout files have no permissions + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + unsafe { libc::close(fd) }; + } + + Ok(()) + } + + /// Temporarily changes the effective UID and GID of the current thread to the requested values using RAII guards. + /// + /// If the requested UID or GID is 0 (root) or already matches the current effective UID/GID (as stored in my_uid and my_gid), + /// no credential switching is performed and None is returned for that component. + /// + /// When credential switching is performed, an RAII guard (ScopedUid or ScopedGid) is returned that will restore the + /// effective UID or GID to root (0) when dropped. If the process lacks the required capability (CAP_SETUID or CAP_SETGID) + /// and the requested UID/GID does not match the current credentials, the function returns an EPERM error. + /// + /// # Arguments + /// * `uid` - The requested user ID to switch to. + /// * `gid` - The requested group ID to switch to. + /// + /// # Returns + /// A tuple `(Option, Option)` where: + /// - `Option` is Some if the effective UID was changed, or None if no change was needed. 
+ /// - `Option` is Some if the effective GID was changed, or None if no change was needed. + /// + /// # Errors + /// Returns EPERM if the process lacks the required capability to change to a non-matching UID or GID. + fn set_scoped_credentials( + &self, + uid: libc::uid_t, + gid: libc::gid_t, + ) -> io::Result<(Option, Option)> { + // Handle GID changes first since changing UID to non-root may prevent GID changes + let scoped_gid = if gid == 0 || self.my_gid == Some(gid) { + // If the requested GID is 0 (root) or matches our current GID, + // no credential switching is needed. + None + } else if self.my_gid.is_some() { + // Process doesn't have CAP_SETGID capability and the requested GID + // does not match our current GID, so we cannot switch. + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } else { + // Process has CAP_SETGID capability, attempt to switch to the requested GID + Some(ScopedGid::new(gid)?) + }; + + // Handle UID changes after GID + let scoped_uid = if uid == 0 || self.my_uid == Some(uid) { + // If the requested UID is 0 (root) or matches our current UID, + // no credential switching is needed. + None + } else if self.my_uid.is_some() { + // Process doesn't have CAP_SETUID capability and the requested UID + // does not match our current UID, so we cannot switch. + return Err(io::Error::from_raw_os_error(libc::EPERM)); + } else { + // Process has CAP_SETUID capability, attempt to switch to the requested UID + Some(ScopedUid::new(uid)?) 
+ }; + + Ok((scoped_uid, scoped_gid)) + } + + /// Decrements the reference count for an inode and removes it if the count reaches zero + fn do_forget(&self, inode: Inode, count: u64) { + let mut inodes = self.inodes.write().unwrap(); + if let Some(data) = inodes.get(&inode) { + // Acquiring the write lock on the inode map prevents new lookups from incrementing the + // refcount but there is the possibility that a previous lookup already acquired a + // reference to the inode data and is in the process of updating the refcount so we need + // to loop here until we can decrement successfully. + loop { + let refcount = data.refcount.load(Ordering::Relaxed); + + // Saturating sub because it doesn't make sense for a refcount to go below zero and + // we don't want misbehaving clients to cause integer overflow. + let new_count = refcount.saturating_sub(count); + + if data + .refcount + .compare_exchange(refcount, new_count, Ordering::Release, Ordering::Relaxed) + .unwrap() + == refcount + { + if new_count == 0 { + // We just removed the last refcount for this inode. There's no need for an + // acquire fence here because we hold a write lock on the inode map and any + // thread that is waiting to do a forget on the same inode will have to wait + // until we release the lock. So there's is no other release store for us to + // synchronize with before deleting the entry. + inodes.remove(&inode); + } + break; + } + } + } + } + + /// Performs an open operation + fn do_open(&self, inode: Inode, mut flags: u32) -> io::Result<(Option, OpenOptions)> { + if !self.cap_fowner { + // O_NOATIME can only be used with CAP_FOWNER or if we are the file + // owner. Not worth checking the latter, just drop it if we don't + // have the cap. This makes overlayfs mounts with virtiofs lower dirs + // work. 
+ flags &= !(libc::O_NOATIME as u32); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer + let inode_data = self.ensure_top_layer(inode_data)?; + + // Open the file with the appropriate flags and generate a new unique handle ID + let file = RwLock::new(self.open_inode(inode_data.inode, flags as i32)?); + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + + // Create handle data structure with file and empty dirstream + let data = HandleData { + inode, + file, + exported: Default::default(), + }; + + // Store the handle data in the handles map + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + // Set up OpenOptions based on the cache policy configuration + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + // For CachePolicy::Never, set DIRECT_IO to bypass kernel caching for files (not directories) + CachePolicy::Never => opts.set( + OpenOptions::DIRECT_IO, + flags & (libc::O_DIRECTORY as u32) == 0, + ), + + // For CachePolicy::Always, set different caching options based on whether it's a file or directory + CachePolicy::Always => { + if flags & (libc::O_DIRECTORY as u32) == 0 { + // For files: KEEP_CACHE maintains kernel cache between open/close operations + opts |= OpenOptions::KEEP_CACHE; + } else { + // For directories: CACHE_DIR enables caching of directory entries + opts |= OpenOptions::CACHE_DIR; + } + } + + // For CachePolicy::Auto, use default caching behavior + _ => {} + }; + + // Return the handle and options + Ok((Some(handle), opts)) + } + + /// Performs a release operation + fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> { + let mut handles = self.handles.write().unwrap(); + + if let btree_map::Entry::Occupied(e) = handles.entry(handle) { + if e.get().inode == inode { + if e.get().exported.load(Ordering::Relaxed) { + self.config + .export_table + .as_ref() + .unwrap() + .lock() + .unwrap() + 
.remove(&(self.config.export_fsid, handle)); + } + + // We don't need to close the file here because that will happen automatically when + // the last `Arc` is dropped. + e.remove(); + return Ok(()); + } + } + + Err(ebadf()) + } + + /// Performs a mkdir operation + fn do_mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. + } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Create the directory + let res = unsafe { libc::mkdirat(parent_fd, name.as_ptr(), mode & !umask) }; + if res == 0 { + let file = Self::open_path_file_at(parent_fd, name)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + /// Performs an unlink 
operation + fn do_unlink(&self, parent: Inode, name: &CStr, flags: libc::c_int) -> io::Result<()> { + let top_layer_idx = self.get_top_layer_idx(); + let (entry, _) = self.do_lookup(parent, name)?; + + // If the inode is in the top layer. the parent will also be in the top layer, we need to unlink it. + let entry_data = self.get_inode_data(entry.inode)?; + if entry_data.layer_idx == top_layer_idx { + let parent_fd = self.get_inode_data(parent)?.file.as_raw_fd(); + + // Remove the inode from the overlayfs + let res = unsafe { libc::unlinkat(parent_fd, name.as_ptr(), flags) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // If after an unlink, the entry still exists in a lower layer, we need to add a whiteout + self.create_whiteout_for_lower(parent, name)?; + + Ok(()) + } + + /// Returns an iterator over all valid entries in the directory across all layers. + /// + /// Note: OverlayFs is a high-level, layered filesystem. A simple readdir on a single directory does not produce the complete view. + /// This function traverses the directory across multiple layers, merging entries while handling duplicates, + /// whiteout files, and opaque markers. + /// + /// ## Arguments + /// * `dir` - The inode of the directory to iterate over. + /// * `add_entry` - A callback function that processes each directory entry. If the callback returns 0, + /// it signals that the directory buffer is full and iteration should stop. + /// + /// ## Returns + /// * `Ok(())` if the directory was iterated successfully. + /// * `Err(io::Error)` if an error occurred during iteration. 
+ pub(super) fn process_dir_entries(&self, dir: Inode, mut add_entry: F) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + // Local state to track iteration over layers + struct LazyReaddirState { + current_layer: isize, // current layer (top-down) + inode_data: Option>, + current_iter: Option, + seen: HashSet>, + } + + let inode_data = self.get_inode_data(dir)?; + let top_layer = self.get_top_layer_idx() as isize; + let path = inode_data.path.clone(); + let mut state = LazyReaddirState { + current_layer: top_layer, + inode_data: None, + current_iter: None, + seen: HashSet::new(), + }; + + let mut current_offset = 0u64; + let mut opaque_marker_found = false; + loop { + // If no current iterator, attempt to initialize one for the current layer + if state.current_iter.is_none() { + if state.current_layer < 0 { + break; // All layers exhausted + } + + let layer_root = self.get_layer_root(state.current_layer as usize)?; + let mut path_inodes = vec![layer_root.clone()]; + + match self.lookup_segment_by_segment(&layer_root, &path, &mut path_inodes) { + Some(Ok(_)) => { + let last_inode = path_inodes.last().unwrap(); + let path = Self::data_to_path(last_inode)?; + let dir_str = path.as_c_str().to_str().map_err(|_| { + io::Error::new(io::ErrorKind::Other, "Invalid path string") + })?; + + state.inode_data = Some(last_inode.clone()); + state.current_iter = Some(std::fs::read_dir(dir_str)?); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + state.current_layer -= 1; + continue; + } + Some(Err(e)) => return Err(e), + None => { + state.current_layer = -1; + continue; + } + } + } + + if let Some(iter) = state.current_iter.as_mut() { + if let Some(entry_result) = iter.next() { + let entry = entry_result?; + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + + if state.seen.contains(name.as_bytes()) { + continue; + } + + // Handle opaque marker and whiteout files + if name_str == OPAQUE_MARKER { + // Opaque marker found; mark 
it and skip this entry + opaque_marker_found = true; + continue; + } else if name_str.starts_with(WHITEOUT_PREFIX) { + // Whiteout file; skip it + let actual = &name_str[WHITEOUT_PREFIX.len()..]; + state.seen.insert(actual.as_bytes().to_vec()); + continue; + } else { + state.seen.insert(name.as_bytes().to_vec()); + } + + let metadata = entry.metadata()?; + let mode = metadata.mode() as u32; + let s_ifmt = libc::S_IFMT as u32; + let type_ = if mode & s_ifmt == (libc::S_IFDIR as u32) { + libc::DT_DIR + } else if mode & s_ifmt == (libc::S_IFREG as u32) { + libc::DT_REG + } else if mode & s_ifmt == (libc::S_IFLNK as u32) { + libc::DT_LNK + } else if mode & s_ifmt == (libc::S_IFIFO as u32) { + libc::DT_FIFO + } else if mode & s_ifmt == (libc::S_IFCHR as u32) { + libc::DT_CHR + } else if mode & s_ifmt == (libc::S_IFBLK as u32) { + libc::DT_BLK + } else if mode & s_ifmt == (libc::S_IFSOCK as u32) { + libc::DT_SOCK + } else { + libc::DT_UNKNOWN + }; + + current_offset += 1; + + let dir_entry = DirEntry { + ino: metadata.ino(), + offset: current_offset, + type_: type_ as u32, + name: name.as_bytes(), + }; + + if add_entry(dir_entry)? == 0 { + return Ok(()); + } + } else { + state.current_iter = None; + if opaque_marker_found { + break; + } + state.current_layer -= 1; + continue; + } + } + } + + Ok(()) + } + + /// Reads directory entries for the given inode by merging entries from all underlying layers. + /// + /// Unlike conventional filesystems that simply call readdir on a directory file descriptor, + /// OverlayFs must aggregate entries from multiple layers. The `offset` parameter specifies the starting + /// index in the merged list of directory entries. The provided `add_entry` callback is invoked for each + /// entry; a return value of 0 indicates that the directory buffer is full and reading should cease. + /// + /// NOTE: The current implementation of offset does not entirely follow FUSE expected behaviors. 
+ /// Changes to entries in the write layer can affect the offset, potentially causing inconsistencies + /// in directory listing between calls. + /// + /// TODO: Implement a more robust offset handling mechanism that maintains consistency even when + /// the underlying directory structure changes. One way is making offset a composite value of + /// layer (1 MSB) + offset (7 LSB). This will also require having multiple open dirs from lower layers + /// in [HandleData]. + pub(super) fn do_readdir( + &self, + inode: Inode, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + if size == 0 { + return Ok(()); + } + + let mut current_offset = 0u64; + self.process_dir_entries(inode, |entry| { + if current_offset < offset { + current_offset += 1; + return Ok(1); + } + + add_entry(entry) + }) + } + + fn do_create( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. We don't + // really check `flags` because if the kernel can't handle poorly specified flags then we + // have much bigger problems. + let fd = unsafe { + libc::openat( + parent_fd, + name.as_ptr(), + flags as i32 | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, + mode & !(umask & 0o777), + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + let (stat, mnt_id) = Self::statx(fd, None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created file + let file = unsafe { File::from_raw_fd(fd) }; + let (inode, _) = self.create_inode( + file.try_clone()?, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created file + let entry = self.create_entry(inode, stat); + + // Create the handle for the newly created file + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + let data = HandleData { + inode: entry.inode, + file: RwLock::new(file), + exported: Default::default(), + }; + + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + CachePolicy::Never => opts |= OpenOptions::DIRECT_IO, + CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE, + _ => {} + }; + + Ok((entry, Some(handle), opts)) + } + + fn do_getattr(&self, inode: Inode) -> io::Result<(libc::stat64, Duration)> { + let fd = self.get_inode_data(inode)?.file.as_raw_fd(); + let (st, _) = Self::statx(fd, None)?; + + Ok((st, self.config.attr_timeout)) + } + + fn do_rename( + &self, + old_parent: Inode, 
+ old_name: &CStr, + new_parent: Inode, + new_name: &CStr, + flags: u32, + ) -> io::Result<()> { + // Copy up the old path to the top layer if not already in the top layer + let (_, old_path_inodes) = self.do_lookup(old_parent, old_name)?; + self.copy_up(&old_path_inodes)?; + let old_parent_data = self.get_inode_data(old_parent)?; + + // Copy up the new parent to the top layer if not already in the top layer + let new_parent_data = self.ensure_top_layer(self.get_inode_data(new_parent)?)?; + + // Perform the rename + let res = unsafe { + #[cfg(any(target_env = "gnu", target_env = "musl"))] + { + libc::renameat2( + old_parent_data.file.as_raw_fd(), + old_name.as_ptr(), + new_parent_data.file.as_raw_fd(), + new_name.as_ptr(), + flags, + ) + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // After successful rename, check if we need to add a whiteout for the old path + self.create_whiteout_for_lower(old_parent, old_name)?; + + Ok(()) + } + + fn do_mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Create the node device + let res = unsafe { + libc::mknodat( + parent_fd, + name.as_ptr(), + (mode & !umask) as libc::mode_t, + u64::from(rdev), + ) + }; + + if res == 0 { + let file = Self::open_path_file_at(parent_fd, name)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + fn do_link(&self, inode: Inode, newparent: Inode, newname: &CStr) -> io::Result { + // Get the fd for the source file. + let inode_data = self.get_inode_data(inode)?; + + // Copy up the source file to the top layer if needed + let inode_data = self.ensure_top_layer(inode_data)?; + let old_fd_str = Self::data_to_fd_str(&inode_data)?; + + // Extraneous check to ensure the source file is not a symlink + let stat = Self::statx(inode_data.file.as_raw_fd(), None)?.0; + if stat.st_mode & libc::S_IFMT == libc::S_IFLNK { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot link to a symlink", + )); + } + + // Get and ensure new parent is in top layer + let new_parent_data = self.ensure_top_layer(self.get_inode_data(newparent)?)?; + let new_parent_fd = new_parent_data.file.as_raw_fd(); + + // Safety: It is expected that old_fd_str has been checked by the kernel to not be a symlink. 
+ let res = unsafe { + libc::linkat( + self.proc_self_fd.as_raw_fd(), + old_fd_str.as_ptr(), + new_parent_fd, + newname.as_ptr(), + libc::AT_SYMLINK_FOLLOW, // Follow is needed to handle /proc/self/fd/ symlink + ) + }; + + if res == 0 { + let file = Self::open_path_file_at(new_parent_fd, newname)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = new_parent_data.path.clone(); + path.push(self.intern_name(newname)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + new_parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + fn do_symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + if extensions.secctx.is_some() { + unimplemented!("SECURITY_CTX is not supported and should not be used by the guest"); + } + + // Set the credentials for the operation + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Ensure parent directory is in the top layer + let parent_data = self.get_inode_data(parent)?; + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the parent file descriptor + let parent_fd = parent_data.file.as_raw_fd(); + + // Create the node device + let res = unsafe { libc::symlinkat(linkname.as_ptr(), parent_fd, name.as_ptr()) }; + + if res == 0 { + let file = Self::open_path_file_at(parent_fd, name)?; + let (stat, mnt_id) = Self::statx(file.as_raw_fd(), None)?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + file, + stat.st_ino, + stat.st_dev, + mnt_id, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, stat); + + return Ok(entry); + } + + // Return the error + Err(io::Error::last_os_error()) + } + + fn do_readlink(&self, inode: Inode) -> io::Result> { + // Get the path for this inode + let inode_data = self.get_inode_data(inode)?; + + // Allocate a buffer for the link target + let mut buf = vec![0; libc::PATH_MAX as usize]; + + // Safe because this will only modify the contents of `buf` and we check the return value. 
+ let res = unsafe { + libc::readlinkat( + inode_data.file.as_raw_fd(), + EMPTY_CSTR.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + buf.len(), + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Resize the buffer to the actual length of the link target + buf.resize(res as usize, 0); + Ok(buf) + } + + fn do_setxattr(&self, inode: Inode, name: &CStr, value: &[u8], flags: u32) -> io::Result<()> { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = + match self.open_inode_or_path(inode_data.inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this doesn't modify any memory and we check the return value. + unsafe { + libc::fsetxattr( + file.as_raw_fd(), + name.as_ptr(), + value.as_ptr() as *const libc::c_void, + value.len(), + flags as libc::c_int, + ) + } + } + FileOrPath::Path(path) => { + // Safe because this doesn't modify any memory and we check the return value. 
+ unsafe { + libc::lsetxattr( + path.as_ptr(), + name.as_ptr(), + value.as_ptr() as *const libc::c_void, + value.len(), + flags as libc::c_int, + ) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_getxattr(&self, inode: Inode, name: &CStr, size: u32) -> io::Result { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Don't allow getting attributes for init + if inode == self.init_inode { + return Err(io::Error::from_raw_os_error(libc::ENODATA)); + } + + // Safe because this will only modify the contents of `buf` + let mut buf = vec![0; size as usize]; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = match self.open_inode_or_path(inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this will only modify the contents of `buf`. + unsafe { + libc::fgetxattr( + file.as_raw_fd(), + name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + size as libc::size_t, + ) + } + } + FileOrPath::Path(path) => { + // Safe because this will only modify the contents of `buf`. 
+ unsafe { + libc::lgetxattr( + path.as_ptr(), + name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + size as libc::size_t, + ) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + if size == 0 { + Ok(GetxattrReply::Count(res as u32)) + } else { + // Truncate the buffer to the actual length of the value + buf.resize(res as usize, 0); + Ok(GetxattrReply::Value(buf)) + } + } + + fn do_listxattr(&self, inode: Inode, size: u32) -> io::Result { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Don't allow getting attributes for init + if inode == self.init_inode { + return Err(io::Error::from_raw_os_error(libc::ENODATA)); + } + + // Safe because this will only modify the contents of `buf` + let mut buf = vec![0; size as usize]; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = match self.open_inode_or_path(inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this will only modify the contents of `buf`. + unsafe { + libc::flistxattr( + file.as_raw_fd(), + buf.as_mut_ptr() as *mut libc::c_char, + size as libc::size_t, + ) + } + } + FileOrPath::Path(path) => { + // Safe because this will only modify the contents of `buf`. 
+ unsafe { + libc::llistxattr( + path.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + size as libc::size_t, + ) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + if size == 0 { + Ok(ListxattrReply::Count(res as u32)) + } else { + // Truncate the buffer to the actual length of the value + buf.resize(res as usize, 0); + Ok(ListxattrReply::Names(buf)) + } + } + + fn do_removexattr(&self, inode: Inode, name: &CStr) -> io::Result<()> { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // The f{set,get,remove,list}xattr functions don't work on an fd opened with `O_PATH` so we + // need to get a new fd. This doesn't work for symlinks, so we use the l* family of + // functions in that case. + let res = + match self.open_inode_or_path(inode_data.inode, libc::O_RDONLY | libc::O_NONBLOCK)? { + FileOrPath::File(file) => { + // Safe because this doesn't modify any memory and we check the return value. + unsafe { libc::fremovexattr(file.as_raw_fd(), name.as_ptr()) } + } + FileOrPath::Path(path) => { + // Safe because this doesn't modify any memory and we check the return value. + unsafe { libc::lremovexattr(path.as_ptr(), name.as_ptr()) } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_fallocate( + &self, + inode: Inode, + handle: Handle, + mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. 
+ let res = unsafe { + libc::fallocate64( + fd, + mode as libc::c_int, + offset as libc::off64_t, + length as libc::off64_t, + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_lseek(&self, inode: Inode, handle: Handle, offset: u64, whence: u32) -> io::Result { + let data = self.get_inode_handle_data(inode, handle)?; + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::lseek64(fd, offset as libc::off64_t, whence as libc::c_int) }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(res as u64) + } + + fn do_copyfilerange( + &self, + inode_in: Inode, + handle_in: Handle, + offset_in: u64, + inode_out: Inode, + handle_out: Handle, + offset_out: u64, + len: u64, + flags: u64, + ) -> io::Result { + let data_in = self.get_inode_handle_data(inode_in, handle_in)?; + let data_out = self.get_inode_handle_data(inode_out, handle_out)?; + let fd_in = data_in.file.write().unwrap().as_raw_fd(); + let fd_out = data_out.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. 
+ let res = unsafe { + libc::copy_file_range( + fd_in, + &mut (offset_in as i64) as &mut _ as *mut _, + fd_out, + &mut (offset_out as i64) as &mut _ as *mut _, + len.try_into().unwrap(), + flags.try_into().unwrap(), + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(res as usize) + } + + fn do_setupmapping( + &self, + inode: Inode, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + host_shm_base: u64, + shm_size: u64, + ) -> io::Result<()> { + let open_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { + libc::O_RDWR + } else { + libc::O_RDONLY + }; + + let prot_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { + libc::PROT_READ | libc::PROT_WRITE + } else { + libc::PROT_READ + }; + + if (moffset + len) > shm_size { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + + let addr = host_shm_base + moffset; + + if inode == self.init_inode { + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + len as usize, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_PRIVATE | libc::MAP_ANONYMOUS | libc::MAP_FIXED, + -1, + 0, + ) + }; + + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + let to_copy = if len as usize > INIT_BINARY.len() { + INIT_BINARY.len() + } else { + len as usize + }; + + unsafe { + libc::memcpy( + addr as *mut libc::c_void, + INIT_BINARY.as_ptr() as *const _, + to_copy, + ) + }; + + return Ok(()); + } + + // Ensure the inode is in the top layer + let inode_data = self.get_inode_data(inode)?; + let inode_data = self.ensure_top_layer(inode_data)?; + + let file = self.open_inode(inode_data.inode, open_flags)?; + let fd = file.as_raw_fd(); + + let ret = unsafe { + libc::mmap( + addr as *mut libc::c_void, + len as usize, + prot_flags, + libc::MAP_SHARED | libc::MAP_FIXED, + fd, + foffset as libc::off_t, + ) + }; + + if ret == libc::MAP_FAILED { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + fn do_removemapping( + &self, + 
requests: Vec,
+        host_shm_base: u64,
+        shm_size: u64,
+    ) -> io::Result<()> {
+        for req in requests {
+            // Validate the range before deriving an address from it. Checked arithmetic
+            // keeps a wrapped sum from slipping past the bounds check.
+            let map_end = req.moffset.checked_add(req.len).ok_or_else(einval)?;
+            if map_end > shm_size {
+                return Err(einval());
+            }
+            let addr = host_shm_base.checked_add(req.moffset).ok_or_else(einval)?;
+
+            debug!("removemapping: addr={:x} len={:?}", addr, req.len);
+            // Replace the range with an inaccessible anonymous mapping.
+            let ret = unsafe {
+                libc::mmap(
+                    addr as *mut libc::c_void,
+                    req.len as usize,
+                    libc::PROT_NONE,
+                    libc::MAP_ANONYMOUS | libc::MAP_PRIVATE | libc::MAP_FIXED,
+                    -1,
+                    0_i64,
+                )
+            };
+            if ret == libc::MAP_FAILED {
+                return Err(io::Error::last_os_error());
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Handles the "export fd" ioctl for an open handle.
+    ///
+    /// On success a duplicate of the handle's file is stored in the export table under
+    /// `(export_fsid, handle)`, the handle is flagged as exported, and the reply is the
+    /// native-endian bytes of `export_fsid` followed by those of `handle`.
+    ///
+    /// ## Errors
+    /// * `EINVAL` if `out_size` does not match the reply size.
+    /// * `EOPNOTSUPP` for any other `cmd`, or if no export table is configured.
+    /// * `EBADF` if `handle` is unknown or does not belong to `inode`.
+    /// * The error from duplicating the file descriptor.
+    fn do_ioctl(
+        &self,
+        inode: Inode,
+        handle: Handle,
+        cmd: u32,
+        out_size: u32,
+    ) -> io::Result> {
+        const VIRTIO_IOC_MAGIC: u8 = b'v';
+        const VIRTIO_IOC_TYPE_EXPORT_FD: u8 = 1;
+        // NOTE(review): the type argument of `size_of` is missing in this copy of the patch;
+        // left exactly as found.
+        const VIRTIO_IOC_EXPORT_FD_SIZE: usize = 2 * mem::size_of::();
+        const VIRTIO_IOC_EXPORT_FD_REQ: u32 = request_code_read!(
+            VIRTIO_IOC_MAGIC,
+            VIRTIO_IOC_TYPE_EXPORT_FD,
+            VIRTIO_IOC_EXPORT_FD_SIZE
+        ) as u32;
+
+        match cmd {
+            VIRTIO_IOC_EXPORT_FD_REQ => {
+                if out_size as usize != VIRTIO_IOC_EXPORT_FD_SIZE {
+                    return Err(io::Error::from_raw_os_error(libc::EINVAL));
+                }
+
+                let export_table = self
+                    .config
+                    .export_table
+                    .as_ref()
+                    .ok_or_else(|| io::Error::from_raw_os_error(libc::EOPNOTSUPP))?;
+
+                // Lock order: `handles` first, export table second. `do_release` acquires
+                // them in that order, so taking them the other way round here could deadlock.
+                let handles = self.handles.read().unwrap();
+                let data = handles
+                    .get(&handle)
+                    .filter(|hd| hd.inode == inode)
+                    .ok_or_else(ebadf)?;
+
+                // Duplicate the fd before touching any state, so a failure leaves the
+                // handle unmarked.
+                let fd = data.file.read().unwrap().try_clone()?;
+
+                let mut exports = export_table.lock().unwrap();
+                data.exported.store(true, Ordering::Relaxed);
+                exports.insert((self.config.export_fsid, handle), fd);
+
+                let mut ret: Vec<_> = self.config.export_fsid.to_ne_bytes().into();
+                ret.extend_from_slice(&handle.to_ne_bytes());
+                Ok(ret)
+            }
+            _ => Err(io::Error::from_raw_os_error(libc::EOPNOTSUPP)),
+        }
+    }
+}
+
+//--------------------------------------------------------------------------------------------------
+// Functions
+//--------------------------------------------------------------------------------------------------
+
+/// Returns a "bad file descriptor" error
+fn ebadf() -> io::Error {
+    io::Error::from_raw_os_error(libc::EBADF)
+}
+
+/// Returns an "invalid argument" error
+fn einval() -> io::Error {
+    io::Error::from_raw_os_error(libc::EINVAL)
+}
+
+//--------------------------------------------------------------------------------------------------
+// Trait Implementations
+//--------------------------------------------------------------------------------------------------
+
+impl FileSystem for OverlayFs {
+    type Inode = Inode;
+    type Handle = Handle;
+
+    fn init(&self, capable: FsOptions) -> io::Result {
+        // Set the umask to 0 to ensure that all file permissions are set correctly
+        unsafe { libc::umask(0o000) };
+
+        // Enable readdirplus if supported
+        let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO;
+
+        // Enable writeback caching if requested and supported
+        if self.config.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) {
+            opts |= FsOptions::WRITEBACK_CACHE;
+            self.writeback.store(true, Ordering::Relaxed);
+        }
+
+        // Enable submounts if supported
+        if capable.contains(FsOptions::SUBMOUNTS) {
+            opts |= FsOptions::SUBMOUNTS;
+            self.announce_submounts.store(true, Ordering::Relaxed);
+        }
+
+        Ok(opts)
+ } + + fn destroy(&self) { + // Clear all handles + self.handles.write().unwrap().clear(); + + // Clear all inodes + self.inodes.write().unwrap().clear(); + } + + fn statfs(&self, _ctx: Context, inode: Inode) -> io::Result { + // Get the inode data + let data = self.get_inode_data(inode)?; + + // Call statvfs64 to get filesystem statistics + // Safe because this will only modify `out` and we check the return value. + let mut out = MaybeUninit::::zeroed(); + let res = unsafe { libc::fstatvfs64(data.file.as_raw_fd(), out.as_mut_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because statvfs64 initialized the struct + Ok(unsafe { out.assume_init() }) + } + + fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result { + Self::validate_name(name)?; + + #[cfg(not(feature = "efi"))] + let init_name = unsafe { CStr::from_bytes_with_nul_unchecked(INIT_CSTR) }; + + #[cfg(not(feature = "efi"))] + if self.init_inode != 0 && name == init_name { + let mut st: bindings::stat64 = unsafe { std::mem::zeroed() }; + st.st_size = INIT_BINARY.len() as i64; + st.st_ino = self.init_inode; + st.st_mode = 0o100_755; + + return Ok(Entry { + inode: self.init_inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + }); + } + + let (entry, _) = self.do_lookup(parent, name)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn forget(&self, _ctx: Context, inode: Inode, count: u64) { + self.do_forget(inode, count); + } + + fn opendir( + &self, + _ctx: Context, + inode: Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + self.do_open(inode, flags | (libc::O_DIRECTORY as u32)) + } + + fn releasedir( + &self, + _ctx: Context, + inode: Inode, + _flags: u32, + handle: Handle, + ) -> io::Result<()> { + self.do_release(inode, handle) + } + + fn mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: 
Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_mkdir(ctx, parent, name, mode, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { + self.do_unlink(parent, name, libc::AT_REMOVEDIR) + } + + fn readdir( + &self, + _ctx: Context, + inode: Inode, + _handle: Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(filesystem::DirEntry<'_>) -> io::Result, + { + self.do_readdir(inode, size, offset, add_entry) + } + + fn readdirplus( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(filesystem::DirEntry<'_>, Entry) -> io::Result, + { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_readdir(inode, size, offset, |dir_entry| { + let (entry, _) = self.do_lookup(inode, &CString::new(dir_entry.name).unwrap())?; + add_entry(dir_entry, entry) + }) + } + + fn open( + &self, + _ctx: Context, + inode: Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + if inode == self.init_inode { + Ok((Some(self.init_handle), OpenOptions::empty())) + } else { + self.do_open(inode, flags) + } + } + + fn release( + &self, + _ctx: Context, + inode: Inode, + _flags: u32, + handle: Handle, + _flush: bool, + _flock_release: bool, + _lock_owner: Option, + ) -> io::Result<()> { + self.do_release(inode, handle) + } + + fn create( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + Self::validate_name(name)?; + let (entry, handle, opts) = + self.do_create(ctx, parent, name, mode, flags, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok((entry, handle, opts)) + } + + fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> { + self.do_unlink(parent, name, 0) 
+ } + + fn read( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + mut w: W, + size: u32, + offset: u64, + _lock_owner: Option, + _flags: u32, + ) -> io::Result { + #[cfg(not(feature = "efi"))] + if inode == self.init_inode { + return w.write(&INIT_BINARY[offset as usize..(offset + (size as u64)) as usize]); + } + + let data = self.get_inode_handle_data(inode, handle)?; + + let f = data.file.read().unwrap(); + w.write_from(&f, size as usize, offset) + } + + fn write( + &self, + ctx: Context, + inode: Inode, + handle: Handle, + mut r: R, + size: u32, + offset: u64, + _lock_owner: Option, + _delayed_write: bool, + kill_priv: bool, + _flags: u32, + ) -> io::Result { + if kill_priv { + // We need to change credentials during a write so that the kernel will remove setuid + // or setgid bits from the file if it was written to by someone other than the owner. + let (_uid, _gid) = self.set_scoped_credentials(ctx.uid, ctx.gid)?; + } + + let data = self.get_inode_handle_data(inode, handle)?; + let f = data.file.read().unwrap(); + r.read_to(&f, size as usize, offset) + } + + fn getattr( + &self, + _ctx: Context, + inode: Inode, + _handle: Option, + ) -> io::Result<(libc::stat64, Duration)> { + self.do_getattr(inode) + } + + fn setattr( + &self, + _ctx: Context, + inode: Inode, + attr: libc::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(libc::stat64, Duration)> { + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // Get the file identifier - either from handle or path + let file_id = if let Some(handle) = handle { + // Get the handle data + let handles = self.handles.read().unwrap(); + let handle_data = handles.get(&handle).ok_or_else(ebadf)?; + let file = handle_data.file.read().unwrap(); + FileId::Fd(file.as_raw_fd()) + } else { + let fd_str = Self::data_to_fd_str(&inode_data)?; + 
FileId::Path(fd_str) + }; + + // Handle mode changes + if valid.contains(SetattrValid::MODE) { + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { + match file_id { + FileId::Fd(fd) => libc::fchmod(fd, attr.st_mode), + FileId::Path(ref p) => { + libc::fchmodat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), attr.st_mode, 0) + } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Handle ownership changes + if valid.intersects(SetattrValid::UID | SetattrValid::GID) { + let uid = if valid.contains(SetattrValid::UID) { + attr.st_uid + } else { + // Cannot use -1 here because these are unsigned values. + u32::MAX + }; + + let gid = if valid.contains(SetattrValid::GID) { + attr.st_gid + } else { + // Cannot use -1 here because these are unsigned values. + u32::MAX + }; + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { + libc::fchownat( + inode_data.file.as_raw_fd(), + EMPTY_CSTR.as_ptr(), + uid, + gid, + libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW, + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Handle size changes + if valid.contains(SetattrValid::SIZE) { + // Safe because this doesn't modify any memory and we check the return value. + let res = match file_id { + FileId::Fd(fd) => unsafe { libc::ftruncate(fd, attr.st_size) }, + _ => { + // There is no `ftruncateat` so we need to get a new fd and truncate it. 
+ let f = self.open_inode(inode, libc::O_NONBLOCK | libc::O_RDWR)?; + unsafe { libc::ftruncate(f.as_raw_fd(), attr.st_size) } + } + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Handle timestamp changes + if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) { + let mut tvs = [ + libc::timespec { + tv_sec: 0, + tv_nsec: libc::UTIME_OMIT, + }, + libc::timespec { + tv_sec: 0, + tv_nsec: libc::UTIME_OMIT, + }, + ]; + + if valid.contains(SetattrValid::ATIME_NOW) { + tvs[0].tv_nsec = libc::UTIME_NOW; + } else if valid.contains(SetattrValid::ATIME) { + tvs[0].tv_sec = attr.st_atime; + tvs[0].tv_nsec = attr.st_atime_nsec; + } + + if valid.contains(SetattrValid::MTIME_NOW) { + tvs[1].tv_nsec = libc::UTIME_NOW; + } else if valid.contains(SetattrValid::MTIME) { + tvs[1].tv_sec = attr.st_mtime; + tvs[1].tv_nsec = attr.st_mtime_nsec; + } + + // Safe because this doesn't modify any memory and we check the return value + let res = match file_id { + FileId::Fd(fd) => unsafe { libc::futimens(fd, tvs.as_ptr()) }, + FileId::Path(ref p) => unsafe { + libc::utimensat(self.proc_self_fd.as_raw_fd(), p.as_ptr(), tvs.as_ptr(), 0) + }, + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Return the updated attributes and timeout + self.do_getattr(inode) + } + + fn rename( + &self, + _ctx: Context, + olddir: Inode, + oldname: &CStr, + newdir: Inode, + newname: &CStr, + flags: u32, + ) -> io::Result<()> { + Self::validate_name(oldname)?; + Self::validate_name(newname)?; + self.do_rename(olddir, oldname, newdir, newname, flags) + } + + fn mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_mknod(ctx, parent, name, mode, rdev, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn link( + &self, + _ctx: Context, + inode: Inode, + newparent: Inode, + newname: 
&CStr, + ) -> io::Result { + Self::validate_name(newname)?; + let entry = self.do_link(inode, newparent, newname)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_symlink(ctx, linkname, parent, name, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn readlink(&self, _ctx: Context, inode: Inode) -> io::Result> { + self.do_readlink(inode) + } + + fn flush( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + _lock_owner: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + // Since this method is called whenever an fd is closed in the client, we can emulate that + // behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe + // because this doesn't modify any memory and we check the return values. + unsafe { + let newfd = libc::dup(data.file.write().unwrap().as_raw_fd()); + if newfd < 0 { + return Err(io::Error::last_os_error()); + } + + if libc::close(newfd) < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + } + + fn fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return values. 
    /// Checks whether the caller described by `ctx` may access `inode` as requested by `mask`.
    ///
    /// The decision is made from the file's owner, group and mode bits as returned by
    /// `Self::statx`; only `ctx.uid` and `ctx.gid` are compared (no other group membership is
    /// consulted here). Only the `R_OK`, `W_OK` and `X_OK` bits of `mask` are considered.
    ///
    /// # Errors
    /// - Propagates the error from `get_inode_data` or `Self::statx`.
    /// - Returns `EACCES` when a requested access kind is not granted.
    fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> {
        let inode_data = self.get_inode_data(inode)?;
        let fd = inode_data.file.as_raw_fd();

        let (st, _) = Self::statx(fd, None)?;
        // Drop every bit other than the three access kinds handled below.
        let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK);

        if mode == libc::F_OK {
            // The file exists since we were able to call `stat(2)` on it.
            return Ok(());
        }

        // Read: denied unless the caller is root, or is the owner with the owner-read bit, or
        // is in the owning group with the group-read bit, or the other-read bit is set.
        if (mode & libc::R_OK) != 0
            && ctx.uid != 0
            && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0)
            && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0)
            && st.st_mode & 0o004 == 0
        {
            return Err(io::Error::from_raw_os_error(libc::EACCES));
        }

        // Write: same scheme as the read check, using the write bits.
        if (mode & libc::W_OK) != 0
            && ctx.uid != 0
            && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0)
            && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0)
            && st.st_mode & 0o002 == 0
        {
            return Err(io::Error::from_raw_os_error(libc::EACCES));
        }

        // root can only execute something if it is executable by one of the owner, the group, or
        // everyone.
        if (mode & libc::X_OK) != 0
            && (ctx.uid != 0 || st.st_mode & 0o111 == 0)
            && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0)
            && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0)
            && st.st_mode & 0o001 == 0
        {
            return Err(io::Error::from_raw_os_error(libc::EACCES));
        }

        Ok(())
    }
self.do_removemapping(requests, host_shm_base, shm_size) + } + + fn ioctl( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + _flags: u32, + cmd: u32, + _arg: u64, + _in_size: u32, + out_size: u32, + ) -> io::Result> { + self.do_ioctl(inode, handle, cmd, out_size) + } +} + +impl Drop for ScopedGid { + fn drop(&mut self) { + let res = unsafe { libc::syscall(libc::SYS_setresgid, -1, 0, -1) }; + if res != 0 { + log::error!( + "failed to restore gid back to root: {}", + io::Error::last_os_error() + ); + } + } +} + +impl Drop for ScopedUid { + fn drop(&mut self) { + let res = unsafe { libc::syscall(libc::SYS_setresuid, -1, 0, -1) }; + if res != 0 { + log::error!( + "failed to restore uid back to root: {}", + io::Error::last_os_error() + ); + } + } +} + +impl Default for Config { + fn default() -> Self { + Config { + entry_timeout: Duration::from_secs(5), + attr_timeout: Duration::from_secs(5), + cache_policy: Default::default(), + writeback: false, + root_dir: String::from("/"), + xattr: true, + proc_sfd_rawfd: None, + export_fsid: 0, + export_table: None, + layers: vec![], + } + } +} diff --git a/src/devices/src/virtio/fs/macos/mod.rs b/src/devices/src/virtio/fs/macos/mod.rs index b8edbc7f9..0dcdeab84 100644 --- a/src/devices/src/virtio/fs/macos/mod.rs +++ b/src/devices/src/virtio/fs/macos/mod.rs @@ -1,2 +1,3 @@ pub mod fs_utils; +pub mod overlayfs; pub mod passthrough; diff --git a/src/devices/src/virtio/fs/macos/overlayfs.rs b/src/devices/src/virtio/fs/macos/overlayfs.rs new file mode 100644 index 000000000..ba8f11489 --- /dev/null +++ b/src/devices/src/virtio/fs/macos/overlayfs.rs @@ -0,0 +1,3366 @@ +use std::collections::{btree_map, BTreeMap, HashMap, HashSet}; +use std::ffi::{CStr, CString}; +use std::fs::File; +use std::io; +use std::mem::MaybeUninit; +use std::os::unix::ffi::OsStrExt; +use std::os::unix::fs::MetadataExt; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::path::PathBuf; +use std::ptr::null_mut; +use 
/// Filename prefix that marks a whiteout entry: `check_whiteout` looks for
/// `<parent>/<WHITEOUT_PREFIX><name>` to decide whether `name` is hidden.
const WHITEOUT_PREFIX: &str = ".wh.";

/// Filename of the opaque-directory marker: `check_opaque_marker` looks for
/// `<parent>/<OPAQUE_MARKER>`.
const OPAQUE_MARKER: &str = ".wh..wh..opq";

/// Name of the top-level directory used to build `/<VOL_DIR>/<dev>/<ino>` paths
/// (see `dev_ino_to_vol_path`).
const VOL_DIR: &str = ".vol";

/// NUL-terminated name of the extended attribute for owner and permissions.
// NOTE(review): the code that reads/writes this attribute is not visible here; confirm the
// stored format against its users.
const OWNER_PERMS_XATTR_KEY: &[u8] = b"user.vm.owner_perms\0";
/// Alternative key for looking up inodes by device and inode number
///
/// Built with `InodeAltKey::new(ino, dev)` and inserted into the inode map next to the overlay
/// inode ID (see `create_inode` and `init_root_inodes`).
#[derive(Debug, Clone, Copy, PartialOrd, Ord, PartialEq, Eq)]
struct InodeAltKey {
    /// The inode number from the host filesystem
    ino: u64,

    /// The device ID from the host filesystem
    dev: i32,
}
/// Represents either a file descriptor or a path
///
/// Lets helpers such as `set_secctx` pick the fd-based or the path-based variant of an
/// operation with a single `match`.
#[derive(Clone)]
enum FileId {
    /// A file descriptor (a plain `RawFd`; this variant carries no ownership of the descriptor)
    Fd(RawFd),

    /// A path, already converted to a NUL-terminated C string
    Path(CString),
}
+ pub xattr: bool, + + /// Optional file descriptor for /proc/self/fd. + /// Callers can obtain a file descriptor and pass it here, so there's no need to open it in + /// OverlayFs::new(). This is specially useful for sandboxing. + /// + /// The default is `None`. + pub proc_sfd_rawfd: Option, + + /// ID of this filesystem to uniquely identify exports. + pub export_fsid: u64, + + /// Table of exported FDs to share with other subsystems. + pub export_table: Option, + + /// Layers to be used for the overlay filesystem + pub layers: Vec, +} + +/// An overlay filesystem implementation that combines multiple layers into a single logical filesystem. +/// +/// This implementation follows standard overlay filesystem concepts, similar to Linux's OverlayFS, +/// while using OCI image specification's layer filesystem changeset format for whiteouts: +/// +/// - Uses OCI-style whiteout files (`.wh.` prefixed files) to mark deleted files in upper layers +/// - Uses OCI-style opaque directory markers (`.wh..wh..opq`) to mask lower layer directories +/// +/// ## Layer Structure +/// +/// The overlay filesystem consists of: +/// - A single top layer (upperdir) that is writable +/// - Zero or more lower layers that are read-only +/// +/// ## Layer Ordering +/// +/// When creating an overlay filesystem, layers are provided in order from lowest to highest: +/// The last layer in the provided sequence becomes the top layer (upperdir), while +/// the others become read-only lower layers. 
This matches the OCI specification where: +/// - The top layer (upperdir) handles all modifications +/// - Lower layers provide the base content +/// - Changes in the top layer shadow content in lower layers +/// +/// ## Layer Behavior +/// +/// - All write operations occur in the top layer +/// - When reading, the top layer takes precedence over lower layers +/// - Whiteout files in the top layer hide files from lower layers +/// - Opaque directory markers completely mask lower layer directory contents +/// - It is undefined behavior for whiteouts and their corresponding entries to exist at the same level in the same directory. +/// For example, looking up such entry can result in different behavior depending on which is found first. +/// The filesystem will try to prevent adding whiteout entries directly. +/// +/// TODO: Need to implement entry caching to improve the performance of [`Self::lookup_segment_by_segment`]. +pub struct OverlayFs { + /// Map of inodes by ID and alternative keys + inodes: RwLock>>, + + /// Counter for generating the next inode ID + next_inode: AtomicU64, + + /// The `init.krun` inode ID + init_inode: u64, + + /// Map of open file handles by ID + handles: RwLock>>, + + /// Counter for generating the next handle ID + next_handle: AtomicU64, + + /// The `init.krun` handle ID + init_handle: u64, + + /// Map of memory-mapped windows + map_windows: Mutex>, + + /// Whether writeback caching is enabled + writeback: AtomicBool, + + /// Whether submounts are supported + announce_submounts: AtomicBool, + + /// Configuration options + config: Config, + + /// Symbol table for interned filenames + filenames: Arc>, + + /// Root inodes for each layer, ordered from bottom to top + layer_roots: Arc>>, +} + +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- + +impl InodeAltKey { + fn new(ino: 
u64, dev: i32) -> Self { + Self { ino, dev } + } +} + +impl OverlayFs { + /// Creates a new OverlayFs with the given layers + pub fn new(config: Config) -> io::Result { + if config.layers.is_empty() { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "at least one layer must be provided", + )); + } + + if config.layers.len() > MAX_LAYERS { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "maximum overlayfs layer count exceeded", + )); + } + + let mut next_inode = 1; + let mut inodes = MultikeyBTreeMap::new(); + + // Initialize the root inodes for all layers + let layer_roots = Self::init_root_inodes(&config.layers, &mut inodes, &mut next_inode)?; + + // Set the `init.krun` inode + let init_inode = next_inode; + next_inode += 1; + + Ok(OverlayFs { + inodes: RwLock::new(inodes), + next_inode: AtomicU64::new(next_inode), + init_inode, + handles: RwLock::new(BTreeMap::new()), + next_handle: AtomicU64::new(1), + init_handle: 0, + map_windows: Mutex::new(HashMap::new()), + writeback: AtomicBool::new(false), + announce_submounts: AtomicBool::new(false), + config, + filenames: Arc::new(RwLock::new(SymbolTable::new())), + layer_roots: Arc::new(RwLock::new(layer_roots)), + }) + } + + /// Initialize root inodes for all layers + /// + /// This function processes layers from top to bottom, creating root inodes for each layer. 
+ /// + /// Parameters: + /// - layers: Slice of paths to the layer roots, ordered from bottom to top + /// - inodes: Mutable reference to the inodes map to populate + /// - next_inode: Mutable reference to the next inode counter + /// + /// Returns: + /// - io::Result> containing the root inodes for each layer + fn init_root_inodes( + layers: &[PathBuf], + inodes: &mut MultikeyBTreeMap>, + next_inode: &mut u64, + ) -> io::Result> { + // Pre-allocate layer_roots with the right size + let mut layer_roots = vec![0; layers.len()]; + + // Process layers from top to bottom + for (i, layer_path) in layers.iter().enumerate().rev() { + let layer_idx = i; // Layer index from bottom to top + + // Get the stat information for this layer's root + let c_path = CString::new(layer_path.to_string_lossy().as_bytes())?; + let st = Self::unpatched_stat(&FileId::Path(c_path))?; + + // Create the alt key for this inode + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev as i32); + + // Create the inode data + let inode_id = *next_inode; + *next_inode += 1; + + let inode_data = Arc::new(InodeData { + inode: inode_id, + ino: st.st_ino, + dev: st.st_dev as i32, + refcount: AtomicU64::new(1), + path: vec![], + layer_idx, + }); + + // Insert the inode into the map + inodes.insert(inode_id, alt_key, inode_data); + + // Store the root inode for this layer + layer_roots[layer_idx] = inode_id; + } + + Ok(layer_roots) + } + + pub fn get_config(&self) -> &Config { + &self.config + } + + pub fn get_filenames(&self) -> &Arc> { + &self.filenames + } + + fn get_layer_root(&self, layer_idx: usize) -> io::Result> { + let layer_roots = self.layer_roots.read().unwrap(); + + // Check if the layer index is valid + if layer_idx >= layer_roots.len() { + return Err(io::Error::new( + io::ErrorKind::NotFound, + "layer index out of bounds", + )); + } + + // Get the inode for this layer + let inode = layer_roots[layer_idx]; + if inode == 0 { + return Err(io::Error::new(io::ErrorKind::NotFound, "layer not 
found")); + } + + // Get the inode data + self.get_inode_data(inode) + } + + /// Creates a new inode and adds it to the inode map + fn create_inode( + &self, + ino: u64, + dev: i32, + path: Vec, + layer_idx: usize, + ) -> (Inode, Arc) { + let inode = self.next_inode.fetch_add(1, Ordering::SeqCst); + + let data = Arc::new(InodeData { + inode, + ino, + dev, + refcount: AtomicU64::new(1), + path, + layer_idx, + }); + + let alt_key = InodeAltKey::new(ino, dev); + self.inodes + .write() + .unwrap() + .insert(inode, alt_key, data.clone()); + + (inode, data) + } + + /// Gets the InodeData for an inode + pub(super) fn get_inode_data(&self, inode: Inode) -> io::Result> { + self.inodes + .read() + .unwrap() + .get(&inode) + .cloned() + .ok_or_else(ebadf) + } + + /// Gets the HandleData for a handle + pub(super) fn get_inode_handle_data( + &self, + inode: Inode, + handle: Handle, + ) -> io::Result> { + self.handles + .read() + .unwrap() + .get(&handle) + .filter(|hd| hd.inode == inode) + .cloned() + .ok_or_else(ebadf) + } + + fn get_top_layer_idx(&self) -> usize { + self.layer_roots.read().unwrap().len() - 1 + } + + fn bump_refcount(&self, inode: Inode) { + let inodes = self.inodes.write().unwrap(); + let inode_data = inodes.get(&inode).unwrap(); + inode_data.refcount.fetch_add(1, Ordering::SeqCst); + } + + fn set_secctx(file: &FileId, secctx: SecContext, symlink: bool) -> io::Result<()> { + let options = if symlink { libc::XATTR_NOFOLLOW } else { 0 }; + let ret = match file { + FileId::Path(path) => unsafe { + libc::setxattr( + path.as_ptr(), + secctx.name.as_ptr(), + secctx.secctx.as_ptr() as *const libc::c_void, + secctx.secctx.len(), + 0, + options, + ) + }, + FileId::Fd(fd) => unsafe { + libc::fsetxattr( + *fd, + secctx.name.as_ptr(), + secctx.secctx.as_ptr() as *const libc::c_void, + secctx.secctx.len(), + 0, + options, + ) + }, + }; + + if ret != 0 { + Err(io::Error::last_os_error()) + } else { + Ok(()) + } + } + + /// Converts a dev/ino pair to a volume path + fn 
dev_ino_to_vol_path(&self, dev: i32, ino: u64) -> io::Result { + let path = format!("/{}/{}/{}", VOL_DIR, dev, ino); + CString::new(path).map_err(|_| einval()) + } + + /// Converts a dev/ino pair and name to a volume path + fn dev_ino_and_name_to_vol_path(&self, dev: i32, ino: u64, name: &CStr) -> io::Result { + let path = format!("/{}/{}/{}/{}", VOL_DIR, dev, ino, name.to_string_lossy()); + CString::new(path).map_err(|_| einval()) + } + + fn dev_ino_and_name_to_vol_whiteout_path( + &self, + dev: i32, + ino: u64, + name: &CStr, + ) -> io::Result { + // Create whiteout file (.wh.) in parent directory + let whiteout_name = format!( + "{}{}", + WHITEOUT_PREFIX, + name.to_str().map_err(|_| einval())? + ); + + let whiteout_cstr = CString::new(whiteout_name).map_err(|_| einval())?; + + // Get full path for whiteout file + self.dev_ino_and_name_to_vol_path(dev, ino, &whiteout_cstr) + } + + /// Converts an inode number to a volume path + fn inode_number_to_vol_path(&self, inode: Inode) -> io::Result { + let data = self.get_inode_data(inode)?; + self.dev_ino_to_vol_path(data.dev, data.ino) + } + + /// Turns an inode into an opened file. + fn open_inode(&self, inode: Inode, mut flags: i32) -> io::Result { + // When writeback caching is enabled, the kernel may send read requests even if the + // userspace program opened the file write-only. So we need to ensure that we have opened + // the file for reading as well as writing. + let writeback = self.writeback.load(Ordering::Relaxed); + if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY { + flags &= !libc::O_ACCMODE; + flags |= libc::O_RDWR; + } + + // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`. + // However, this breaks atomicity as the file may have changed on disk, invalidating the + // cached copy of the data in the kernel and the offset that the kernel thinks is the end of + // the file. 
Just allow this for now as it is the user's responsibility to enable writeback + // caching only for directories that are not shared. It also means that we need to clear the + // `O_APPEND` flag. + if writeback && flags & libc::O_APPEND != 0 { + flags &= !libc::O_APPEND; + } + + let c_path = self.inode_number_to_vol_path(inode)?; + + let fd = unsafe { + libc::open( + c_path.as_ptr(), + (flags | libc::O_CLOEXEC) & (!libc::O_NOFOLLOW) & (!libc::O_EXLOCK), + ) + }; + + if fd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + // Safe because we just opened this fd. + Ok(unsafe { File::from_raw_fd(fd) }) + } + + /// Parses open flags + fn parse_open_flags(&self, flags: i32) -> i32 { + let mut mflags: i32 = flags & 0b11; + + if (flags & bindings::LINUX_O_NONBLOCK) != 0 { + mflags |= libc::O_NONBLOCK; + } + if (flags & bindings::LINUX_O_APPEND) != 0 { + mflags |= libc::O_APPEND; + } + if (flags & bindings::LINUX_O_CREAT) != 0 { + mflags |= libc::O_CREAT; + } + if (flags & bindings::LINUX_O_TRUNC) != 0 { + mflags |= libc::O_TRUNC; + } + if (flags & bindings::LINUX_O_EXCL) != 0 { + mflags |= libc::O_EXCL; + } + if (flags & bindings::LINUX_O_NOFOLLOW) != 0 { + mflags |= libc::O_NOFOLLOW; + } + if (flags & bindings::LINUX_O_CLOEXEC) != 0 { + mflags |= libc::O_CLOEXEC; + } + + mflags + } + + /// Creates an Entry from stat information and inode data + fn create_entry(&self, inode: Inode, st: bindings::stat64) -> Entry { + Entry { + inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + } + } + + /// Checks for whiteout file in top layer + fn check_whiteout(&self, parent_path: &CStr, name: &CStr) -> io::Result { + let parent_str = parent_path.to_str().map_err(|_| einval())?; + let name_str = name.to_str().map_err(|_| einval())?; + + let whiteout_path = format!("{}/{}{}", parent_str, WHITEOUT_PREFIX, name_str); + let whiteout_cpath = CString::new(whiteout_path).map_err(|_| 
/// Validates a single path component to prevent path traversal and the use of names that
/// are reserved for overlay bookkeeping.
///
/// A name is rejected when it:
/// - is empty (`InvalidInput`)
/// - is `..`, or contains `/` or `\` (`PermissionDenied`)
/// - is not valid UTF-8 (`InvalidInput`)
/// - is the opaque directory marker `.wh..wh..opq` (`InvalidInput`)
/// - starts with the whiteout prefix `.wh.` (`InvalidInput`)
///
/// Returns:
/// - Ok(()) if the name is safe
/// - Err(io::Error) if the name contains invalid patterns
fn validate_name(name: &CStr) -> io::Result<()> {
    // `CStr::to_bytes` never yields a NUL byte, so no separate NUL check is required.
    let name_bytes = name.to_bytes();

    // Check for empty name
    if name_bytes.is_empty() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "empty name is not allowed",
        ));
    }

    // Check for path traversal sequences
    if name_bytes == b".." || name_bytes.contains(&b'/') || name_bytes.contains(&b'\\') {
        return Err(io::Error::new(
            io::ErrorKind::PermissionDenied,
            "path traversal attempt detected",
        ));
    }

    // Convert to str for string pattern matching
    let name_str = std::str::from_utf8(name_bytes).map_err(|_| {
        io::Error::new(io::ErrorKind::InvalidInput, "name contains invalid UTF-8")
    })?;

    // The opaque marker itself begins with the whiteout prefix, so it has to be tested
    // first; otherwise this branch could never be reached.
    if name_str == ".wh..wh..opq" {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "name cannot be an opaque directory marker",
        ));
    }

    // Check for whiteout prefix
    if name_str.starts_with(".wh.") {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "name cannot start with whiteout prefix",
        ));
    }

    Ok(())
}
+ /// + /// ### Arguments + /// * `layer_root` - Root inode data for the layer being searched + /// * `path_segments` - Path components to traverse, as interned symbols + /// * `path_inodes` - Vector to store inode data for each path segment traversed + /// + /// # Return Value + /// Returns `Option>` where: + /// - `Some(Ok(stat))` - Successfully found the file/directory and retrieved its stats + /// - `Some(Err(e))` - Encountered an error during lookup that should be propagated: + /// - If error is `NotFound`, caller should try next layer + /// - For any other IO error, caller should stop searching entirely + /// - `None` - Stop searching lower layers because either: + /// - Found a whiteout file for this path (file was deleted in this layer) + /// - Found an opaque directory marker (directory contents are masked in this layer) + /// + /// # Example Return Flow + /// 1. If path exists: `Some(Ok(stat))` + /// 2. If path has whiteout: `None` + /// 3. If path not found: `Some(Err(NotFound))` + /// 4. If directory has opaque marker: `None` + /// 5. If IO error occurs: `Some(Err(io_error))` + /// + /// # Side Effects + /// - Creates inodes for each path segment if they don't already exist + /// - Updates path_inodes with inode data for each segment traversed + /// - Increments reference counts for existing inodes that are reused + /// + /// # Path Resolution + /// For a path like "foo/bar/baz", the function: + /// 1. Starts at layer_root + /// 2. Looks up "foo", checking for whiteouts/opaque markers + /// 3. If "foo" exists, creates/reuses its inode and adds to path_inodes + /// 4. Repeats for "bar" and "baz" + /// 5. 
Returns stats for "baz" if found + fn lookup_segment_by_segment( + &self, + layer_root: &Arc, + path_segments: &[Symbol], + path_inodes: &mut Vec>, + ) -> Option> { + let mut current_stat; + let mut parent_dev = layer_root.dev; + let mut parent_ino = layer_root.ino; + let mut opaque_marker_found = false; + + // Start from layer root + let root_vol_path = match self.dev_ino_to_vol_path(parent_dev, parent_ino) { + Ok(path) => path, + Err(e) => return Some(Err(e)), + }; + + current_stat = match Self::patched_stat(&FileId::Path(root_vol_path)) { + Ok(stat) => stat, + Err(e) => return Some(Err(e)), + }; + + // Traverse each path segment + for (depth, segment) in path_segments.iter().enumerate() { + // Get the current segment name and parent vol path + let filenames = self.filenames.read().unwrap(); + let segment_name = filenames.get(*segment).unwrap(); + let parent_vol_path = match self.dev_ino_to_vol_path(parent_dev, parent_ino) { + Ok(path) => path, + Err(e) => return Some(Err(e)), + }; + + // Check for whiteout at current level + match self.check_whiteout(&parent_vol_path, segment_name) { + Ok(true) => return None, // Found whiteout, stop searching + Ok(false) => (), // No whiteout, continue + Err(e) => return Some(Err(e)), + } + + // Check for opaque marker at current level + match self.check_opaque_marker(&parent_vol_path) { + Ok(true) => { + opaque_marker_found = true; + } + Ok(false) => (), + Err(e) => return Some(Err(e)), + } + + // Try to stat the current segment using parent dev/ino + let current_vol_path = + match self.dev_ino_and_name_to_vol_path(parent_dev, parent_ino, segment_name) { + Ok(path) => path, + Err(e) => return Some(Err(e)), + }; + + drop(filenames); // Now safe to drop filenames lock + + match Self::patched_stat(&FileId::Path(current_vol_path)) { + Ok(st) => { + // Update parent dev/ino for next iteration + parent_dev = st.st_dev as i32; + parent_ino = st.st_ino; + current_stat = st; + + // Create or get inode for this path segment + let 
alt_key = InodeAltKey::new(st.st_ino, st.st_dev as i32); + let inode_data = { + let inodes = self.inodes.read().unwrap(); + if let Some(data) = inodes.get_alt(&alt_key) { + data.clone() + } else { + drop(inodes); // Drop read lock before write lock + + let mut path = path_inodes[depth].path.clone(); + path.push(*segment); + + let (_, data) = self.create_inode( + st.st_ino, + st.st_dev as i32, + path, + layer_root.layer_idx, + ); + + data + } + }; + + // Update path_inodes with the current segment's inode data + if (depth + 1) >= path_inodes.len() { + // Haven't seen this depth before, append + path_inodes.push(inode_data); + } + } + Err(e) if e.kind() == io::ErrorKind::NotFound && opaque_marker_found => { + // For example, for a lookup of /foo/bar/baz, where /foo/bar has an opaque marker, + // then if we cannot find /foo/bar/baz in the current layer, we cannot find it + // in any other layer as /foo/bar is masked. + return None; + } + Err(e) => return Some(Err(e)), + } + } + + Some(Ok(current_stat)) + } + + /// Looks up a file or directory entry across multiple filesystem layers. + /// + /// This function starts from the specified upper layer (given by start_layer_idx) and searches downwards + /// through the layers to locate the file represented by the provided path segments (an interned path). + /// At each layer, it calls lookup_segment_by_segment to traverse the path step by step while handling + /// whiteout files and opaque directory markers. If an entry is found in a layer, the function returns + /// an Entry structure containing the file metadata along with a vector of InodeData for each path segment traversed. + /// + /// ## Arguments + /// + /// * `start_layer_idx` - The index of the starting layer (from the topmost, which may be the writable layer). + /// * `path_segments` - A slice of interned symbols representing the path components to traverse. 
+ /// + /// ## Returns + /// + /// On success, returns a tuple containing: + /// - An Entry representing the located file or directory along with its attributes. + /// - A vector of Arc corresponding to the inodes for each traversed path segment. + /// + /// ## Errors + /// + /// Returns an io::Error if: + /// - The file is not found in any layer (ENOENT), or + /// - An error occurs during the lookup process in one of the layers. + fn lookup_layer_by_layer<'a>( + &'a self, + start_layer_idx: usize, + path_segments: &[Symbol], + ) -> io::Result<(Entry, Arc, Vec>)> { + let mut path_inodes = vec![]; + + // Start from the start_layer_idx and try each layer down to layer 0 + for layer_idx in (0..=start_layer_idx).rev() { + let layer_root = self.get_layer_root(layer_idx)?; + + // If path_inodes has only the root inode or is empty, we need to restart the lookup with the new layer root. + if path_inodes.len() < 2 { + path_inodes = vec![layer_root.clone()]; + } + + match self.lookup_segment_by_segment(&layer_root, &path_segments, &mut path_inodes) { + Some(Ok(st)) => { + let alt_key = InodeAltKey::new(st.st_ino, st.st_dev as i32); + + // Check if we already have this inode + let inodes = self.inodes.read().unwrap(); + if let Some(data) = inodes.get_alt(&alt_key) { + return Ok((self.create_entry(data.inode, st), data.clone(), path_inodes)); + } + + drop(inodes); + + // Create new inode + let (inode, data) = self.create_inode( + st.st_ino, + st.st_dev as i32, + path_segments.to_vec(), + layer_idx, + ); + path_inodes.push(data.clone()); + + return Ok((self.create_entry(inode, st), data, path_inodes)); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + // Continue to check lower layers + continue; + } + Some(Err(e)) => { + return Err(e); + } + None => { + // Hit a whiteout or opaque marker, stop searching lower layers + return Err(io::Error::from_raw_os_error(libc::ENOENT)); + } + } + } + + // Not found in any layer + 
Err(io::Error::from_raw_os_error(libc::ENOENT)) + } + + /// Performs a lookup operation + pub(crate) fn do_lookup( + &self, + parent: Inode, + name: &CStr, + ) -> io::Result<(Entry, Vec>)> { + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Create path segments for lookup by appending the new name + let mut path_segments = parent_data.path.clone(); + let symbol = self.intern_name(name)?; + path_segments.push(symbol); + + let (mut entry, child_data, path_inodes) = self.lookup_layer_by_layer(parent_data.layer_idx, &path_segments)?; + + // Set the submount flag if the entry is a directory and the submounts are announced + let mut attr_flags = 0; + if (entry.attr.st_mode & libc::S_IFMT) == libc::S_IFDIR + && self.announce_submounts.load(Ordering::Relaxed) + && child_data.dev != parent_data.dev + { + attr_flags |= fuse::ATTR_SUBMOUNT; + } + + entry.attr_flags = attr_flags; + + Ok((entry, path_inodes)) + } + + /// Performs a raw stat syscall without any modifications to the returned stat structure. + /// + /// This function directly calls the OS's stat syscall and returns the raw stat information + /// exactly as provided by the filesystem. It does not apply any overlayfs-specific + /// modifications like owner/permission overrides from extended attributes. + /// + /// ## Arguments + /// * `file` - A FileId containing either a path or file descriptor to stat + /// + /// ## Returns + /// * `io::Result` - The raw stat information from the filesystem + /// + /// ## Safety + /// This function performs raw syscalls but handles all unsafe operations internally. 
+ fn unpatched_stat(file: &FileId) -> io::Result { + let mut st = MaybeUninit::::zeroed(); + + let ret = unsafe { + match file { + FileId::Path(path) => { + libc::lstat(path.as_ptr(), st.as_mut_ptr() as *mut libc::stat) + } + FileId::Fd(fd) => libc::fstat(*fd, st.as_mut_ptr() as *mut libc::stat), + } + }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(unsafe { st.assume_init() }) + } + + /// Performs a stat syscall and patches the returned stat structure with overlayfs metadata. + /// + /// This function extends unpatched_stat by applying overlayfs-specific modifications: + /// 1. Gets the raw stat information using unpatched_stat + /// 2. Reads extended attributes storing overlayfs owner/permission overrides + /// 3. Updates the stat structure with any owner (uid/gid) overrides found + /// 4. Updates the permission bits with any mode overrides found + /// + /// This provides the overlayfs view of file metadata, where file ownership and permissions + /// can be modified independently of the underlying filesystem. + /// + /// ## Arguments + /// * `file` - A FileId containing either a path or file descriptor to stat + /// + /// ## Returns + /// * `io::Result` - The stat information with overlayfs patches applied + /// + /// ## Safety + /// This function performs raw syscalls but handles all unsafe operations internally. 
+ fn patched_stat(file: &FileId) -> io::Result { + let mut stat = Self::unpatched_stat(file)?; + + // Get owner and permissions from xattr + if let Ok(Some((uid, gid, mode))) = Self::get_owner_perms_attr(file, &stat) { + // Update the stat with the xattr values if available + stat.st_uid = uid; + stat.st_gid = gid; + // Make sure we only modify the permission bits (lower 12 bits) + stat.st_mode = (stat.st_mode & !0o7777u16) | mode; + } + + Ok(stat) + } + + fn get_owner_perms_attr( + file: &FileId, + st: &bindings::stat64, + ) -> io::Result> { + // Try to get the owner and permissions from xattr + let mut buf: Vec = vec![0; 32]; + + // Get options based on file type + let options = if (st.st_mode & libc::S_IFMT) == libc::S_IFLNK { + libc::XATTR_NOFOLLOW + } else { + 0 + }; + + // Helper function to convert byte slice to u32 value + fn item_to_value(item: &[u8], radix: u32) -> Option { + match std::str::from_utf8(item) { + Ok(val) => match u32::from_str_radix(val, radix) { + Ok(i) => Some(i), + Err(_) => None, + }, + Err(_) => None, + } + } + + // Get the xattr + let res = match file { + FileId::Path(path) => unsafe { + libc::getxattr( + path.as_ptr(), + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + 0, + options, + ) + }, + FileId::Fd(fd) => unsafe { + libc::fgetxattr( + *fd, + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + 0, + options, + ) + }, + }; + + if res < 0 { + let err = io::Error::last_os_error(); + if err.raw_os_error() == Some(libc::ENOATTR) { + return Ok(None); + } + return Err(err); + } + + let len = res as usize; + buf.truncate(len); + + // Parse the xattr value + let parts: Vec<&[u8]> = buf.split(|&b| b == b':').collect(); + if parts.len() != 3 { + return Ok(None); + } + + let uid = item_to_value(parts[0], 10).unwrap_or(st.st_uid); + let gid = item_to_value(parts[1], 10).unwrap_or(st.st_gid); + let mode = item_to_value(parts[2], 
8).unwrap_or(st.st_mode as u32) as u16; + + Ok(Some((uid, gid, mode))) + } + + fn set_owner_perms_attr( + file: &FileId, + st: &bindings::stat64, + owner: Option<(u32, u32)>, + mode: Option, + ) -> io::Result<()> { + // Get the current values to use as defaults + let (uid, gid) = if let Some((uid, gid)) = owner { + (uid, gid) + } else { + (st.st_uid, st.st_gid) + }; + + let mode = mode.unwrap_or(st.st_mode); + + // Format the xattr value + let value = format!("{}:{}:{:o}", uid, gid, mode & 0o7777); + let value_bytes = value.as_bytes(); + + // Get options based on file type + let options = if (st.st_mode & libc::S_IFMT) == libc::S_IFLNK { + libc::XATTR_NOFOLLOW + } else { + 0 + }; + + // Set the xattr + let res = match file { + FileId::Path(path) => unsafe { + libc::setxattr( + path.as_ptr(), + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + value_bytes.as_ptr() as *const libc::c_void, + value_bytes.len(), + 0, + options, + ) + }, + FileId::Fd(fd) => unsafe { + libc::fsetxattr( + *fd, + OWNER_PERMS_XATTR_KEY.as_ptr() as *const i8, + value_bytes.as_ptr() as *const libc::c_void, + value_bytes.len(), + 0, + options, + ) + }, + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + Ok(()) + } + + /// Copies up a file or directory from a lower layer to the top layer + pub(crate) fn copy_up(&self, path_inodes: &[Arc]) -> io::Result<()> { + // Get the top layer root + let top_layer_idx = self.get_top_layer_idx(); + let top_layer_root = self.get_layer_root(top_layer_idx)?; + + // Start from root and copy up each segment that's not in the top layer + let mut parent_dev = top_layer_root.dev; + let mut parent_ino = top_layer_root.ino; + + // Skip the root inode + for inode_data in path_inodes.iter().skip(1) { + // Skip if this segment is already in the top layer + if inode_data.layer_idx == top_layer_idx { + parent_dev = inode_data.dev; + parent_ino = inode_data.ino; + continue; + } + + // Get the current segment name + let segment_name = { + let name = 
inode_data.path.last().unwrap(); + let filenames = self.filenames.read().unwrap(); + filenames.get(*name).unwrap().to_owned() + }; + + // Get source and destination paths + let src_path = self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?; + let dst_path = + self.dev_ino_and_name_to_vol_path(parent_dev, parent_ino, &segment_name)?; + + // Get source file/directory stats + let src_stat = Self::patched_stat(&FileId::Path(src_path.clone()))?; + let file_type = src_stat.st_mode & libc::S_IFMT; + + // Copy up the file/directory + match file_type { + libc::S_IFREG => { + // Regular file: use clonefile for COW semantics if available + // Use clonefile for COW semantics + let result = unsafe { clonefile(src_path.as_ptr(), dst_path.as_ptr(), 0) }; + + if result < 0 { + let err = io::Error::last_os_error(); + // If clonefile fails (e.g., across filesystems), fall back to regular copy + if err.raw_os_error() == Some(libc::EXDEV) + || err.raw_os_error() == Some(libc::ENOTSUP) + { + // Fall back to regular copy + self.copy_file_contents( + &src_path, + &dst_path, + (src_stat.st_mode & 0o777) as u32, + )?; + } else { + return Err(err); + } + } + } + libc::S_IFDIR => { + // Directory: just create it with the same permissions + unsafe { + if libc::mkdir(dst_path.as_ptr(), src_stat.st_mode & 0o777) < 0 { + return Err(io::Error::last_os_error()); + } + + // Explicitly set directory permissions to match source + if libc::chmod(dst_path.as_ptr(), src_stat.st_mode & 0o777) < 0 { + return Err(io::Error::last_os_error()); + } + } + } + libc::S_IFLNK => { + // Symbolic link: read target and recreate link + let mut buf = vec![0u8; libc::PATH_MAX as usize]; + let len = unsafe { + libc::readlink(src_path.as_ptr(), buf.as_mut_ptr() as *mut _, buf.len()) + }; + if len < 0 { + return Err(io::Error::last_os_error()); + } + buf.truncate(len as usize); + + unsafe { + if libc::symlink(buf.as_ptr() as *const _, dst_path.as_ptr()) < 0 { + return Err(io::Error::last_os_error()); + } + + // 
Note: macOS doesn't allow setting permissions on symlinks directly + // The permissions of symlinks are typically ignored by the system + } + } + _ => { + // Other types (devices, sockets, etc.) are not supported + return Err(io::Error::new( + io::ErrorKind::Unsupported, + "unsupported file type for copy up", + )); + } + } + + // Update parent dev/ino for next iteration + let new_stat = Self::unpatched_stat(&FileId::Path(dst_path))?; + parent_dev = new_stat.st_dev as i32; + parent_ino = new_stat.st_ino; + + // Update the inode entry to point to the new copy in the top layer + let alt_key = InodeAltKey::new(new_stat.st_ino, new_stat.st_dev as i32); + let mut inodes = self.inodes.write().unwrap(); + + // Create new inode data with updated dev/ino/layer_idx but same path and refcount + let new_data = Arc::new(InodeData { + inode: inode_data.inode, + ino: new_stat.st_ino, + dev: new_stat.st_dev as i32, + refcount: AtomicU64::new(inode_data.refcount.load(Ordering::SeqCst)), + path: inode_data.path.clone(), + layer_idx: top_layer_idx, + }); + + // Replace the old entry with the new one + inodes.insert(inode_data.inode, alt_key, new_data); + } + + Ok(()) + } + + /// Helper method to copy file contents when clonefile is not available or fails + fn copy_file_contents( + &self, + src_path: &CString, + dst_path: &CString, + mode: u32, + ) -> io::Result<()> { + unsafe { + let src_file = libc::open(src_path.as_ptr(), libc::O_RDONLY); + if src_file < 0 { + return Err(io::Error::last_os_error()); + } + + let dst_file = libc::open( + dst_path.as_ptr(), + libc::O_WRONLY | libc::O_CREAT | libc::O_EXCL, + mode, + ); + if dst_file < 0 { + libc::close(src_file); + return Err(io::Error::last_os_error()); + } + + // Copy file contents + let mut buf = [0u8; 8192]; + loop { + let n_read = libc::read(src_file, buf.as_mut_ptr() as *mut _, buf.len()); + if n_read <= 0 { + break; + } + let mut pos = 0; + while pos < n_read { + let n_written = libc::write( + dst_file, + buf.as_ptr().add(pos as 
usize) as *const _, + (n_read - pos) as usize, + ); + if n_written <= 0 { + libc::close(src_file); + libc::close(dst_file); + return Err(io::Error::last_os_error()); + } + pos += n_written; + } + } + + // Explicitly set permissions to match source file + // This will override any effects from the umask + if libc::fchmod(dst_file, mode as libc::mode_t) < 0 { + libc::close(src_file); + libc::close(dst_file); + return Err(io::Error::last_os_error()); + } + + libc::close(src_file); + libc::close(dst_file); + } + + Ok(()) + } + + /// Ensures the file is in the top layer by copying it up if necessary. + /// + /// This function: + /// 1. Checks if the file is already in the top layer + /// 2. If not, looks up the complete path to the file + /// 3. Copies the file and all its parent directories to the top layer + /// 4. Returns the inode data for the copied file + /// + /// ### Arguments + /// * `inode_data` - The inode data for the file to copy up + /// + /// ### Returns + /// * `Ok(InodeData)` - The inode data for the file in the top layer + /// * `Err(io::Error)` - If the copy-up operation fails + fn ensure_top_layer(&self, inode_data: Arc) -> io::Result> { + let top_layer_idx = self.get_top_layer_idx(); + + // If already in top layer, return early + if inode_data.layer_idx == top_layer_idx { + return Ok(inode_data); + } + + // Build the path segments + let path_segments = inode_data.path.clone(); + + // Lookup the file to get all path inodes + let (_, _, path_inodes) = self.lookup_layer_by_layer(top_layer_idx, &path_segments)?; + + // Copy up the file + self.copy_up(&path_inodes)?; + + // Get the inode data for the copied file + self.get_inode_data(inode_data.inode) + } + + /// Creates a whiteout file for a given parent directory and name. + /// This is used to hide files that exist in lower layers. 
+ /// + /// # Arguments + /// * `parent` - The inode of the parent directory + /// * `name` - The name of the file to create a whiteout for + /// + /// # Returns + /// * `Ok(())` if the whiteout was created successfully + /// * `Err(io::Error)` if there was an error creating the whiteout + fn create_whiteout_for_lower(&self, parent: Inode, name: &CStr) -> io::Result<()> { + if let Ok((_, mut path_inodes)) = self.do_lookup(parent, name) { + // Copy up the parent directory if needed + path_inodes.pop(); + self.copy_up(&path_inodes)?; + let parent_data = self.get_inode_data(parent)?; + + // Create the whiteout file + let whiteout_path = + self.dev_ino_and_name_to_vol_whiteout_path(parent_data.dev, parent_data.ino, name)?; + + let fd = unsafe { + libc::open( + whiteout_path.as_ptr(), + libc::O_CREAT | libc::O_WRONLY | libc::O_EXCL, + 0o000, // Whiteout files have no permissions + ) + }; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + unsafe { libc::close(fd) }; + } + + Ok(()) + } + + /// Returns an iterator over all valid entries in the directory across all layers. + /// + /// Note: OverlayFs is a high-level, layered filesystem. A simple readdir on a single directory does not produce the complete view. + /// This function traverses the directory across multiple layers, merging entries while handling duplicates, + /// whiteout files, and opaque markers. + /// + /// ## Arguments + /// * `dir` - The inode of the directory to iterate over. + /// * `add_entry` - A callback function that processes each directory entry. If the callback returns 0, + /// it signals that the directory buffer is full and iteration should stop. + /// + /// ## Returns + /// * `Ok(())` if the directory was iterated successfully. + /// * `Err(io::Error)` if an error occurred during iteration. 
+ pub(super) fn process_dir_entries(&self, dir: Inode, mut add_entry: F) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + // Local state to track iteration over layers + struct LazyReaddirState { + current_layer: isize, // current layer (top-down) + inode_data: Option>, + current_iter: Option, + seen: HashSet>, + } + + let inode_data = self.get_inode_data(dir)?; + let top_layer = self.get_top_layer_idx() as isize; + let path = inode_data.path.clone(); + let mut state = LazyReaddirState { + current_layer: top_layer, + inode_data: None, + current_iter: None, + seen: HashSet::new(), + }; + + let mut current_offset = 0u64; + let mut opaque_marker_found = false; + loop { + // If no current iterator, attempt to initialize one for the current layer + if state.current_iter.is_none() { + if state.current_layer < 0 { + break; // All layers exhausted + } + + let layer_root = self.get_layer_root(state.current_layer as usize)?; + let mut path_inodes = vec![layer_root.clone()]; + + match self.lookup_segment_by_segment(&layer_root, &path, &mut path_inodes) { + Some(Ok(_)) => { + let last_inode = path_inodes.last().unwrap(); + let vol_path = self.inode_number_to_vol_path((**last_inode).inode)?; + let dir_str = vol_path.as_c_str().to_str().map_err(|_| { + io::Error::new(io::ErrorKind::Other, "Invalid path string") + })?; + + state.inode_data = Some(last_inode.clone()); + state.current_iter = Some(std::fs::read_dir(dir_str)?); + } + Some(Err(e)) if e.kind() == io::ErrorKind::NotFound => { + state.current_layer -= 1; + continue; + } + Some(Err(e)) => return Err(e), + None => { + state.current_layer = -1; + continue; + } + } + } + + if let Some(iter) = state.current_iter.as_mut() { + if let Some(entry_result) = iter.next() { + let entry = entry_result?; + let name = entry.file_name(); + let name_str = name.to_string_lossy(); + + if state.seen.contains(name.as_bytes()) { + continue; + } + + // Handle opaque marker and whiteout files + if name_str == OPAQUE_MARKER { + 
// Opaque marker found; mark it and skip this entry + opaque_marker_found = true; + continue; + } else if name_str.starts_with(WHITEOUT_PREFIX) { + // Whiteout file; skip it + let actual = &name_str[WHITEOUT_PREFIX.len()..]; + state.seen.insert(actual.as_bytes().to_vec()); + continue; + } else { + state.seen.insert(name.as_bytes().to_vec()); + } + + let metadata = entry.metadata()?; + let mode = metadata.mode() as u32; + let s_ifmt = libc::S_IFMT as u32; + let type_ = if mode & s_ifmt == (libc::S_IFDIR as u32) { + libc::DT_DIR + } else if mode & s_ifmt == (libc::S_IFREG as u32) { + libc::DT_REG + } else if mode & s_ifmt == (libc::S_IFLNK as u32) { + libc::DT_LNK + } else if mode & s_ifmt == (libc::S_IFIFO as u32) { + libc::DT_FIFO + } else if mode & s_ifmt == (libc::S_IFCHR as u32) { + libc::DT_CHR + } else if mode & s_ifmt == (libc::S_IFBLK as u32) { + libc::DT_BLK + } else if mode & s_ifmt == (libc::S_IFSOCK as u32) { + libc::DT_SOCK + } else { + libc::DT_UNKNOWN + }; + + current_offset += 1; + + let dir_entry = DirEntry { + ino: metadata.ino(), + offset: current_offset, + type_: type_ as u32, + name: name.as_bytes(), + }; + + if add_entry(dir_entry)? == 0 { + return Ok(()); + } + } else { + state.current_iter = None; + if opaque_marker_found { + break; + } + state.current_layer -= 1; + continue; + } + } + } + + Ok(()) + } + + /// Reads directory entries for the given inode by merging entries from all underlying layers. + /// + /// Unlike conventional filesystems that simply call readdir on a directory file descriptor, + /// OverlayFs must aggregate entries from multiple layers. The `offset` parameter specifies the starting + /// index in the merged list of directory entries. The provided `add_entry` callback is invoked for each + /// entry; a return value of 0 indicates that the directory buffer is full and reading should cease. + /// + /// NOTE: The current implementation of offset does not entirely follow FUSE expected behaviors. 
+ /// Changes to entries in the write layer can affect the offset, potentially causing inconsistencies + /// in directory listing between calls. + /// + /// TODO: Implement a more robust offset handling mechanism that maintains consistency even when + /// the underlying directory structure changes. One way is making offset a composite value of + /// layer (1 MSB) + offset (7 LSB). This will also require having multiple open dirs from lower layers + /// in [HandleData]. + pub(super) fn do_readdir( + &self, + inode: Inode, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + if size == 0 { + return Ok(()); + } + + let mut current_offset = 0u64; + self.process_dir_entries(inode, |entry| { + if current_offset < offset { + current_offset += 1; + return Ok(1); + } + + add_entry(entry) + }) + } + + /// Performs an open operation + fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option, OpenOptions)> { + // Parse and normalize the open flags + let flags = self.parse_open_flags(flags as i32); + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer + let inode_data = self.ensure_top_layer(inode_data)?; + + // Open the file with the appropriate flags and generate a new unique handle ID + let file = RwLock::new(self.open_inode(inode_data.inode, flags)?); + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + + // Create handle data structure with file and empty dirstream + let data = HandleData { inode, file }; + + // Store the handle data in the handles map + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + // Set up OpenOptions based on the cache policy configuration + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + // For CachePolicy::Never, set DIRECT_IO to bypass kernel caching for files (not directories) + CachePolicy::Never => opts.set(OpenOptions::DIRECT_IO, flags & 
libc::O_DIRECTORY == 0), + + // For CachePolicy::Always, set different caching options based on whether it's a file or directory + CachePolicy::Always => { + if flags & libc::O_DIRECTORY == 0 { + // For files: KEEP_CACHE maintains kernel cache between open/close operations + opts |= OpenOptions::KEEP_CACHE; + } else { + // For directories: CACHE_DIR enables caching of directory entries + opts |= OpenOptions::CACHE_DIR; + } + } + + // For CachePolicy::Auto, use default caching behavior + _ => {} + }; + + // Return the handle and options + Ok((Some(handle), opts)) + } + + /// Performs a release operation + fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> { + let mut handles = self.handles.write().unwrap(); + + if let btree_map::Entry::Occupied(e) = handles.entry(handle) { + if e.get().inode == inode { + // We don't need to close the file here because that will happen automatically when + // the last `Arc` is dropped. + e.remove(); + return Ok(()); + } + } + + Err(ebadf()) + } + + /// Performs a getattr operation + fn do_getattr(&self, inode: Inode) -> io::Result<(bindings::stat64, Duration)> { + let c_path = self.inode_number_to_vol_path(inode)?; + let st = Self::patched_stat(&FileId::Path(c_path))?; + + Ok((st, self.config.attr_timeout)) + } + + /// Performs a setattr operation, copying up the file if needed + fn do_setattr( + &self, + inode: Inode, + attr: bindings::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(bindings::stat64, Duration)> { + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // Get the file identifier - either from handle or path + let file_id = if let Some(handle) = handle { + // Get the handle data + let handles = self.handles.read().unwrap(); + let handle_data = handles.get(&handle).ok_or_else(ebadf)?; + let file = handle_data.file.read().unwrap(); + 
FileId::Fd(file.as_raw_fd()) + } else { + // Use path if no handle available + let c_path = self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?; + FileId::Path(c_path) + }; + + // Consolidate attribute changes using a single setattrlist call + let current_stat = Self::patched_stat(&file_id)?; + + // Handle ownership changes + if valid.intersects(SetattrValid::UID | SetattrValid::GID) { + let uid = if valid.contains(SetattrValid::UID) { + Some(attr.st_uid) + } else { + None + }; + + let gid = if valid.contains(SetattrValid::GID) { + Some(attr.st_gid) + } else { + None + }; + + if let Some((uid, gid)) = uid + .zip(gid) + .or_else(|| uid.map(|u| (u, current_stat.st_gid))) + .or_else(|| gid.map(|g| (current_stat.st_uid, g))) + { + Self::set_owner_perms_attr(&file_id, ¤t_stat, Some((uid, gid)), None)?; + } + } + + // Handle mode changes + if valid.contains(SetattrValid::MODE) { + let mode = attr.st_mode & 0o7777; + Self::set_owner_perms_attr(&file_id, ¤t_stat, None, Some(mode))?; + } + + // Handle size changes + if valid.contains(SetattrValid::SIZE) { + let res = match file_id { + FileId::Fd(fd) => unsafe { libc::ftruncate(fd, attr.st_size) }, + FileId::Path(ref c_path) => unsafe { + libc::truncate(c_path.as_ptr(), attr.st_size) + }, + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Handle timestamp changes + if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) { + let mut tvs = [ + libc::timespec { + tv_sec: 0, + tv_nsec: libc::UTIME_OMIT, + }, + libc::timespec { + tv_sec: 0, + tv_nsec: libc::UTIME_OMIT, + }, + ]; + + if valid.contains(SetattrValid::ATIME_NOW) { + tvs[0].tv_nsec = libc::UTIME_NOW; + } else if valid.contains(SetattrValid::ATIME) { + tvs[0].tv_sec = attr.st_atime; + tvs[0].tv_nsec = attr.st_atime_nsec; + } + + if valid.contains(SetattrValid::MTIME_NOW) { + tvs[1].tv_nsec = libc::UTIME_NOW; + } else if valid.contains(SetattrValid::MTIME) { + tvs[1].tv_sec = attr.st_mtime; + tvs[1].tv_nsec = attr.st_mtime_nsec; 
+ } + + // Safe because this doesn't modify any memory and we check the return value + let res = match file_id { + FileId::Fd(fd) => unsafe { libc::futimens(fd, tvs.as_ptr()) }, + FileId::Path(ref c_path) => unsafe { + let fd = libc::open(c_path.as_ptr(), libc::O_SYMLINK | libc::O_CLOEXEC); + let res = libc::futimens(fd, tvs.as_ptr()); + libc::close(fd); + res + }, + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // Return the updated attributes and timeout + self.do_getattr(inode) + } + + /// Performs a mkdir operation + fn do_mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + // Create the directory with initial permissions + let res = unsafe { libc::mkdir(c_path.as_ptr(), 0o700) }; + if res == 0 { + // Set security context if provided + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Path(c_path.clone()), secctx, false)?; + } + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + Self::set_owner_perms_attr( + &FileId::Path(c_path.clone()), + &stat, + Some((ctx.uid, ctx.gid)), + Some((mode & !umask) as u16), + )?; + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + return Ok(entry); + } + + // Return the error + Err(linux_error(io::Error::last_os_error())) + } + + /// Performs an unlink operation + fn do_unlink(&self, parent: Inode, name: &CStr) -> io::Result<()> { + let top_layer_idx = self.get_top_layer_idx(); + let (entry, _) = self.do_lookup(parent, name)?; + + // If the inode is in the top layer, we need to unlink it. 
+ let entry_data = self.get_inode_data(entry.inode)?; + if entry_data.layer_idx == top_layer_idx { + // Get the path for the inode + let c_path = self.inode_number_to_vol_path(entry.inode)?; + + // Remove the inode from the overlayfs + let res = unsafe { libc::unlink(c_path.as_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // If after an unlink, the entry still exists in a lower layer, we need to add a whiteout + self.create_whiteout_for_lower(parent, name)?; + + Ok(()) + } + + /// Performs an rmdir operation + fn do_rmdir(&self, parent: Inode, name: &CStr) -> io::Result<()> { + let top_layer_idx = self.get_top_layer_idx(); + let (entry, _) = self.do_lookup(parent, name)?; + + // If the inode is in the top layer, we need to unlink it. + let entry_data = self.get_inode_data(entry.inode)?; + if entry_data.layer_idx == top_layer_idx { + // Get the path for the inode + let c_path = self.inode_number_to_vol_path(entry.inode)?; + + // Remove the inode from the overlayfs + let res = unsafe { libc::rmdir(c_path.as_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + } + + // If after an rmdir, the entry still exists in a lower layer, we need to add a whiteout + self.create_whiteout_for_lower(parent, name)?; + + Ok(()) + } + + /// Performs a symlink operation + fn do_symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + // Create the directory with initial permissions + let res = unsafe { libc::symlink(linkname.as_ptr(), c_path.as_ptr()) }; + if res == 0 { + // Set security context if provided + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Path(c_path.clone()), secctx, true)?; + } + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + let mode = libc::S_IFLNK | 0o777; + Self::set_owner_perms_attr( + &FileId::Path(c_path.clone()), + &stat, + Some((ctx.uid, ctx.gid)), + Some(mode), + )?; + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + return Ok(entry); + } + + // Return the error + Err(linux_error(io::Error::last_os_error())) + } + + fn do_rename( + &self, + old_parent: Inode, + old_name: &CStr, + new_parent: Inode, + new_name: &CStr, + flags: u32, + ) -> io::Result<()> { + // Copy up the old path to the top layer if not already in the top layer + let (_, old_path_inodes) = self.do_lookup(old_parent, old_name)?; + self.copy_up(&old_path_inodes)?; + let old_parent_data = self.get_inode_data(old_parent)?; + + // Copy up the new parent to the top layer if not already in the top 
layer + let new_parent_data = self.ensure_top_layer(self.get_inode_data(new_parent)?)?; + + // Get the paths for rename operation + let old_path = + self.dev_ino_and_name_to_vol_path(old_parent_data.dev, old_parent_data.ino, old_name)?; + let new_path = + self.dev_ino_and_name_to_vol_path(new_parent_data.dev, new_parent_data.ino, new_name)?; + + // Set up rename flags + let mut mflags: u32 = 0; + if ((flags as i32) & bindings::LINUX_RENAME_NOREPLACE) != 0 { + mflags |= libc::RENAME_EXCL; + } + if ((flags as i32) & bindings::LINUX_RENAME_EXCHANGE) != 0 { + mflags |= libc::RENAME_SWAP; + } + + // Check for invalid flag combinations + if ((flags as i32) & bindings::LINUX_RENAME_WHITEOUT) != 0 + && ((flags as i32) & bindings::LINUX_RENAME_EXCHANGE) != 0 + { + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + + // Perform the rename + let res = unsafe { libc::renamex_np(old_path.as_ptr(), new_path.as_ptr(), mflags) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // After successful rename, check if we need to add a whiteout for the old path + self.create_whiteout_for_lower(old_parent, old_name)?; + + // If LINUX_RENAME_WHITEOUT is set, create a character device at the old path location + if ((flags as i32) & bindings::LINUX_RENAME_WHITEOUT) != 0 { + let fd = unsafe { + libc::open( + old_path.as_ptr(), + libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, + 0o600, + ) + }; + + let stat = Self::unpatched_stat(&FileId::Fd(fd))?; + Self::set_owner_perms_attr(&FileId::Fd(fd), &stat, None, Some(libc::S_IFCHR | 0o600))?; + + if fd < 0 { + return Err(io::Error::last_os_error()); + } + + unsafe { libc::close(fd) }; + } + + Ok(()) + } + + fn do_link(&self, inode: Inode, new_parent: Inode, new_name: &CStr) -> io::Result { + // Get the inode data for the source file + let inode_data = self.get_inode_data(inode)?; + + // Copy up the source file to the top layer if needed + let inode_data = self.ensure_top_layer(inode_data)?; + + // Get 
source and destination paths + let src_path = self.dev_ino_to_vol_path(inode_data.dev, inode_data.ino)?; + + // Extraneous check to ensure the source file is not a symlink + let stat = Self::unpatched_stat(&FileId::Path(src_path.clone()))?; + if stat.st_mode & libc::S_IFMT == libc::S_IFLNK { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot link to a symlink", + )); + } + + // Get and ensure new parent is in top layer + let new_parent_data = self.ensure_top_layer(self.get_inode_data(new_parent)?)?; + + + let dst_path = + self.dev_ino_and_name_to_vol_path(new_parent_data.dev, new_parent_data.ino, new_name)?; + + // Create the hard link + let res = unsafe { libc::link(src_path.as_ptr(), dst_path.as_ptr()) }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Get the entry for the newly created link + let mut path = new_parent_data.path.clone(); + path.push(self.intern_name(new_name)?); + + // Get stats for the new link + let stat = Self::patched_stat(&FileId::Path(dst_path))?; + + // Create new inode for the link pointing to same dev/ino as source + let (inode, _) = self.create_inode( + stat.st_ino, + stat.st_dev as i32, + path, + new_parent_data.layer_idx, + ); + + Ok(self.create_entry(inode, stat)) + } + + /// Decrements the reference count for an inode and removes it if the count reaches zero + fn do_forget(&self, inode: Inode, count: u64) { + let mut inodes = self.inodes.write().unwrap(); + if let Some(data) = inodes.get(&inode) { + // Acquiring the write lock on the inode map prevents new lookups from incrementing the + // refcount but there is the possibility that a previous lookup already acquired a + // reference to the inode data and is in the process of updating the refcount so we need + // to loop here until we can decrement successfully. 
+ loop { + let refcount = data.refcount.load(Ordering::Relaxed); + + // Saturating sub because it doesn't make sense for a refcount to go below zero and + // we don't want misbehaving clients to cause integer overflow. + let new_count = refcount.saturating_sub(count); + + if data + .refcount + .compare_exchange(refcount, new_count, Ordering::Release, Ordering::Relaxed) + .unwrap() + == refcount + { + if new_count == 0 { + // We just removed the last refcount for this inode. There's no need for an + // acquire fence here because we hold a write lock on the inode map and any + // thread that is waiting to do a forget on the same inode will have to wait + // until we release the lock. So there's is no other release store for us to + // synchronize with before deleting the entry. + inodes.remove(&inode); + } + break; + } + } + } + } + + fn do_readlink(&self, inode: Inode) -> io::Result> { + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode)?; + + // Allocate a buffer for the link target + let mut buf = vec![0; libc::PATH_MAX as usize]; + + // Call readlink to get the symlink target + let res = unsafe { + libc::readlink( + c_path.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + buf.len(), + ) + }; + + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Resize the buffer to the actual length of the link target + buf.resize(res as usize, 0); + Ok(buf) + } + + fn do_setxattr(&self, inode: Inode, name: &CStr, value: &[u8], flags: u32) -> io::Result<()> { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + // Don't allow setting the owner/permissions attribute + if name.to_bytes() == OWNER_PERMS_XATTR_KEY { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let 
inode_data = self.ensure_top_layer(inode_data)?; + + // Convert flags to mflags + let mut mflags: i32 = 0; + if (flags as i32) & bindings::LINUX_XATTR_CREATE != 0 { + mflags |= libc::XATTR_CREATE; + } + + if (flags as i32) & bindings::LINUX_XATTR_REPLACE != 0 { + mflags |= libc::XATTR_REPLACE; + } + + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode_data.inode)?; + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { + libc::setxattr( + c_path.as_ptr(), + name.as_ptr(), + value.as_ptr() as *const libc::c_void, + value.len(), + 0, + mflags as libc::c_int, + ) + }; + + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn do_getxattr(&self, inode: Inode, name: &CStr, size: u32) -> io::Result { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + // Don't allow getting attributes for init + if inode == self.init_inode { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENODATA))); + } + + // Don't allow getting the owner/permissions attribute + if name.to_bytes() == OWNER_PERMS_XATTR_KEY { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode)?; + + // Safe because this will only modify the contents of `buf` + let mut buf = vec![0; size as usize]; + let res = unsafe { + if size == 0 { + libc::getxattr( + c_path.as_ptr(), + name.as_ptr(), + std::ptr::null_mut(), + size as libc::size_t, + 0, + 0, + ) + } else { + libc::getxattr( + c_path.as_ptr(), + name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + size as libc::size_t, + 0, + 0, + ) + } + }; + + if res < 0 { + let last_error = io::Error::last_os_error(); + if last_error.raw_os_error() == Some(libc::ERANGE) { + return Err(io::Error::from_raw_os_error(LINUX_ERANGE)); + } + + return 
Err(linux_error(last_error)); + } + + if size == 0 { + Ok(GetxattrReply::Count(res as u32)) + } else { + // Truncate the buffer to the actual length of the value + buf.resize(res as usize, 0); + Ok(GetxattrReply::Value(buf)) + } + } + + fn do_listxattr(&self, inode: Inode, size: u32) -> io::Result { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode)?; + + // Safe because this will only modify the contents of `buf`. + let mut buf = vec![0; 512_usize]; + let res = unsafe { + libc::listxattr( + c_path.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_char, + 512, + 0, + ) + }; + + if res < 0 { + let last_error = io::Error::last_os_error(); + if last_error.raw_os_error() == Some(libc::ERANGE) { + return Err(io::Error::from_raw_os_error(LINUX_ERANGE)); + } + + return Err(linux_error(last_error)); + } + + // Truncate the buffer to the actual length of the list of attributes + buf.truncate(res as usize); + + if size == 0 { + let mut clean_size = res as usize; + + // Remove the owner/permissions attribute from the list of attributes + for attr in buf.split(|c| *c == 0) { + if attr.starts_with(&OWNER_PERMS_XATTR_KEY[..OWNER_PERMS_XATTR_KEY.len() - 1]) { + clean_size -= OWNER_PERMS_XATTR_KEY.len(); + } + } + + Ok(ListxattrReply::Count(clean_size as u32)) + } else { + let mut clean_buf = Vec::new(); + + // Remove the owner/permissions attribute from the list of attributes + for attr in buf.split(|c| *c == 0) { + if attr.is_empty() + || attr.starts_with(&OWNER_PERMS_XATTR_KEY[..OWNER_PERMS_XATTR_KEY.len() - 1]) + { + continue; + } + + clean_buf.extend_from_slice(attr); + clean_buf.push(0); + } + + // Shrink the buffer to the actual length of the list of attributes + clean_buf.shrink_to_fit(); + + // Return an error if the buffer exceeds the requested size + if clean_buf.len() > size as usize { + return 
Err(io::Error::from_raw_os_error(LINUX_ERANGE)); + } + + Ok(ListxattrReply::Names(clean_buf)) + } + } + + fn do_removexattr(&self, inode: Inode, name: &CStr) -> io::Result<()> { + // Check if extended attributes are enabled + if !self.config.xattr { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + // Don't allow setting the owner/permissions attribute + if name.to_bytes() == OWNER_PERMS_XATTR_KEY { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // Get the inode data + let inode_data = self.get_inode_data(inode)?; + + // Ensure the file is in the top layer before modifying attributes + let inode_data = self.ensure_top_layer(inode_data)?; + + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode_data.inode)?; + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::removexattr(c_path.as_ptr(), name.as_ptr(), 0) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn do_create( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. 
+ } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + let flags = self.parse_open_flags(flags as i32); + let hostmode = if (flags & libc::O_DIRECTORY) != 0 { + 0o700 + } else { + 0o600 + }; + + // Safe because this doesn't modify any memory and we check the return value. We don't + // really check `flags` because if the kernel can't handle poorly specified flags then we + // have much bigger problems. + let fd = unsafe { + libc::open( + c_path.as_ptr(), + flags | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, + hostmode, + ) + }; + + if fd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + // Set security context + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Fd(fd), secctx, false)? + }; + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + if let Err(e) = Self::set_owner_perms_attr( + &FileId::Fd(fd), + &stat, + Some((ctx.uid, ctx.gid)), + Some((libc::S_IFREG as u32 | (mode & !(umask & 0o777))) as u16), + ) { + unsafe { libc::close(fd) }; + return Err(e); + } + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + // Safe because we just opened this fd. 
+ let file = RwLock::new(unsafe { File::from_raw_fd(fd) }); + + let handle = self.next_handle.fetch_add(1, Ordering::Relaxed); + let data = HandleData { + inode: entry.inode, + file, + }; + + self.handles.write().unwrap().insert(handle, Arc::new(data)); + + let mut opts = OpenOptions::empty(); + match self.config.cache_policy { + CachePolicy::Never => opts |= OpenOptions::DIRECT_IO, + CachePolicy::Always => opts |= OpenOptions::KEEP_CACHE, + _ => {} + }; + + Ok((entry, Some(handle), opts)) + } + + fn do_mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + // Check if an entry with the same name already exists in the parent directory + match self.do_lookup(parent, name) { + Ok(_) => { + return Err(io::Error::new( + io::ErrorKind::AlreadyExists, + "Entry already exists", + )) + } + Err(e) if e.raw_os_error() == Some(libc::ENOENT) => { + // Expected ENOENT means it does not exist, so continue. + } + Err(e) => return Err(e), + } + + // Get the parent inode data + let parent_data = self.get_inode_data(parent)?; + + // Ensure parent directory is in the top layer + let parent_data = self.ensure_top_layer(parent_data)?; + + // Get the path for the new directory + let c_path = self.dev_ino_and_name_to_vol_path(parent_data.dev, parent_data.ino, name)?; + + // NOTE: file nodes are created as regular file on macos following the passthroughfs + // behavior. + let fd = unsafe { + libc::open( + c_path.as_ptr(), + libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW, + 0o600, + ) + }; + + if fd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + // Set security context + if let Some(secctx) = extensions.secctx { + Self::set_secctx(&FileId::Fd(fd), secctx, false)? 
+ }; + + // Get the initial stat for the directory + let stat = Self::unpatched_stat(&FileId::Path(c_path.clone()))?; + + // Set ownership and permissions + if let Err(e) = Self::set_owner_perms_attr( + &FileId::Fd(fd), + &stat, + Some((ctx.uid, ctx.gid)), + Some((mode & !umask) as u16), + ) { + unsafe { libc::close(fd) }; + return Err(e); + } + + // Get the updated stat for the directory + let updated_stat = Self::patched_stat(&FileId::Path(c_path))?; + + let mut path = parent_data.path.clone(); + path.push(self.intern_name(name)?); + + // Create the inode for the newly created directory + let (inode, _) = self.create_inode( + updated_stat.st_ino, + updated_stat.st_dev, + path, + parent_data.layer_idx, + ); + + // Create the entry for the newly created directory + let entry = self.create_entry(inode, updated_stat); + + unsafe { libc::close(fd) }; + + Ok(entry) + } + + fn do_fallocate( + &self, + inode: Inode, + handle: Handle, + offset: u64, + length: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + let fd = data.file.write().unwrap().as_raw_fd(); + let proposed_length = (offset + length) as i64; + let mut fs = libc::fstore_t { + fst_flags: libc::F_ALLOCATECONTIG, + fst_posmode: libc::F_PEOFPOSMODE, + fst_offset: 0, + fst_length: proposed_length, + fst_bytesalloc: 0, + }; + + let res = unsafe { libc::fcntl(fd, libc::F_PREALLOCATE, &mut fs as *mut _) }; + if res < 0 { + fs.fst_flags = libc::F_ALLOCATEALL; + let res = unsafe { libc::fcntl(fd, libc::F_PREALLOCATE, &mut fs as &mut _) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + } + + let st = Self::unpatched_stat(&FileId::Fd(fd))?; + if st.st_size >= proposed_length { + // fallocate should not shrink the file. The file is already larger than needed. 
+ return Ok(()); + } + + let res = unsafe { libc::ftruncate(fd, proposed_length) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn do_lseek(&self, inode: Inode, handle: Handle, offset: u64, whence: u32) -> io::Result { + let data = self.get_inode_handle_data(inode, handle)?; + + // SEEK_DATA and SEEK_HOLE have slightly different semantics + // in Linux vs. macOS, which means we can't support them. + let mwhence = if whence == 3 { + // SEEK_DATA + return Ok(offset); + } else if whence == 4 { + // SEEK_HOLE + libc::SEEK_END + } else { + whence as i32 + }; + + let fd = data.file.write().unwrap().as_raw_fd(); + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::lseek(fd, offset as bindings::off64_t, mwhence as libc::c_int) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(res as u64) + } + + fn do_setupmapping( + &self, + inode: Inode, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + if map_sender.is_none() { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + let prot_flags = if (flags & fuse::SetupmappingFlags::WRITE.bits()) != 0 { + libc::PROT_READ | libc::PROT_WRITE + } else { + libc::PROT_READ + }; + + if (moffset + len) > shm_size { + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + + let guest_addr = guest_shm_base + moffset; + + // Ensure the inode is in the top layer + let inode_data = self.get_inode_data(inode)?; + let inode_data = self.ensure_top_layer(inode_data)?; + + let file = self.open_inode(inode_data.inode, libc::O_RDWR)?; + let fd = file.as_raw_fd(); + + let host_addr = unsafe { + libc::mmap( + null_mut(), + len as usize, + prot_flags, + libc::MAP_SHARED, + fd, + foffset as libc::off_t, + ) + }; + if host_addr == libc::MAP_FAILED { + return 
Err(linux_error(io::Error::last_os_error())); + } + + let ret = unsafe { libc::close(fd) }; + if ret == -1 { + return Err(linux_error(io::Error::last_os_error())); + } + + // We've checked that map_sender is something above. + let sender = map_sender.as_ref().unwrap(); + let (reply_sender, reply_receiver) = unbounded(); + sender + .send(MemoryMapping::AddMapping( + reply_sender, + host_addr as u64, + guest_addr, + len, + )) + .unwrap(); + if !reply_receiver.recv().unwrap() { + error!("Error requesting HVF the addition of a DAX window"); + unsafe { libc::munmap(host_addr, len as usize) }; + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + + self.map_windows + .lock() + .unwrap() + .insert(guest_addr, host_addr as u64); + + Ok(()) + } + + fn do_removemapping( + &self, + requests: Vec, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + if map_sender.is_none() { + return Err(linux_error(io::Error::from_raw_os_error(libc::ENOSYS))); + } + + for req in requests { + let guest_addr = guest_shm_base + req.moffset; + if (req.moffset + req.len) > shm_size { + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + let host_addr = match self.map_windows.lock().unwrap().remove(&guest_addr) { + Some(a) => a, + None => return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))), + }; + debug!( + "removemapping: guest_addr={:x} len={:?}", + guest_addr, req.len + ); + + let sender = map_sender.as_ref().unwrap(); + let (reply_sender, reply_receiver) = unbounded(); + sender + .send(MemoryMapping::RemoveMapping( + reply_sender, + guest_addr, + req.len, + )) + .unwrap(); + if !reply_receiver.recv().unwrap() { + error!("Error requesting HVF the removal of a DAX window"); + return Err(linux_error(io::Error::from_raw_os_error(libc::EINVAL))); + } + + let ret = unsafe { libc::munmap(host_addr as *mut libc::c_void, req.len as usize) }; + if ret == -1 { + error!("Error unmapping DAX window"); + return 
Err(linux_error(io::Error::last_os_error())); + } + } + + Ok(()) + } +} + +//-------------------------------------------------------------------------------------------------- +// Functions +//-------------------------------------------------------------------------------------------------- + +/// Returns a "bad file descriptor" error +fn ebadf() -> io::Error { + io::Error::from_raw_os_error(libc::EBADF) +} + +/// Returns an "invalid argument" error +fn einval() -> io::Error { + io::Error::from_raw_os_error(libc::EINVAL) +} + +//-------------------------------------------------------------------------------------------------- +// Trait Implementations +//-------------------------------------------------------------------------------------------------- + +impl FileSystem for OverlayFs { + type Inode = u64; + type Handle = u64; + + fn init(&self, capable: FsOptions) -> io::Result { + // Set the umask to 0 to ensure that all file permissions are set correctly + unsafe { libc::umask(0o000) }; + + // Enable readdirplus if supported + let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO; + + // Enable writeback caching if requested and supported + if self.config.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { + opts |= FsOptions::WRITEBACK_CACHE; + self.writeback.store(true, Ordering::SeqCst); + } + + // Enable submounts if supported + if capable.contains(FsOptions::SUBMOUNTS) { + opts |= FsOptions::SUBMOUNTS; + self.announce_submounts.store(true, Ordering::Relaxed); + } + + Ok(opts) + } + + fn destroy(&self) { + // Clear all handles + self.handles.write().unwrap().clear(); + + // Clear all inodes + self.inodes.write().unwrap().clear(); + + // Clear any memory-mapped windows + self.map_windows.lock().unwrap().clear(); + } + + fn statfs(&self, _ctx: Context, inode: Self::Inode) -> io::Result { + // Get the path for this inode + let c_path = self.inode_number_to_vol_path(inode)?; + + // Call statvfs64 to get filesystem statistics + // Safe 
because this will only modify `out` and we check the return value. + let mut out = MaybeUninit::::zeroed(); + let res = unsafe { bindings::statvfs64(c_path.as_ptr(), out.as_mut_ptr()) }; + if res < 0 { + return Err(io::Error::last_os_error()); + } + + // Safe because statvfs64 initialized the struct + Ok(unsafe { out.assume_init() }) + } + + fn lookup(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result { + Self::validate_name(name)?; + + #[cfg(not(feature = "efi"))] + let init_name = unsafe { CStr::from_bytes_with_nul_unchecked(INIT_CSTR) }; + + #[cfg(not(feature = "efi"))] + if self.init_inode != 0 && name == init_name { + let mut st: bindings::stat64 = unsafe { std::mem::zeroed() }; + st.st_size = INIT_BINARY.len() as i64; + st.st_ino = self.init_inode; + st.st_mode = 0o100_755; + + return Ok(Entry { + inode: self.init_inode, + generation: 0, + attr: st, + attr_flags: 0, + attr_timeout: self.config.attr_timeout, + entry_timeout: self.config.entry_timeout, + }) + } + + let (entry, _) = self.do_lookup(parent, name)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn forget(&self, _ctx: Context, inode: Self::Inode, count: u64) { + self.do_forget(inode, count); + } + + fn getattr( + &self, + _ctx: Context, + inode: Self::Inode, + _handle: Option, + ) -> io::Result<(bindings::stat64, Duration)> { + self.do_getattr(inode) + } + + fn setattr( + &self, + _ctx: Context, + inode: Self::Inode, + attr: bindings::stat64, + handle: Option, + valid: SetattrValid, + ) -> io::Result<(bindings::stat64, Duration)> { + self.do_setattr(inode, attr, handle, valid) + } + + fn readlink(&self, _ctx: Context, inode: Self::Inode) -> io::Result> { + self.do_readlink(inode) + } + + fn mkdir( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_mkdir(ctx, parent, name, mode, umask, extensions)?; + self.bump_refcount(entry.inode); + 
Ok(entry) + } + + fn unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { + Self::validate_name(name)?; + self.do_unlink(parent, name) + } + + fn rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> { + Self::validate_name(name)?; + self.do_rmdir(parent, name) + } + + fn symlink( + &self, + ctx: Context, + linkname: &CStr, + parent: Inode, + name: &CStr, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_symlink(ctx, linkname, parent, name, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn rename( + &self, + _ctx: Context, + old_parent: Self::Inode, + old_name: &CStr, + new_parent: Self::Inode, + new_name: &CStr, + flags: u32, + ) -> io::Result<()> { + Self::validate_name(old_name)?; + Self::validate_name(new_name)?; + self.do_rename(old_parent, old_name, new_parent, new_name, flags) + } + + fn link( + &self, + _ctx: Context, + inode: Self::Inode, + new_parent: Self::Inode, + new_name: &CStr, + ) -> io::Result { + Self::validate_name(new_name)?; + let entry = self.do_link(inode, new_parent, new_name)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn open( + &self, + _ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + if inode == self.init_inode { + Ok((Some(self.init_handle), OpenOptions::empty())) + } else { + self.do_open(inode, flags) + } + } + + fn read( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + mut w: W, + size: u32, + offset: u64, + _lock_owner: Option, + _flags: u32, + ) -> io::Result { + #[cfg(not(feature = "efi"))] + if inode == self.init_inode { + return w.write(&INIT_BINARY[offset as usize..(offset + (size as u64)) as usize]); + } + + let data = self.get_inode_handle_data(inode, handle)?; + + let f = data.file.read().unwrap(); + w.write_from(&f, size as usize, offset) + } + + fn write( + &self, + _ctx: Context, + inode: Self::Inode, + handle: 
Self::Handle, + mut r: R, + size: u32, + offset: u64, + _lock_owner: Option, + _delayed_write: bool, + _kill_priv: bool, + _flags: u32, + ) -> io::Result { + let data = self.get_inode_handle_data(inode, handle)?; + let f = data.file.read().unwrap(); + r.read_to(&f, size as usize, offset) + } + + fn flush( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + _lock_owner: u64, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + // Since this method is called whenever an fd is closed in the client, we can emulate that + // behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe + // because this doesn't modify any memory and we check the return values. + unsafe { + let newfd = libc::dup(data.file.write().unwrap().as_raw_fd()); + if newfd < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + if libc::close(newfd) < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + } + + fn release( + &self, + _ctx: Context, + inode: Self::Inode, + _flags: u32, + handle: Self::Handle, + _flush: bool, + _flock_release: bool, + _lock_owner: Option, + ) -> io::Result<()> { + self.do_release(inode, handle) + } + + fn fsync( + &self, + _ctx: Context, + inode: Self::Inode, + _datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + let data = self.get_inode_handle_data(inode, handle)?; + + // Safe because this doesn't modify any memory and we check the return values. 
+ let res = unsafe { libc::fsync(data.file.write().unwrap().as_raw_fd()) }; + if res < 0 { + return Err(linux_error(io::Error::last_os_error())); + } + + Ok(()) + } + + fn opendir( + &self, + _ctx: Context, + inode: Self::Inode, + flags: u32, + ) -> io::Result<(Option, OpenOptions)> { + self.do_open(inode, flags | libc::O_DIRECTORY as u32) + } + + fn readdir( + &self, + _ctx: Context, + inode: Self::Inode, + handle: Self::Handle, + size: u32, + offset: u64, + add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry) -> io::Result, + { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_readdir(inode, size, offset, add_entry) + } + + fn readdirplus( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + size: u32, + offset: u64, + mut add_entry: F, + ) -> io::Result<()> + where + F: FnMut(DirEntry, Entry) -> io::Result, + { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_readdir(inode, size, offset, |dir_entry| { + let (entry, _) = self.do_lookup(inode, &CString::new(dir_entry.name).unwrap())?; + add_entry(dir_entry, entry) + }) + } + + fn releasedir( + &self, + _ctx: Context, + inode: Self::Inode, + _flags: u32, + handle: Self::Handle, + ) -> io::Result<()> { + let _ = self.get_inode_handle_data(inode, handle)?; + self.do_release(inode, handle) + } + + fn fsyncdir( + &self, + ctx: Context, + inode: Self::Inode, + datasync: bool, + handle: Self::Handle, + ) -> io::Result<()> { + self.fsync(ctx, inode, datasync, handle) + } + + fn setxattr( + &self, + _ctx: Context, + inode: Self::Inode, + name: &CStr, + value: &[u8], + flags: u32, + ) -> io::Result<()> { + self.do_setxattr(inode, name, value, flags) + } + + fn getxattr( + &self, + _ctx: Context, + inode: Self::Inode, + name: &CStr, + size: u32, + ) -> io::Result { + self.do_getxattr(inode, name, size) + } + + fn listxattr( + &self, + _ctx: Context, + inode: Self::Inode, + size: u32, + ) -> io::Result { + self.do_listxattr(inode, size) + } + + fn removexattr(&self, _ctx: 
Context, inode: Self::Inode, name: &CStr) -> io::Result<()> { + self.do_removexattr(inode, name) + } + + fn access(&self, ctx: Context, inode: Self::Inode, mask: u32) -> io::Result<()> { + let c_path = self.inode_number_to_vol_path(inode)?; + + let st = Self::patched_stat(&FileId::Path(c_path))?; + + let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK); + + if mode == libc::F_OK { + // The file exists since we were able to call `stat(2)` on it. + return Ok(()); + } + + if (mode & libc::R_OK) != 0 + && ctx.uid != 0 + && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0) + && st.st_mode & 0o004 == 0 + { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + if (mode & libc::W_OK) != 0 + && ctx.uid != 0 + && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0) + && st.st_mode & 0o002 == 0 + { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + // root can only execute something if it is executable by one of the owner, the group, or + // everyone. 
+ if (mode & libc::X_OK) != 0 + && (ctx.uid != 0 || st.st_mode & 0o111 == 0) + && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0) + && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0) + && st.st_mode & 0o001 == 0 + { + return Err(linux_error(io::Error::from_raw_os_error(libc::EACCES))); + } + + Ok(()) + } + + fn create( + &self, + ctx: Context, + parent: Self::Inode, + name: &CStr, + mode: u32, + flags: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result<(Entry, Option, OpenOptions)> { + Self::validate_name(name)?; + let (entry, handle, opts) = self.do_create(ctx, parent, name, mode, flags, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok((entry, handle, opts)) + } + + fn mknod( + &self, + ctx: Context, + parent: Inode, + name: &CStr, + mode: u32, + _rdev: u32, + umask: u32, + extensions: Extensions, + ) -> io::Result { + Self::validate_name(name)?; + let entry = self.do_mknod(ctx, parent, name, mode, umask, extensions)?; + self.bump_refcount(entry.inode); + Ok(entry) + } + + fn fallocate( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + _mode: u32, + offset: u64, + length: u64, + ) -> io::Result<()> { + self.do_fallocate(inode, handle, offset, length) + } + + fn lseek( + &self, + _ctx: Context, + inode: Inode, + handle: Handle, + offset: u64, + whence: u32, + ) -> io::Result { + self.do_lseek(inode, handle, offset, whence) + } + + fn setupmapping( + &self, + _ctx: Context, + inode: Inode, + _handle: Handle, + foffset: u64, + len: u64, + flags: u64, + moffset: u64, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + self.do_setupmapping( + inode, + foffset, + len, + flags, + moffset, + guest_shm_base, + shm_size, + map_sender, + ) + } + + fn removemapping( + &self, + _ctx: Context, + requests: Vec, + guest_shm_base: u64, + shm_size: u64, + map_sender: &Option>, + ) -> io::Result<()> { + self.do_removemapping(requests, guest_shm_base, shm_size, map_sender) + } +} + +impl Default for Config 
{ + fn default() -> Self { + Self { + entry_timeout: Duration::from_secs(5), + attr_timeout: Duration::from_secs(5), + cache_policy: CachePolicy::default(), // Use the default cache policy (Auto) + writeback: false, + xattr: false, + proc_sfd_rawfd: None, + export_fsid: 0, + export_table: None, + layers: vec![], + } + } +} + +//-------------------------------------------------------------------------------------------------- +// External Functions +//-------------------------------------------------------------------------------------------------- + +extern "C" { + /// macOS system call for cloning a file with COW semantics + /// + /// Creates a copy-on-write clone of a file. + /// + /// ## Arguments + /// + /// * `src` - Path to the source file + /// * `dst` - Path to the destination file + /// * `flags` - Currently unused, must be 0 + /// + /// ## Returns + /// + /// * `0` on success + /// * `-1` on error with errno set + fn clonefile( + src: *const libc::c_char, + dst: *const libc::c_char, + flags: libc::c_int, + ) -> libc::c_int; +} diff --git a/src/devices/src/virtio/fs/mod.rs b/src/devices/src/virtio/fs/mod.rs index ea475a5c1..208414bb7 100644 --- a/src/devices/src/virtio/fs/mod.rs +++ b/src/devices/src/virtio/fs/mod.rs @@ -1,10 +1,11 @@ mod device; #[allow(dead_code)] mod filesystem; +mod server; pub mod fuse; +mod kinds; #[allow(dead_code)] mod multikey; -mod server; mod worker; #[cfg(target_os = "linux")] @@ -13,11 +14,16 @@ pub mod linux; pub use linux::fs_utils; #[cfg(target_os = "linux")] pub use linux::passthrough; +#[cfg(target_os = "linux")] +pub use linux::overlayfs; #[cfg(target_os = "macos")] pub mod macos; +pub use kinds::*; #[cfg(target_os = "macos")] pub use macos::fs_utils; #[cfg(target_os = "macos")] +pub use macos::overlayfs; +#[cfg(target_os = "macos")] pub use macos::passthrough; use super::bindings; diff --git a/src/devices/src/virtio/fs/multikey.rs b/src/devices/src/virtio/fs/multikey.rs index 8dc35a447..bcbcb2717 100644 --- 
a/src/devices/src/virtio/fs/multikey.rs +++ b/src/devices/src/virtio/fs/multikey.rs @@ -9,7 +9,7 @@ use std::collections::BTreeMap; /// `std::collections::BTreeMap` also apply to this struct. Additionally, there is a 1:1 /// relationship between the 2 key types. In other words, for each `K1` in the map, there is exactly /// one `K2` in the map and vice versa. -#[derive(Default)] +#[derive(Default, Debug)] pub struct MultikeyBTreeMap where K1: Ord, @@ -18,8 +18,8 @@ where // We need to keep a copy of the second key in the main map so that we can remove entries using // just the main key. Otherwise we would require the caller to provide both keys when calling // `remove`. - main: BTreeMap, - alt: BTreeMap, + pub main: BTreeMap, + pub alt: BTreeMap, } impl MultikeyBTreeMap diff --git a/src/devices/src/virtio/fs/server.rs b/src/devices/src/virtio/fs/server.rs index b477a099e..a6ba689fd 100644 --- a/src/devices/src/virtio/fs/server.rs +++ b/src/devices/src/virtio/fs/server.rs @@ -1,7 +1,3 @@ -// Copyright 2019 The Chromium OS Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- #[cfg(target_os = "macos")] use crossbeam_channel::Sender; #[cfg(target_os = "macos")] @@ -17,61 +13,47 @@ use std::sync::atomic::{AtomicU64, Ordering}; use vm_memory::ByteValued; use super::super::linux_errno::linux_error; -use super::bindings; use super::descriptor_utils::{Reader, Writer}; -use super::filesystem::{ - Context, DirEntry, Entry, Extensions, FileSystem, GetxattrReply, ListxattrReply, SecContext, - ZeroCopyReader, ZeroCopyWriter, -}; +use super::filesystem::{Context, DirEntry, Entry, Extensions, FileSystem, GetxattrReply, ListxattrReply, SecContext, ZeroCopyReader, ZeroCopyWriter}; use super::fs_utils::einval; use super::fuse::*; +use super::{bindings, FsImpl}; use super::{FsError as Error, Result}; use crate::virtio::VirtioShmRegion; -const MAX_BUFFER_SIZE: u32 = 1 << 20; -const BUFFER_HEADER_SIZE: u32 = 0x1000; -const DIRENT_PADDING: [u8; 8] = [0; 8]; - -struct ZCReader<'a>(Reader<'a>); - -impl ZeroCopyReader for ZCReader<'_> { - fn read_to(&mut self, f: &File, count: usize, off: u64) -> io::Result { - self.0.read_to_at(f, count, off) - } +//-------------------------------------------------------------------------------------------------- +// Constants +//-------------------------------------------------------------------------------------------------- + +pub(super) const MAX_BUFFER_SIZE: u32 = 1 << 20; +pub(super) const BUFFER_HEADER_SIZE: u32 = 0x1000; +pub(super) const DIRENT_PADDING: [u8; 8] = [0; 8]; + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +/// `FsImplServer` is a concrete FUSE server implementation designed to work with specific +/// filesystem implementations provided by libkrun, particularly: +/// +/// - [`PassthroughFs`]: For direct passthrough access to the host filesystem +/// - [`OverlayFs`]: For overlayfs functionality to combine multiple filesystem layers 
+pub struct FsImplServer { + fs: FsImpl, + options: AtomicU64, } -impl io::Read for ZCReader<'_> { - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.0.read(buf) - } -} +struct ZCReader<'a>(Reader<'a>); struct ZCWriter<'a>(Writer<'a>); -impl ZeroCopyWriter for ZCWriter<'_> { - fn write_from(&mut self, f: &File, count: usize, off: u64) -> io::Result { - self.0.write_from_at(f, count, off) - } -} - -impl io::Write for ZCWriter<'_> { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.0.write(buf) - } - - fn flush(&mut self) -> io::Result<()> { - self.0.flush() - } -} - -pub struct Server { - fs: F, - options: AtomicU64, -} +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- -impl Server { - pub fn new(fs: F) -> Server { - Server { +impl FsImplServer { + pub fn new(fs: FsImpl) -> FsImplServer { + FsImplServer { fs, options: AtomicU64::new(FsOptions::empty().bits()), } @@ -94,7 +76,7 @@ impl Server { w, ); } - debug!("opcode: {}", in_header.opcode); + match in_header.opcode { x if x == Opcode::Lookup as u32 => self.lookup(in_header, r, w), x if x == Opcode::Forget as u32 => self.forget(in_header, r), // No reply. 
@@ -1409,6 +1391,42 @@ impl Server { } } +//-------------------------------------------------------------------------------------------------- +// Trait Implementations +//-------------------------------------------------------------------------------------------------- + +impl ZeroCopyReader for ZCReader<'_> { + fn read_to(&mut self, f: &File, count: usize, off: u64) -> io::Result { + self.0.read_to_at(f, count, off) + } +} + +impl io::Read for ZCReader<'_> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.0.read(buf) + } +} + +impl ZeroCopyWriter for ZCWriter<'_> { + fn write_from(&mut self, f: &File, count: usize, off: u64) -> io::Result { + self.0.write_from_at(f, count, off) + } +} + +impl io::Write for ZCWriter<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.0.write(buf) + } + + fn flush(&mut self) -> io::Result<()> { + self.0.flush() + } +} + +//-------------------------------------------------------------------------------------------------- +// Functions +//-------------------------------------------------------------------------------------------------- + fn reply_ok( out: Option, data: Option<&[u8]>, diff --git a/src/devices/src/virtio/fs/tests/overlayfs/create.rs b/src/devices/src/virtio/fs/tests/overlayfs/create.rs new file mode 100644 index 000000000..82b62398c --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/create.rs @@ -0,0 +1,1488 @@ +use std::{ffi::CString, fs, io}; + +use crate::virtio::{ + bindings, + fs::filesystem::{Context, Extensions, FileSystem}, + fuse::FsOptions, +}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_mkdir_basic() -> io::Result<()> { + // Create test layers: + // Single layer with a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = 
helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Create a new directory + let dir_name = CString::new("new_dir").unwrap(); + let ctx = Context::default(); + let entry = fs.mkdir(ctx, 1, &dir_name, 0o755, 0, Extensions::default())?; + + // Verify the directory was created with correct mode + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + assert_eq!(entry.attr.st_mode & 0o777, 0o755); + + // Verify we can look it up + let lookup_entry = fs.lookup(ctx, 1, &dir_name)?; + assert_eq!(lookup_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Verify the directory exists on disk in the top layer + let dir_path = temp_dirs.last().unwrap().path().join("new_dir"); + assert!(dir_path.exists()); + assert!(dir_path.is_dir()); + + Ok(()) +} + +#[test] +fn test_mkdir_nested() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/bottom_file + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + // - dir3/top_file + // - dir1/.wh.subdir (whiteout) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![ + ("dir3", true, 0o755), + ("dir3/top_file", false, 0o644), + ("dir1/.wh.subdir", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create nested directory in dir1 (should trigger copy-up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let nested_name = CString::new("new_nested").unwrap(); + let 
nested_entry = fs.mkdir( + ctx, + dir1_entry.inode, + &nested_name, + 0o700, + 0, + Extensions::default(), + )?; + assert_eq!(nested_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Test 2: Create directory inside the newly created nested directory + let deep_name = CString::new("deep_dir").unwrap(); + let deep_entry = fs.mkdir( + ctx, + nested_entry.inode, + &deep_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(deep_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Test 3: Create directory in dir2 (middle layer, should trigger copy-up) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let middle_nested_name = CString::new("middle_nested").unwrap(); + let middle_nested_entry = fs.mkdir( + ctx, + dir2_entry.inode, + &middle_nested_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!( + middle_nested_entry.attr.st_mode & libc::S_IFMT, + libc::S_IFDIR + ); + + // Test 4: Create directory in dir3 (top layer, no copy-up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + let top_nested_name = CString::new("top_nested").unwrap(); + let top_nested_entry = fs.mkdir( + ctx, + dir3_entry.inode, + &top_nested_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(top_nested_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + helper::debug_print_layers(&temp_dirs, false)?; + + // Verify all directories exist in appropriate layers + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir1/new_nested").exists()); + assert!(top_layer.join("dir1/new_nested/deep_dir").exists()); + assert!(top_layer.join("dir2/middle_nested").exists()); + assert!(top_layer.join("dir3/top_nested").exists()); + + // Verify the original files are still accessible + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, dir1_entry.inode, 
&file1_name)?; + assert_eq!(file1_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_mkdir_with_umask() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/subdir/ (0o755) + // - dir1/subdir/file1 + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ (0o777) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file1", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![("dir3", true, 0o777)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create directory with different umasks in root + let dir_names = vec![ + ("dir_umask_022", 0o777, 0o022, 0o755), // Common umask + ("dir_umask_077", 0o777, 0o077, 0o700), // Strict umask + ("dir_umask_002", 0o777, 0o002, 0o775), // Group writable + ("dir_umask_000", 0o777, 0o000, 0o777), // No umask + ]; + + let test_cases = dir_names.clone(); + for (name, mode, umask, expected) in test_cases { + let dir_name = CString::new(name).unwrap(); + let entry = fs.mkdir(ctx, 1, &dir_name, mode, umask, Extensions::default())?; + assert_eq!( + entry.attr.st_mode & 0o777, + expected, + "Directory {} has wrong permissions", + name + ); + } + + // Test 2: Create nested directories with umask in different layers + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let nested_name = CString::new("nested_umask").unwrap(); + let nested_entry = fs.mkdir( + ctx, + dir1_entry.inode, + &nested_name, + 0o777, + 0o027, + Extensions::default(), + )?; + assert_eq!(nested_entry.attr.st_mode & 0o777, 0o750); + + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let 
middle_name = CString::new("middle_umask").unwrap(); + let middle_entry = fs.mkdir( + ctx, + dir2_entry.inode, + &middle_name, + 0o777, + 0o077, + Extensions::default(), + )?; + assert_eq!(middle_entry.attr.st_mode & 0o777, 0o700); + + Ok(()) +} + +#[test] +fn test_mkdir_existing_name() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/file2 + // Layer 1 (middle): + // - dir2/ + // - dir2/file3 + // - dir1/another_file + // Layer 2 (top): + // - dir3/ + // - dir3/file4 + // - dir1/.wh.subdir (whiteout) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/file3", false, 0o644), + ("dir1/another_file", false, 0o644), + ], + vec![ + ("dir3", true, 0o755), + ("dir3/file4", false, 0o644), + ("dir1/.wh.subdir", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Try to create directory with name of existing file in bottom layer + let file1_name = CString::new("file1").unwrap(); + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + match fs.mkdir( + ctx, + dir1_entry.inode, + &file1_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => { + helper::debug_print_layers(&temp_dirs, false)?; + panic!("Expected mkdir with existing file name to fail"); + } + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + // Test 2: Try to create directory with name of existing file in middle layer + let file3_name = CString::new("file3").unwrap(); + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + match fs.mkdir( + ctx, 
+ dir2_entry.inode, + &file3_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => panic!("Expected mkdir with existing file name to fail"), + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + // Test 3: Try to create directory with name of existing directory + let dir3_name = CString::new("dir3").unwrap(); + match fs.mkdir(ctx, 1, &dir3_name, 0o755, 0, Extensions::default()) { + Ok(_) => panic!("Expected mkdir with existing directory name to fail"), + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + // Test 4: Try to create directory with name that exists in lower layer but is whited out + let subdir_name = CString::new("subdir").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // This should succeed because the original subdir is whited out + let new_subdir = fs.mkdir( + ctx, + dir1_entry.inode, + &subdir_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(new_subdir.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + Ok(()) +} + +#[test] +fn test_mkdir_invalid_parent() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // - .wh.dir1 (whiteout entire dir1) + // Layer 2 (top): + // - dir3/ + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/file2", false, 0o644), + (".wh.dir1", false, 0o644), // Whiteout entire dir1 + ], + vec![("dir3", true, 0o755)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&_temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Try to create directory with non-existent parent inode + let dir_name = CString::new("new_dir").unwrap(); + let invalid_inode = 999999; + match fs.mkdir( + ctx, + 
invalid_inode, + &dir_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => panic!("Expected mkdir with invalid parent to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EBADF)), + } + + // Test 2: Try to create directory in whited-out directory + let dir1_name = CString::new("dir1").unwrap(); + match fs.lookup(ctx, 1, &dir1_name) { + Ok(_) => panic!("Expected lookup of whited-out directory to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Test 3: Try to create directory with file as parent + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(ctx, dir2_entry.inode, &file2_name)?; + + let nested_name = CString::new("nested").unwrap(); + match fs.mkdir( + ctx, + file2_entry.inode, + &nested_name, + 0o755, + 0, + Extensions::default(), + ) { + Ok(_) => panic!("Expected mkdir with file as parent to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOTDIR)), + } + + Ok(()) +} + +#[test] +fn test_mkdir_invalid_name() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/.hidden_file + // - dir1/subdir/ + // Layer 1 (middle): + // - dir2/ + // - dir2/.wh..wh..opq (opaque directory) + // Layer 2 (top): + // - dir3/ + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/.hidden_file", false, 0o644), + ("dir1/subdir", true, 0o755), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/.wh..wh..opq", false, 0o644), // Opaque directory marker + ], + vec![("dir3", true, 0o755)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test various invalid names + let test_cases = vec![ + ("", io::ErrorKind::InvalidInput, "empty name"), + ( + "..", + io::ErrorKind::PermissionDenied, + "parent dir traversal", + ), + 
("foo/bar", io::ErrorKind::PermissionDenied, "contains slash"), + ( + "foo\\bar", + io::ErrorKind::PermissionDenied, + "contains backslash", + ), + ( + "foo\0bar", + io::ErrorKind::InvalidInput, + "contains null byte", + ), + (".wh.foo", io::ErrorKind::InvalidInput, "whiteout prefix"), + (".wh..wh..opq", io::ErrorKind::InvalidInput, "opaque marker"), + ]; + + for (name, expected_kind, desc) in test_cases { + let name = CString::new(name.as_bytes().to_vec()).unwrap_or_default(); + match fs.mkdir(ctx, 1, &name, 0o755, 0, Extensions::default()) { + Ok(_) => panic!("Expected mkdir with {} to fail", desc), + Err(e) => assert_eq!( + e.kind(), + expected_kind, + "Wrong error kind for {}: expected {:?}, got {:?}", + desc, + expected_kind, + e.kind() + ), + } + } + + // Test invalid UTF-8 separately since it can't be represented as a string literal + let invalid_utf8 = vec![0x66, 0x6f, 0x6f, 0x80, 0x62, 0x61, 0x72]; // "foo", invalid byte 0x80, "bar" + let name = CString::new(invalid_utf8).unwrap(); + match fs.mkdir(ctx, 1, &name, 0o755, 0, Extensions::default()) { + Ok(_) => panic!("Expected mkdir with invalid UTF-8 to fail"), + Err(e) => assert_eq!( + e.kind(), + io::ErrorKind::InvalidInput, + "Wrong error kind for invalid UTF-8: expected {:?}, got {:?}", + io::ErrorKind::InvalidInput, + e.kind() + ), + } + + // Test with valid but unusual names + let valid_cases = vec![ + "very_long_name_that_is_valid_but_unusual_and_tests_length_limits", + " leading_space", + "trailing_space ", + "!@#$%^&*()_+-=", + ]; + + for name in valid_cases { + let name = CString::new(name).unwrap(); + // These should succeed + let entry = fs.mkdir(ctx, 1, &name, 0o755, 0, Extensions::default())?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + } + + Ok(()) +} + +#[test] +fn test_mkdir_multiple_layers() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/bottom_file + // Layer 1 (middle): + // - 
dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + // - dir3/top_file + // - .wh.dir1 (whiteout) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![ + ("dir3", true, 0o755), + ("dir3/top_file", false, 0o644), + (".wh.dir1", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create directory in each layer and verify copy-up behavior + let dir_names = vec![("dir2", "new_dir2"), ("dir3", "new_dir3")]; + + for (parent, new_dir) in dir_names { + let parent_name = CString::new(parent).unwrap(); + let parent_entry = fs.lookup(ctx, 1, &parent_name)?; + + let new_name = CString::new(new_dir).unwrap(); + let entry = fs.mkdir( + ctx, + parent_entry.inode, + &new_name, + 0o755, + 0, + Extensions::default(), + )?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Create a nested directory inside + let nested_name = CString::new(format!("nested_in_{}", new_dir)).unwrap(); + let nested_entry = fs.mkdir( + ctx, + entry.inode, + &nested_name, + 0o700, + 0, + Extensions::default(), + )?; + assert_eq!(nested_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + } + + // Test 2: Verify all directories exist in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir2/new_dir2").exists()); + assert!(top_layer.join("dir2/new_dir2/nested_in_new_dir2").exists()); + assert!(top_layer.join("dir3/new_dir3").exists()); + assert!(top_layer.join("dir3/new_dir3/nested_in_new_dir3").exists()); + + // Test 3: Try to create directory in whited-out dir1 (should fail) + let dir1_name = CString::new("dir1").unwrap(); + match fs.lookup(ctx, 1, &dir1_name) { + 
Ok(_) => panic!("Expected lookup of whited-out directory to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + Ok(()) +} + +#[test] +fn test_symlink_basic() -> io::Result<()> { + // Create test layers: + // Single layer with a file + let layers = vec![vec![("target_file", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Create a new symlink + let link_name = CString::new("link").unwrap(); + let target_name = CString::new("target_file").unwrap(); + let ctx = Context::default(); + let entry = fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default())?; + + // Verify the symlink was created with correct mode + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + assert_eq!(entry.attr.st_mode & 0o777, 0o777); // Symlinks are typically 0777 + + // Verify we can look it up + let lookup_entry = fs.lookup(ctx, 1, &link_name)?; + assert_eq!(lookup_entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Verify the symlink exists on disk in the top layer + let link_path = temp_dirs.last().unwrap().path().join("link"); + assert!(link_path.exists()); + assert!(link_path.is_symlink()); + + // Verify the symlink points to the correct target + let target = fs.readlink(ctx, lookup_entry.inode)?; + assert_eq!(target, target_name.to_bytes()); + + Ok(()) +} + +#[test] +fn test_symlink_nested() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/bottom_file + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + // - dir3/top_file + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 
0o644)], + vec![("dir3", true, 0o755), ("dir3/top_file", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test 1: Create symlink in dir1 (should trigger copy-up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let link_name = CString::new("link_to_file1").unwrap(); + let target_name = CString::new("file1").unwrap(); + let link_entry = fs.symlink( + ctx, + &target_name, + dir1_entry.inode, + &link_name, + Extensions::default(), + )?; + assert_eq!(link_entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Test 2: Create symlink in dir2 (middle layer, should trigger copy-up) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let middle_link_name = CString::new("link_to_file2").unwrap(); + let middle_target = CString::new("file2").unwrap(); + let middle_link_entry = fs.symlink( + ctx, + &middle_target, + dir2_entry.inode, + &middle_link_name, + Extensions::default(), + )?; + assert_eq!(middle_link_entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Test 3: Create symlink in dir3 (top layer, no copy-up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + let top_link_name = CString::new("link_to_top_file").unwrap(); + let top_target = CString::new("top_file").unwrap(); + let top_link_entry = fs.symlink( + ctx, + &top_target, + dir3_entry.inode, + &top_link_name, + Extensions::default(), + )?; + assert_eq!(top_link_entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Verify all symlinks exist in appropriate layers + let top_layer = temp_dirs.last().unwrap().path(); + assert!(fs::symlink_metadata(top_layer.join("dir1/link_to_file1")).is_ok()); + 
assert!(fs::symlink_metadata(top_layer.join("dir2/link_to_file2")).is_ok()); + assert!(fs::symlink_metadata(top_layer.join("dir3/link_to_top_file")).is_ok()); + + // Verify symlink targets + let link1_target = fs.readlink(ctx, link_entry.inode)?; + assert_eq!(link1_target, target_name.to_bytes()); + + let link2_target = fs.readlink(ctx, middle_link_entry.inode)?; + assert_eq!(link2_target, middle_target.to_bytes()); + + let link3_target = fs.readlink(ctx, top_link_entry.inode)?; + assert_eq!(link3_target, top_target.to_bytes()); + + Ok(()) +} + +#[test] +fn test_symlink_existing_name() -> io::Result<()> { + // Create test layers with a file and directory + let layers = vec![vec![ + ("target_file", false, 0o644), + ("existing_name", false, 0o644), + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + let link_name = CString::new("existing_name").unwrap(); + let target_name = CString::new("target_file").unwrap(); + + // Try to create a symlink with an existing name + match fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default()) { + Ok(_) => panic!("Expected error when creating symlink with existing name"), + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + Ok(()) +} + +#[test] +fn test_symlink_multiple_layers() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): base files + // Layer 1 (middle): some files + // Layer 2 (top): more files + let layers = vec![ + vec![ + ("bottom_dir", true, 0o755), + ("bottom_dir/target1", false, 0o644), + ], + vec![ + ("middle_dir", true, 0o755), + ("middle_dir/target2", false, 0o644), + ], + vec![("top_dir", true, 0o755), ("top_dir/target3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + 
fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Create symlinks to files in different layers + let test_cases = vec![ + ("link_to_bottom", "bottom_dir/target1"), + ("link_to_middle", "middle_dir/target2"), + ("link_to_top", "top_dir/target3"), + ]; + + for (link, target) in test_cases.clone() { + let link_name = CString::new(link).unwrap(); + let target_name = CString::new(target).unwrap(); + + let entry = fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default())?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFLNK); + + // Verify symlink target + let target_bytes = fs.readlink(ctx, entry.inode)?; + assert_eq!(target_bytes, target_name.to_bytes()); + } + + // Verify all symlinks exist in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + for (link, _) in test_cases { + assert!(fs::symlink_metadata(top_layer.join(link)).is_ok()); + } + + Ok(()) +} + +#[test] +fn test_symlink_invalid_name() -> io::Result<()> { + // Create a simple test layer + let layers = vec![vec![("target_file", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + let target_name = CString::new("target_file").unwrap(); + + // Test cases with invalid names + let invalid_names = vec![ + "..", // Path traversal attempt + "invalid/name", // Contains slash + ".wh.name", // Contains whiteout prefix + ".wh..wh..opq", // Opaque directory marker + ]; + + for name in invalid_names { + let link_name = CString::new(name).unwrap(); + match fs.symlink(ctx, &target_name, 1, &link_name, Extensions::default()) { + Ok(_) => panic!("Expected error for invalid name: {}", name), + Err(e) => { + assert!( + e.kind() == io::ErrorKind::InvalidInput + || e.kind() == io::ErrorKind::PermissionDenied, + "Unexpected error kind for name {}: {:?}", + name, + e.kind() + ); + } + } + } + + Ok(()) +} + 
/// Renames a file within the root directory of a single-layer overlay.
///
/// Checks that the old name stops resolving and that the new name resolves to
/// a regular file.
#[test]
fn test_rename_basic() -> io::Result<()> {
    // Single layer holding two regular files.
    let files = vec![("file1.txt", false, 0o644), ("file2.txt", false, 0o644)];
    let layers = vec![files];
    let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?;

    let ctx = Context::default();

    // Source and destination parents are both the root inode.
    let root = 1;
    let old_name = CString::new("file1.txt")?;
    let new_name = CString::new("renamed.txt")?;

    // Perform rename
    overlayfs.rename(ctx, root, &old_name, root, &new_name, 0)?;

    // Verify old name doesn't exist
    assert!(overlayfs.lookup(ctx, root, &old_name).is_err());

    // Verify new name exists
    let entry = overlayfs.lookup(ctx, root, &new_name)?;
    assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    Ok(())
}

/// Renames a file that exists only in the lower of two layers (the upper layer
/// starts out empty).
///
/// Checks that the old name can no longer be looked up and that the new name
/// resolves to a regular file.
#[test]
fn test_rename_whiteout() -> io::Result<()> {
    // Layers are listed bottom-first: the file lives in the lower layer only.
    let lower_files = vec![("file1.txt", false, 0o644)];
    let upper_files = vec![];
    let layers = vec![lower_files, upper_files];
    let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?;

    let ctx = Context::default();
    let root = 1;
    let old_name = CString::new("file1.txt")?;
    let new_name = CString::new("renamed.txt")?;

    // Rename file from lower layer
    overlayfs.rename(ctx, root, &old_name, root, &new_name, 0)?;

    // The old name must be hidden from lookups after the rename.
    assert!(overlayfs.lookup(ctx, root, &old_name).is_err());

    // The new name must resolve to a regular file.
    let entry = overlayfs.lookup(ctx, root, &new_name)?;
    assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    Ok(())
}

/// Renames a file that exists only in the lowest of three layers.
///
/// Checks that the old name can no longer be looked up and that the new name
/// resolves to a regular file.
#[test]
fn test_rename_multiple_layers() -> io::Result<()> {
    // Layers are listed bottom-first.
    let lower_files = vec![("file1.txt", false, 0o644), ("file2.txt", false, 0o644)];
    let middle_files = vec![("file3.txt", false, 0o644)];
    let upper_files = vec![("file4.txt", false, 0o644)];
    let layers = vec![lower_files, middle_files, upper_files];
    let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?;

    let ctx = Context::default();
    let root = 1;
    let old_name = CString::new("file1.txt")?;
    let new_name = CString::new("renamed.txt")?;

    // Rename file from lowest layer
    overlayfs.rename(ctx, root, &old_name, root, &new_name, 0)?;

    // The old name must be hidden from lookups after the rename.
    assert!(overlayfs.lookup(ctx, root, &old_name).is_err());

    // The new name must resolve to a regular file.
    let entry = overlayfs.lookup(ctx, root, &new_name)?;
    assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    Ok(())
}

/// Exercises rename failure paths on a single-layer overlay:
///
/// 1. the source name does not exist,
/// 2. the destination parent inode is unknown,
/// 3. a directory is renamed onto an existing regular file.
///
/// Each case only asserts that `rename` returns an error; the specific error
/// code is not checked.
#[test]
fn test_rename_errors() -> io::Result<()> {
    // Create test layers
    let files = vec![
        ("dir1", true, 0o755),
        ("dir1/file1.txt", false, 0o644),
        ("file2.txt", false, 0o644),
    ];
    let layers = vec![files];
    let (overlayfs, _temp_dirs) = helper::create_overlayfs(layers)?;

    let ctx = Context::default();
    let root = 1;
    let dir1_name = CString::new("dir1")?;
    // Look up dir1 up front; the lookup must succeed but the entry is not needed.
    let _ = overlayfs.lookup(ctx, root, &dir1_name)?;

    // Case 1: renaming a non-existent file.
    let nonexistent = CString::new("nonexistent.txt")?;
    let new_name = CString::new("renamed.txt")?;
    assert!(overlayfs
        .rename(ctx, root, &nonexistent, root, &new_name, 0)
        .is_err());

    // Case 2: renaming into a parent inode that was never handed out.
    let file2_name = CString::new("file2.txt")?;
    let invalid_parent = 99999;
    assert!(overlayfs
        .rename(ctx, root, &file2_name, invalid_parent, &new_name, 0)
        .is_err());

    // Case 3: renaming a directory onto an existing regular file.
    assert!(overlayfs
        .rename(ctx, root, &dir1_name, root, &file2_name, 0)
        .is_err());

    Ok(())
}

/// Renames a lower-layer file with the `LINUX_RENAME_WHITEOUT` flag set.
///
/// Besides the usual lookup checks, this inspects the last (top) layer
/// directory on disk and expects a regular file named `.wh.file1.txt` there.
#[test]
fn test_rename_whiteout_flag() -> io::Result<()> {
    // Layers are listed bottom-first: the file lives in the lower layer only.
    let lower_files = vec![("file1.txt", false, 0o644)];
    let upper_files = vec![];
    let layers = vec![lower_files, upper_files];
    let (overlayfs, temp_dirs) = helper::create_overlayfs(layers)?;

    let ctx = Context::default();
    let root = 1;
    let old_name = CString::new("file1.txt")?;
    let new_name = CString::new("renamed.txt")?;

    // Use the whiteout flag
    let flags = bindings::LINUX_RENAME_WHITEOUT;
    overlayfs.rename(ctx, root, &old_name, root, &new_name, flags as u32)?;

    // Verify that lookup for the old name fails
    assert!(overlayfs.lookup(ctx, root, &old_name).is_err());

    // Verify new name exists
    let entry = overlayfs.lookup(ctx, root, &new_name)?;
    assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    // The parent is the root, so the whiteout is expected directly under the
    // top layer's root with the '.wh.' prefix.
    let top_layer = temp_dirs.last().unwrap().path();
    let whiteout_path = top_layer.join(".wh.file1.txt");
    let meta = fs::metadata(&whiteout_path)?;
    // Only the file type is verified here; permission bits are not asserted.
    assert!(
        meta.file_type().is_file(),
        "Expected whiteout to be a regular file"
    );

    Ok(())
}

/// Moves a file from one directory to another within a single-layer overlay,
/// renaming it at the same time.
///
/// Checks that the old name is gone from the source directory and that the new
/// name resolves to a regular file in the destination directory.
#[test]
fn test_rename_nested_files() -> io::Result<()> {
    // Create test layers with nested structure
    let files = vec![
        ("dir1", true, 0o755),
        ("dir1/file1.txt", false, 0o644),
        ("dir2", true, 0o755),
    ];
    let (overlayfs, _temp_dirs) = helper::create_overlayfs(vec![files])?;

    let ctx = Context::default();
    let root = 1;
    let dir1_name = CString::new("dir1")?;
    let dir2_name = CString::new("dir2")?;

    // Lookup directory inodes
    let dir1_entry = overlayfs.lookup(ctx, root, &dir1_name)?;
    let dir2_entry = overlayfs.lookup(ctx, root, &dir2_name)?;

    let old_name = CString::new("file1.txt")?;
    let new_name = CString::new("renamed.txt")?;

    // Rename file between directories
    overlayfs.rename(
        ctx,
        dir1_entry.inode,
        &old_name,
        dir2_entry.inode,
        &new_name,
        0,
    )?;

    // Verify the old name no longer resolves in the source directory
    assert!(overlayfs
        .lookup(ctx, dir1_entry.inode, &old_name)
        .is_err());

    // Verify new location has the file
    let entry = overlayfs.lookup(ctx, dir2_entry.inode, &new_name)?;
    assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    Ok(())
}

/// Moves a file from a directory defined in the lowest layer into a directory
/// defined in the top layer of a three-layer overlay.
///
/// Besides the lookup checks, this expects a `.wh.file1.txt` entry to appear
/// under `dir1` in the last (top) layer directory on disk.
#[test]
fn test_rename_complex_layers() -> io::Result<()> {
    // Layers are listed bottom-first.
    let lower_files = vec![
        ("dir1", true, 0o755),
        ("dir1/file1.txt", false, 0o644),
        ("dir2", true, 0o755),
        ("dir2/file2.txt", false, 0o644),
    ];
    let middle_files = vec![("dir3", true, 0o755), ("dir3/file3.txt", false, 0o644)];
    let upper_files = vec![("dir4", true, 0o755), ("dir4/file4.txt", false, 0o644)];
    let layers = vec![lower_files, middle_files, upper_files];
    let (overlayfs, temp_dirs) = helper::create_overlayfs(layers)?;

    let ctx = Context::default();
    let root = 1;

    // Test renaming between different layer directories
    let dir1_name = CString::new("dir1")?;
    let dir4_name = CString::new("dir4")?;
    let dir1_entry = overlayfs.lookup(ctx, root, &dir1_name)?;
    let dir4_entry = overlayfs.lookup(ctx, root, &dir4_name)?;

    let old_name = CString::new("file1.txt")?;
    let new_name = CString::new("renamed.txt")?;

    // Rename from lower to upper layer directory
    overlayfs.rename(
        ctx,
        dir1_entry.inode,
        &old_name,
        dir4_entry.inode,
        &new_name,
        0,
    )?;

    // Verify file moved correctly
    assert!(overlayfs
        .lookup(ctx, dir1_entry.inode, &old_name)
        .is_err());
    let entry = overlayfs.lookup(ctx, dir4_entry.inode, &new_name)?;
    assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG);

    // Check whiteout file in the old parent's directory (dir1) in the top layer
    let top_layer = temp_dirs.last().unwrap().path();
    let whiteout_path = top_layer.join("dir1").join(".wh.file1.txt");
    assert!(
        fs::metadata(&whiteout_path).is_ok(),
        "Expected whiteout file at {:?}",
        whiteout_path
    );

    Ok(())
}

+#[test] +fn test_create_basic() -> io::Result<()> { + // Create test layers: + // Single layer with a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Create a new file in root + let file_name = CString::new("new_file.txt").unwrap(); + let ctx = Context::default(); + let (entry, handle, _) = + fs.create(ctx, 1, &file_name, 0o644, 0, 0o022, Extensions::default())?; + + // Verify the file was created with correct mode + let entry_mode = entry.attr.st_mode as u32; + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + assert_eq!(entry_mode & 0o777, 0o644 & !0o022); + + // Verify we can look it up + let lookup_entry = fs.lookup(ctx, 1, &file_name)?; + let lookup_mode = lookup_entry.attr.st_mode as u32; + assert_eq!(lookup_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Verify the file exists on disk in the top layer + let file_path = temp_dirs.last().unwrap().path().join("new_file.txt"); + assert!(file_path.exists()); + assert!(file_path.is_file()); + + // If we got a handle, release it + if let Some(h) = handle { + fs.release(ctx, entry.inode, 0, h, false, false, None)?; + } + + Ok(()) +} + +#[test] +fn test_create_nested() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 + // Layer 2 (top): + // - dir3/ + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o644)], + vec![("dir3", true, 0o755)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = 
Context::default(); + + // Test 1: Create file in dir1 (should trigger copy-up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let file_name = CString::new("new_file.txt").unwrap(); + let (entry, handle, _) = fs.create( + ctx, + dir1_entry.inode, + &file_name, + 0o644, + 0, + 0o022, + Extensions::default(), + )?; + let entry_mode = entry.attr.st_mode as u32; + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Test 2: Create file in dir2 (middle layer, should trigger copy-up) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + let middle_file_name = CString::new("middle_file.txt").unwrap(); + let (middle_entry, middle_handle, _) = fs.create( + ctx, + dir2_entry.inode, + &middle_file_name, + 0o644, + 0, + 0o022, + Extensions::default(), + )?; + let middle_mode = middle_entry.attr.st_mode as u32; + assert_eq!(middle_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Test 3: Create file in dir3 (top layer, no copy-up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + let top_file_name = CString::new("top_file.txt").unwrap(); + let (top_entry, top_handle, _) = fs.create( + ctx, + dir3_entry.inode, + &top_file_name, + 0o644, + 0, + 0o022, + Extensions::default(), + )?; + let top_mode = top_entry.attr.st_mode as u32; + assert_eq!(top_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + + // Verify all files exist in appropriate layers + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir1/new_file.txt").exists()); + assert!(top_layer.join("dir2/middle_file.txt").exists()); + assert!(top_layer.join("dir3/top_file.txt").exists()); + + // Release handles + if let Some(h) = handle { + fs.release(ctx, entry.inode, 0, h, false, false, None)?; + } + if let Some(h) = middle_handle { + fs.release(ctx, middle_entry.inode, 0, h, false, false, None)?; + } + if let 
Some(h) = top_handle { + fs.release(ctx, top_entry.inode, 0, h, false, false, None)?; + } + + Ok(()) +} + +#[test] +fn test_create_with_flags() -> io::Result<()> { + // Create test layers with a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test different flag combinations + let test_cases = vec![ + ("file_rdonly.txt", libc::O_RDONLY, 0o644), + ("file_wronly.txt", libc::O_WRONLY, 0o644), + ("file_rdwr.txt", libc::O_RDWR, 0o644), + ("file_append.txt", libc::O_WRONLY | libc::O_APPEND, 0o644), + ("file_trunc.txt", libc::O_WRONLY | libc::O_TRUNC, 0o644), + ("file_excl.txt", libc::O_WRONLY | libc::O_EXCL, 0o644), + ]; + + for (name, flags, mode) in test_cases { + let file_name = CString::new(name).unwrap(); + let (entry, handle, _) = fs.create( + ctx, + 1, + &file_name, + mode, + flags as u32, + 0o022, + Extensions::default(), + )?; + + // Verify file creation + let entry_mode = entry.attr.st_mode as u32; + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + assert_eq!(entry_mode & 0o777, mode & !0o022); + + // Verify file exists + let file_path = temp_dirs.last().unwrap().path().join(name); + assert!(file_path.exists()); + assert!(file_path.is_file()); + + // Release handle if we got one + if let Some(h) = handle { + fs.release(ctx, entry.inode, 0, h, false, false, None)?; + } + } + + Ok(()) +} + +#[test] +fn test_create_existing_name() -> io::Result<()> { + // Create test layers with existing files + let layers = vec![vec![ + ("dir1", true, 0o755), + ("existing_file.txt", false, 0o644), + ]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + let file_name = CString::new("existing_file.txt").unwrap(); + + // Try to create 
a file with existing name without O_EXCL + match fs.create( + ctx, + 1, + &file_name, + 0o644, + libc::O_WRONLY as u32, + 0o022, + Extensions::default(), + ) { + Ok(_) => panic!("Expected create with existing name to fail"), + Err(e) => assert_eq!(e.kind(), io::ErrorKind::AlreadyExists), + } + + Ok(()) +} + +#[test] +fn test_create_invalid_parent() -> io::Result<()> { + // Create test layers + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test with invalid parent inode + let file_name = CString::new("test.txt").unwrap(); + let invalid_inode = 999999; + match fs.create( + ctx, + invalid_inode, + &file_name, + 0o644, + 0, + 0o022, + Extensions::default(), + ) { + Ok(_) => panic!("Expected create with invalid parent to fail"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EBADF)), + } + + Ok(()) +} + +#[test] +fn test_mknod_basic() -> io::Result<()> { + // Create test layers with a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Test creating different types of nodes + let test_cases: Vec<(&str, u32)> = vec![ + ("fifo1", libc::S_IFIFO as u32 | 0o644), + ("sock1", libc::S_IFSOCK as u32 | 0o644), + ]; + + for (name, mode) in test_cases { + let node_name = CString::new(name).unwrap(); + let entry = fs.mknod(ctx, 1, &node_name, mode, 0, 0o022, Extensions::default())?; + + // Verify node creation + let entry_mode = entry.attr.st_mode as u32; + #[cfg(target_os = "linux")] + assert_eq!(entry_mode & libc::S_IFMT as u32, mode & libc::S_IFMT as u32); + #[cfg(target_os = "macos")] + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + assert_eq!(entry_mode & 0o777, (0o644 
& !0o022) as u32); + + // Verify node exists with correct type + let node_path = temp_dirs.last().unwrap().path().join(name); + assert!(node_path.exists()); + } + + Ok(()) +} + +#[test] +fn test_mknod_nested() -> io::Result<()> { + // Create test layers with complex structure + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ], + vec![("dir2", true, 0o755)], + vec![("dir3", true, 0o755)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + let ctx = Context::default(); + + // Create nodes in different directories + let test_cases = vec![ + ("dir1", "fifo1", libc::S_IFIFO as u32 | 0o644), + ("dir2", "sock1", libc::S_IFSOCK as u32 | 0o644), + ("dir3", "fifo2", libc::S_IFIFO as u32 | 0o644), + ]; + + for (dir, name, mode) in test_cases { + let dir_name = CString::new(dir).unwrap(); + let dir_entry = fs.lookup(ctx, 1, &dir_name)?; + let node_name = CString::new(name).unwrap(); + + let entry = fs.mknod( + ctx, + dir_entry.inode, + &node_name, + mode, + 0, + 0o022, + Extensions::default(), + )?; + + // Verify node creation + let entry_mode = entry.attr.st_mode as u32; + #[cfg(target_os = "linux")] + assert_eq!(entry_mode & libc::S_IFMT as u32, mode & libc::S_IFMT as u32); + #[cfg(target_os = "macos")] + assert_eq!(entry_mode & libc::S_IFMT as u32, libc::S_IFREG as u32); + assert_eq!(entry_mode & 0o777, (0o644 & !0o022) as u32); + + // Verify node exists in the top layer + let node_path = temp_dirs.last().unwrap().path().join(dir).join(name); + assert!(node_path.exists()); + } + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/lookup.rs b/src/devices/src/virtio/fs/tests/overlayfs/lookup.rs new file mode 100644 index 000000000..bc9afdb43 --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/lookup.rs @@ -0,0 +1,458 @@ +use std::{ffi::CString, io}; + 
+use crate::virtio::{ + fs::filesystem::{Context, FileSystem}, + fuse::FsOptions, +}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_lookup_basic() -> io::Result<()> { + // Create test layers: + // Lower layer: file1, dir1/file2 + // Upper layer: file3 + let layers = vec![ + vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("dir1/file2", false, 0o644), + ], + vec![("file3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test lookup in top layer + let file3_name = CString::new("file3").unwrap(); + let entry = fs.lookup(Context::default(), 1, &file3_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Test lookup in lower layer + let file1_name = CString::new("file1").unwrap(); + let entry = fs.lookup(Context::default(), 1, &file1_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Test lookup of directory + let dir1_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(Context::default(), 1, &dir1_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + Ok(()) +} + +#[test] +fn test_lookup_whiteout() -> io::Result<()> { + // Create test layers: + // Lower layer: file1, file2 + // Upper layer: .wh.file1 (whiteout for file1) + let layers = vec![ + vec![("file1", false, 0o644), ("file2", false, 0o644)], + vec![(".wh.file1", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test lookup of whited-out file + let file1_name = CString::new("file1").unwrap(); + 
assert!(fs.lookup(Context::default(), 1, &file1_name).is_err()); + + // Test lookup of non-whited-out file + let file2_name = CString::new("file2").unwrap(); + let entry = fs.lookup(Context::default(), 1, &file2_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_lookup_opaque_dir() -> io::Result<()> { + // Create test layers: + // Lower layer: dir1/file1, dir1/file2 + // Upper layer: dir1/.wh..wh..opq, dir1/file3 + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh..wh..opq", false, 0o644), + ("dir1/file3", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Lookup dir1 first + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + + // Test lookup of file in opaque directory + // file1 and file2 should not be visible + let file1_name = CString::new("file1").unwrap(); + assert!(fs + .lookup(Context::default(), dir1_entry.inode, &file1_name) + .is_err()); + + let file2_name = CString::new("file2").unwrap(); + assert!(fs + .lookup(Context::default(), dir1_entry.inode, &file2_name) + .is_err()); + + // file3 should be visible + let file3_name = CString::new("file3").unwrap(); + let entry = fs.lookup(Context::default(), dir1_entry.inode, &file3_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_lookup_multiple_layers() -> io::Result<()> { + // Create test layers: + // Lower layer 1: file1 + // Lower layer 2: file2 + // Upper layer: file3 + let layers = vec![ + vec![("file1", false, 0o644)], + vec![("file2", false, 0o644)], + vec![("file3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + 
helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test lookup in each layer + let file1_name = CString::new("file1").unwrap(); + let entry = fs.lookup(Context::default(), 1, &file1_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + let file2_name = CString::new("file2").unwrap(); + let entry = fs.lookup(Context::default(), 1, &file2_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + let file3_name = CString::new("file3").unwrap(); + let entry = fs.lookup(Context::default(), 1, &file3_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_lookup_nested_whiteouts() -> io::Result<()> { + // Create test layers: + // Lower layer: dir1/file1, dir2/file2 + // Middle layer: dir1/.wh.file1, .wh.dir2 + // Upper layer: dir1/file3 + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh.file1", false, 0o644), + (".wh.dir2", false, 0o644), + ], + vec![("dir1", true, 0o755), ("dir1/file3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Lookup dir1 + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + + // file1 should be whited out + let file1_name = CString::new("file1").unwrap(); + assert!(fs + .lookup(Context::default(), dir1_entry.inode, &file1_name) + .is_err()); + + // file3 should be visible + let file3_name = CString::new("file3").unwrap(); + let entry = fs.lookup(Context::default(), dir1_entry.inode, &file3_name)?; + assert_eq!(entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // dir2 should be whited out + let dir2_name = CString::new("dir2").unwrap(); + 
assert!(fs.lookup(Context::default(), 1, &dir2_name).is_err()); + + Ok(()) +} + +#[test] +fn test_lookup_complex_layers() -> io::Result<()> { + // Create test layers with complex directory structure: + // Layer 0 (bottom): bar, bar/hi, bar/hi/txt + // Layer 1: foo, foo/hello, bar + // Layer 2: bar, bar/hi, bar/hi/xml + // Layer 3 (top): bar, bar/hello, bar/hi, bar/hi/json + let layers = vec![ + vec![ + ("bar", true, 0o755), + ("bar/hi", true, 0o755), + ("bar/hi/txt", false, 0o644), + ], + vec![ + ("foo", true, 0o755), + ("foo/hello", false, 0o644), + ("bar", true, 0o755), + ], + vec![ + ("bar", true, 0o755), + ("bar/hi", true, 0o755), + ("bar/hi/xml", false, 0o644), + ], + vec![ + ("bar", true, 0o755), + ("bar/hello", false, 0o644), + ("bar/hi", true, 0o755), + ("bar/hi/json", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // First lookup 'bar' directory + let bar_name = CString::new("bar").unwrap(); + let bar_entry = fs.lookup(Context::default(), 1, &bar_name)?; + assert_eq!(bar_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Then lookup 'hi' in bar directory + let hi_name = CString::new("hi").unwrap(); + let hi_entry = fs.lookup(Context::default(), bar_entry.inode, &hi_name)?; + assert_eq!(hi_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Finally lookup 'txt' in bar/hi directory - should find it in layer 0 + let txt_name = CString::new("txt").unwrap(); + let txt_entry = fs.lookup(Context::default(), hi_entry.inode, &txt_name)?; + assert_eq!(txt_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Verify we can also find files from other layers + // Lookup 'json' in bar/hi - should find it in layer 3 (top) + let json_name = CString::new("json").unwrap(); + let json_entry = fs.lookup(Context::default(), hi_entry.inode, &json_name)?; + assert_eq!(json_entry.attr.st_mode & libc::S_IFMT, 
libc::S_IFREG); + + // Lookup 'xml' in bar/hi - should find it in layer 2 + let xml_name = CString::new("xml").unwrap(); + let xml_entry = fs.lookup(Context::default(), hi_entry.inode, &xml_name)?; + assert_eq!(xml_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'hello' in bar - should find it in layer 3 + let hello_name = CString::new("hello").unwrap(); + let hello_entry = fs.lookup(Context::default(), bar_entry.inode, &hello_name)?; + assert_eq!(hello_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'foo' in root - should find it in layer 1 + let foo_name = CString::new("foo").unwrap(); + let foo_entry = fs.lookup(Context::default(), 1, &foo_name)?; + assert_eq!(foo_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Lookup 'hello' in foo - should find it in layer 1 + let foo_hello_name = CString::new("hello").unwrap(); + let foo_hello_entry = fs.lookup(Context::default(), foo_entry.inode, &foo_hello_name)?; + assert_eq!(foo_hello_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_lookup_complex_opaque_dirs() -> io::Result<()> { + // Create test layers with complex directory structure and opaque directories: + // Layer 0 (bottom): + // - bar/ + // - bar/file1 + // - bar/subdir/ + // - bar/subdir/bottom_file + // - other/ + // - other/file + // Layer 1: + // - bar/ (with opaque marker) + // - bar/file2 + // - extra/ + // - extra/data + // Layer 2 (top): + // - bar/ + // - bar/file3 + // - bar/subdir/ + // - bar/subdir/top_file + // - other/ + // - other/new_file + + let layers = vec![ + vec![ + ("bar", true, 0o755), + ("bar/file1", false, 0o644), + ("bar/subdir", true, 0o755), + ("bar/subdir/bottom_file", false, 0o644), + ("other", true, 0o755), + ("other/file", false, 0o644), + ], + vec![ + ("bar", true, 0o755), + ("bar/.wh..wh..opq", false, 0o644), + ("bar/file2", false, 0o644), + ("extra", true, 0o755), + ("extra/data", false, 0o644), + ], + vec![ + ("bar", true, 0o755), + ("bar/file3", 
false, 0o644), + ("bar/subdir", true, 0o755), + ("bar/subdir/top_file", false, 0o644), + ("other", true, 0o755), + ("other/new_file", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // First lookup 'bar' directory + let bar_name = CString::new("bar").unwrap(); + let bar_entry = fs.lookup(Context::default(), 1, &bar_name)?; + assert_eq!(bar_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Lookup 'file1' in bar - should NOT be found due to opaque marker in layer 1 + let file1_name = CString::new("file1").unwrap(); + let file1_result = fs.lookup(Context::default(), bar_entry.inode, &file1_name); + assert!( + file1_result.is_err(), + "file1 should be hidden by opaque directory" + ); + + // Lookup 'file2' in bar - should be found in layer 1 + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(Context::default(), bar_entry.inode, &file2_name)?; + assert_eq!(file2_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'file3' in bar - should be found in layer 2 + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(Context::default(), bar_entry.inode, &file3_name)?; + assert_eq!(file3_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'subdir' in bar - should be found in layer 2, not layer 0 + // because of the opaque marker in layer 1 + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(Context::default(), bar_entry.inode, &subdir_name)?; + assert_eq!(subdir_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Lookup 'bottom_file' in bar/subdir - should NOT be found due to opaque marker + let bottom_file_name = CString::new("bottom_file").unwrap(); + let bottom_file_result = fs.lookup(Context::default(), subdir_entry.inode, &bottom_file_name); + assert!( + bottom_file_result.is_err(), + "bottom_file 
should be hidden by opaque directory" + ); + + // Lookup 'top_file' in bar/subdir - should be found in layer 2 + let top_file_name = CString::new("top_file").unwrap(); + let top_file_entry = fs.lookup(Context::default(), subdir_entry.inode, &top_file_name)?; + assert_eq!(top_file_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'other' in root - should be found + let other_name = CString::new("other").unwrap(); + let other_entry = fs.lookup(Context::default(), 1, &other_name)?; + assert_eq!(other_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Lookup 'file' in other - should be found in layer 0 + // (other directory is not affected by the opaque marker in bar) + let other_file_name = CString::new("file").unwrap(); + let other_file_entry = fs.lookup(Context::default(), other_entry.inode, &other_file_name)?; + assert_eq!(other_file_entry.attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Lookup 'extra' in root - should be found in layer 1 + let extra_name = CString::new("extra").unwrap(); + let extra_entry = fs.lookup(Context::default(), 1, &extra_name)?; + assert_eq!(extra_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + Ok(()) +} + +#[test] +fn test_lookup_opaque_with_empty_subdir() -> io::Result<()> { + // Create test layers: + // Lower layer: + // - bar/ + // - bar/hello/ + // - bar/hello/txt + // Upper layer: + // - bar/ + // - bar/.wh..wh..opq + // - bar/hello/ (empty directory) + let layers = vec![ + vec![ + ("bar", true, 0o755), + ("bar/hello", true, 0o755), + ("bar/hello/txt", false, 0o644), + ], + vec![ + ("bar", true, 0o755), + ("bar/.wh..wh..opq", false, 0o644), + ("bar/hello", true, 0o755), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // First lookup 'bar' directory + let bar_name = CString::new("bar").unwrap(); + let bar_entry = fs.lookup(Context::default(), 1, &bar_name)?; + 
assert_eq!(bar_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Then lookup 'hello' in bar directory + let hello_name = CString::new("hello").unwrap(); + let hello_entry = fs.lookup(Context::default(), bar_entry.inode, &hello_name)?; + assert_eq!(hello_entry.attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Finally lookup 'txt' in bar/hello directory + // This should fail because the opaque marker in bar/ hides everything from lower layers + let txt_name = CString::new("txt").unwrap(); + let txt_result = fs.lookup(Context::default(), hello_entry.inode, &txt_name); + assert!( + txt_result.is_err(), + "txt should be hidden by opaque directory marker in bar/" + ); + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/metadata.rs b/src/devices/src/virtio/fs/tests/overlayfs/metadata.rs new file mode 100644 index 000000000..bf9431f15 --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/metadata.rs @@ -0,0 +1,921 @@ +use std::{collections::HashSet, ffi::CString, fs, io}; + +use crate::virtio::{ + bindings::{self, LINUX_ENODATA, LINUX_ENOSYS}, + fs::filesystem::{Context, FileSystem, GetxattrReply, ListxattrReply}, + fuse::{FsOptions, SetattrValid}, + linux_errno::LINUX_ERANGE, overlayfs::{Config, OverlayFs}, +}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_getattr_basic() -> io::Result<()> { + // Create test layers: + // Lower layer: file1 (mode 0644), dir1 (mode 0755), shadowed (mode 0644) + // Upper layer: file2 (mode 0600), shadowed (mode 0600) - shadows lower layer's shadowed + let layers = vec![ + vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("shadowed", false, 0o644), + ], + vec![ + ("file2", false, 0o600), + ("shadowed", false, 0o600), // This shadows the lower layer's shadowed file + ], + ]; + + let (fs, 
temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test getattr on file in lower layer + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + let (file1_attr, _) = fs.getattr(Context::default(), file1_entry.inode, None)?; + assert_eq!(file1_attr.st_mode & 0o777, 0o644); + assert_eq!(file1_attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Test getattr on directory + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let (dir1_attr, _) = fs.getattr(Context::default(), dir1_entry.inode, None)?; + assert_eq!(dir1_attr.st_mode & 0o777, 0o755); + assert_eq!(dir1_attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + + // Test getattr on file in upper layer + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(Context::default(), 1, &file2_name)?; + let (file2_attr, _) = fs.getattr(Context::default(), file2_entry.inode, None)?; + assert_eq!(file2_attr.st_mode & 0o777, 0o600); + assert_eq!(file2_attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + // Test getattr on shadowed file - should get attributes from upper layer + let shadowed_name = CString::new("shadowed").unwrap(); + let shadowed_entry = fs.lookup(Context::default(), 1, &shadowed_name)?; + let (shadowed_attr, _) = fs.getattr(Context::default(), shadowed_entry.inode, None)?; + assert_eq!( + shadowed_attr.st_mode & 0o777, + 0o600, + "Should get mode from upper layer's shadowed file" + ); + assert_eq!(shadowed_attr.st_mode & libc::S_IFMT, libc::S_IFREG); + + Ok(()) +} + +#[test] +fn test_getattr_invalid_inode() -> io::Result<()> { + // Create a simple test layer + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + 
fs.init(FsOptions::empty())?; + + // Test getattr with invalid inode + let invalid_inode = 999999; + let result = fs.getattr(Context::default(), invalid_inode, None); + assert!(result.is_err()); + assert_eq!(result.unwrap_err().raw_os_error(), Some(libc::EBADF)); + + Ok(()) +} + +#[test] +fn test_getattr_whiteout() -> io::Result<()> { + // Create test layers: + // Lower layer: file1 + // Upper layer: .wh.file1 (whiteout for file1) + let layers = vec![ + vec![("file1", false, 0o644)], + vec![(".wh.file1", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Try to lookup and getattr whited-out file + let file1_name = CString::new("file1").unwrap(); + assert!(fs.lookup(Context::default(), 1, &file1_name).is_err()); + + Ok(()) +} + +#[test] +fn test_getattr_timestamps() -> io::Result<()> { + // Create test layers with a single file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Get the file's attributes + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + let (file1_attr, timeout) = fs.getattr(Context::default(), file1_entry.inode, None)?; + + // Verify that timestamps are present + assert!(file1_attr.st_atime > 0); + assert!(file1_attr.st_mtime > 0); + assert!(file1_attr.st_ctime > 0); + + // Verify that the timeout matches the configuration + assert_eq!(timeout, fs.get_config().attr_timeout); + + Ok(()) +} + +#[test] +fn test_getattr_complex() -> io::Result<()> { + // Create test layers with complex directory structure and various shadowing/opaque scenarios: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (mode 0644) + // - dir1/subdir/ + // - dir1/subdir/bottom_file 
(mode 0644) + // - dir2/ + // - dir2/file2 (mode 0644) + // Layer 1 (middle): + // - dir1/ (with opaque marker) + // - dir1/file1 (mode 0600) - shadows bottom but visible due to opaque + // - dir1/middle_file (mode 0600) + // - dir2/file2 (mode 0600) - shadows bottom + // Layer 2 (top): + // - dir1/ + // - dir1/top_file (mode 0666) + // - dir2/ (with opaque marker) + // - dir2/new_file (mode 0666) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh..wh..opq", false, 0o644), // Makes dir1 opaque + ("dir1/file1", false, 0o600), // Shadows but visible due to opaque + ("dir1/middle_file", false, 0o600), + ("dir2", true, 0o755), + ("dir2/file2", false, 0o600), // Shadows bottom layer + ], + vec![ + ("dir1", true, 0o755), + ("dir1/top_file", false, 0o666), + ("dir2", true, 0o755), + ("dir2/.wh..wh..opq", false, 0o644), // Makes dir2 opaque + ("dir2/new_file", false, 0o666), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test 1: Files in dir1 (with opaque marker in middle layer) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + + // 1a. file1 should have mode 0600 from middle layer (due to opaque marker), not 0644 from bottom + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), dir1_entry.inode, &file1_name)?; + let (file1_attr, _) = fs.getattr(Context::default(), file1_entry.inode, None)?; + assert_eq!( + file1_attr.st_mode & 0o777, + 0o600, + "file1 should have mode from middle layer due to opaque marker" + ); + + // 1b. 
bottom_file should not be visible due to opaque marker in middle layer + let bottom_file_name = CString::new("bottom_file").unwrap(); + assert!( + fs.lookup(Context::default(), dir1_entry.inode, &bottom_file_name) + .is_err(), + "bottom_file should be hidden by opaque marker" + ); + + // 1c. middle_file should be visible with mode 0600 + let middle_file_name = CString::new("middle_file").unwrap(); + let middle_file_entry = fs.lookup(Context::default(), dir1_entry.inode, &middle_file_name)?; + let (middle_file_attr, _) = fs.getattr(Context::default(), middle_file_entry.inode, None)?; + assert_eq!(middle_file_attr.st_mode & 0o777, 0o600); + + // 1d. top_file should be visible with mode 0666 + let top_file_name = CString::new("top_file").unwrap(); + let top_file_entry = fs.lookup(Context::default(), dir1_entry.inode, &top_file_name)?; + let (top_file_attr, _) = fs.getattr(Context::default(), top_file_entry.inode, None)?; + assert_eq!(top_file_attr.st_mode & 0o777, 0o666); + + // Test 2: Files in dir2 (with opaque marker in top layer) + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + + // 2a. file2 from bottom and middle layers should not be visible due to opaque marker in top + let file2_name = CString::new("file2").unwrap(); + assert!( + fs.lookup(Context::default(), dir2_entry.inode, &file2_name) + .is_err(), + "file2 should be hidden by opaque marker in top layer" + ); + + // 2b. new_file should be visible with mode 0666 + let new_file_name = CString::new("new_file").unwrap(); + let new_file_entry = fs.lookup(Context::default(), dir2_entry.inode, &new_file_name)?; + let (new_file_attr, _) = fs.getattr(Context::default(), new_file_entry.inode, None)?; + assert_eq!(new_file_attr.st_mode & 0o777, 0o666); + + // Test 3: Directory attributes + // 3a. 
dir1 should exist and be a directory + let (dir1_attr, _) = fs.getattr(Context::default(), dir1_entry.inode, None)?; + assert_eq!(dir1_attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + assert_eq!(dir1_attr.st_mode & 0o777, 0o755); + + // 3b. dir2 should exist and be a directory + let (dir2_attr, _) = fs.getattr(Context::default(), dir2_entry.inode, None)?; + assert_eq!(dir2_attr.st_mode & libc::S_IFMT, libc::S_IFDIR); + assert_eq!(dir2_attr.st_mode & 0o777, 0o755); + + Ok(()) +} + +#[test] +fn test_setattr_basic() -> io::Result<()> { + // Create test layers: + // Lower layer: file1 (mode 0644) + // Upper layer: file2 (mode 0600) + let layers = vec![vec![("file1", false, 0o644)], vec![("file2", false, 0o600)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, true)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test setattr on file in upper layer + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(Context::default(), 1, &file2_name)?; + + // Change mode to 0640 + let mut attr = file2_entry.attr; + attr.st_mode = (attr.st_mode & !0o777) | 0o640; + let valid = SetattrValid::MODE; + let (new_attr, _) = fs.setattr(Context::default(), file2_entry.inode, attr, None, valid)?; + assert_eq!(new_attr.st_mode & 0o777, 0o640); + + // Verify the change was applied to the filesystem + let (verify_attr, _) = fs.getattr(Context::default(), file2_entry.inode, None)?; + assert_eq!(verify_attr.st_mode & 0o777, 0o640); + + Ok(()) +} + +#[test] +fn test_setattr_copy_up() -> io::Result<()> { + // Create test layers: + // Lower layer: file1 (mode 0644) + // Upper layer: empty (file1 will be copied up) + let layers = vec![vec![("file1", false, 0o644)], vec![]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, true)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test setattr on file in lower layer + let file1_name = 
CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + + // Change mode to 0640 + let mut attr = file1_entry.attr; + attr.st_mode = (attr.st_mode & !0o777) | 0o640; + let valid = SetattrValid::MODE; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + assert_eq!(new_attr.st_mode & 0o777, 0o640); + + Ok(()) +} + +#[test] +fn test_setattr_timestamps() -> io::Result<()> { + // Create test layers with a single file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Get the file's entry + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + + // Set specific timestamps + let mut attr = file1_entry.attr; + attr.st_atime = 12345; + attr.st_atime_nsec = 67890; + attr.st_mtime = 98765; + attr.st_mtime_nsec = 43210; + + let valid = SetattrValid::ATIME | SetattrValid::MTIME; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + + // Verify timestamps were set + assert_eq!(new_attr.st_atime, 12345); + assert_eq!(new_attr.st_atime_nsec, 67890); + assert_eq!(new_attr.st_mtime, 98765); + assert_eq!(new_attr.st_mtime_nsec, 43210); + + Ok(()) +} + +#[test] +fn test_setattr_size() -> io::Result<()> { + // Create test layers with a single file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Get the file's entry + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), 1, &file1_name)?; + + // Set file size to 1000 bytes + let mut attr = file1_entry.attr; + attr.st_size = 1000; + let valid = 
SetattrValid::SIZE; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + + // Verify size was set + assert_eq!(new_attr.st_size, 1000); + + // Verify the actual file size on disk + let file_path = temp_dirs[0].path().join("file1"); + let metadata = fs::metadata(file_path)?; + assert_eq!(metadata.len(), 1000); + + Ok(()) +} + +#[test] +fn test_setattr_complex() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (mode 0644) + // - dir1/subdir/ + // - dir1/subdir/bottom_file (mode 0644) + // Layer 1 (middle): + // - dir2/ + // - dir2/file2 (mode 0600) + // Layer 2 (top): + // - dir3/ + // - dir3/file3 (mode 0666) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/file2", false, 0o600)], + vec![("dir3", true, 0o755), ("dir3/file3", false, 0o666)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test 1: Modify file in bottom layer (should trigger copy_up) + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(Context::default(), dir1_entry.inode, &file1_name)?; + + // Change mode and size + let mut attr = file1_entry.attr; + attr.st_mode = (attr.st_mode & !0o777) | 0o640; + attr.st_size = 2000; + let valid = SetattrValid::MODE | SetattrValid::SIZE; + let (new_attr, _) = fs.setattr(Context::default(), file1_entry.inode, attr, None, valid)?; + + // Verify changes + assert_eq!(new_attr.st_mode & 0o777, 0o640); + assert_eq!(new_attr.st_size, 2000); + + // Test 2: Modify file in middle layer (should trigger copy_up) + let dir2_name = 
CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(Context::default(), dir2_entry.inode, &file2_name)?; + + // Change timestamps + let mut attr = file2_entry.attr; + attr.st_atime = 12345; + attr.st_mtime = 67890; + let valid = SetattrValid::ATIME | SetattrValid::MTIME; + let (new_attr, _) = fs.setattr(Context::default(), file2_entry.inode, attr, None, valid)?; + + // Verify changes + assert_eq!(new_attr.st_atime, 12345); + assert_eq!(new_attr.st_mtime, 67890); + + // Verify file was copied up + let top_file2_path = temp_dirs[2].path().join("dir2").join("file2"); + assert!(top_file2_path.exists()); + + // Test 3: Modify file in top layer (no copy_up needed) + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(Context::default(), 1, &dir3_name)?; + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(Context::default(), dir3_entry.inode, &file3_name)?; + + // Change mode + let mut attr = file3_entry.attr; + attr.st_mode = (attr.st_mode & !0o777) | 0o644; + let valid = SetattrValid::MODE; + let (new_attr, _) = fs.setattr(Context::default(), file3_entry.inode, attr, None, valid)?; + + // Verify changes + assert_eq!(new_attr.st_mode & 0o777, 0o644); + + Ok(()) +} + +#[test] +fn test_xattrs() -> io::Result<()> { + // Create test layers with nested structure: + // Layer 0 (bottom): dir1/file1.txt, dir2/file2.txt + // Layer 1 (middle): dir1/file3.txt, dir3/file4.txt + // Layer 2 (top): dir1/file5.txt, dir2/dir4/file6.txt + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file2.txt", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file3.txt", false, 0o644), + ("dir3", true, 0o755), + ("dir3/file4.txt", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file5.txt", false, 0o644), + ("dir2", true, 
0o755), + ("dir2/dir4", true, 0o755), + ("dir2/dir4/file6.txt", false, 0o644), + ], + ]; + + // Enable xattr in config + let mut cfg = Config::default(); + cfg.xattr = true; + + // Create overlay filesystem with the specified layers + let temp_dirs = layers + .iter() + .map(|layer| helper::setup_test_layer(layer).unwrap()) + .collect::<Vec<_>>(); + + let layer_paths = temp_dirs + .iter() + .map(|dir| dir.path().to_path_buf()) + .collect::<Vec<_>>(); + + cfg.layers = layer_paths; + + let overlayfs = OverlayFs::new(cfg)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + overlayfs.init(FsOptions::empty())?; + let ctx = Context::default(); + + // ---------- Test setting, getting, listing, and removing xattrs on files in different layers ---------- + + // Look up dir1 + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = overlayfs.lookup(ctx, 1, &dir1_name)?; + + // Test file in top layer (dir1/file5.txt) + let file5_name = CString::new("file5.txt").unwrap(); + let file5_entry = overlayfs.lookup(ctx, dir1_entry.inode, &file5_name)?; + + // Test setxattr on top layer file + let xattr_name = CString::new("user.test_attr").unwrap(); + let xattr_value = b"test_value_123"; + overlayfs.setxattr(ctx, file5_entry.inode, &xattr_name, xattr_value, 0)?; + + // Test getxattr + let result = overlayfs.getxattr(ctx, file5_entry.inode, &xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!(value, xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // Test listxattr + let result = overlayfs.listxattr(ctx, file5_entry.inode, 100); + match result { + Ok(ListxattrReply::Names(names)) => { + let mut found = false; + let mut start = 0; + while start < names.len() { + let end = names[start..]
+ .iter() + .position(|&b| b == 0) + .map(|pos| start + pos) + .unwrap_or(names.len()); + + let attr_name = &names[start..end]; + if attr_name == xattr_name.to_bytes() { + found = true; + break; + } + start = end + 1; + } + assert!(found, "Attribute name not found in listxattr result"); + } + Err(e) => panic!("Expected ListxattrReply::Names, got error: {:?}", e), + _ => panic!("Unexpected result from listxattr"), + } + + // Test setting another attribute + let xattr_name2 = CString::new("user.another_attr").unwrap(); + let xattr_value2 = b"another_value_456"; + overlayfs.setxattr(ctx, file5_entry.inode, &xattr_name2, xattr_value2, 0)?; + + // Verify both attributes are listed + let result = overlayfs.listxattr(ctx, file5_entry.inode, 200); + match result { + Ok(ListxattrReply::Names(names)) => { + let mut attrs = HashSet::new(); + let mut start = 0; + while start < names.len() { + let end = names[start..] + .iter() + .position(|&b| b == 0) + .map(|pos| start + pos) + .unwrap_or(names.len()); + + let attr_name = &names[start..end]; + attrs.insert(attr_name.to_vec()); + start = end + 1; + } + assert!( + attrs.contains(&xattr_name.to_bytes().to_vec()), + "First attribute not found" + ); + assert!( + attrs.contains(&xattr_name2.to_bytes().to_vec()), + "Second attribute not found" + ); + } + Err(e) => panic!("Expected ListxattrReply::Names, got error: {:?}", e), + _ => panic!("Unexpected result from listxattr"), + } + + // Test removexattr + overlayfs.removexattr(ctx, file5_entry.inode, &xattr_name)?; + + // Verify the attribute was removed + let result = overlayfs.listxattr(ctx, file5_entry.inode, 100); + match result { + Ok(ListxattrReply::Names(names)) => { + let mut found = false; + let mut start = 0; + while start < names.len() { + let end = names[start..] 
+ .iter() + .position(|&b| b == 0) + .map(|pos| start + pos) + .unwrap_or(names.len()); + + let attr_name = &names[start..end]; + if attr_name == xattr_name.to_bytes() { + found = true; + break; + } + start = end + 1; + } + assert!(!found, "Attribute should have been removed"); + } + Err(e) => panic!("Expected ListxattrReply::Names, got error: {:?}", e), + _ => panic!("Unexpected result from listxattr"), + } + + // ---------- Test xattrs on files in middle layer (should trigger copy-up) ---------- + + // Look up dir3 + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = overlayfs.lookup(ctx, 1, &dir3_name)?; + + // Test file in middle layer (dir3/file4.txt) + let file4_name = CString::new("file4.txt").unwrap(); + let file4_entry = overlayfs.lookup(ctx, dir3_entry.inode, &file4_name)?; + + // Verify file exists in middle layer before copy-up + let middle_layer_file = temp_dirs[1].path().join("dir3").join("file4.txt"); + assert!( + middle_layer_file.exists(), + "File should exist in middle layer before copy-up" + ); + assert!( + !temp_dirs[2].path().join("dir3").join("file4.txt").exists(), + "File should not exist in top layer before copy-up" + ); + + // This should cause a copy-up operation since the file is in a lower layer + let middle_xattr_name = CString::new("user.middle_attr").unwrap(); + let middle_xattr_value = b"middle_layer_value"; + overlayfs.setxattr( + ctx, + file4_entry.inode, + &middle_xattr_name, + middle_xattr_value, + 0, + )?; + + // Verify file was copied up to top layer + let top_layer_file = temp_dirs[2].path().join("dir3").join("file4.txt"); + assert!( + top_layer_file.exists(), + "File should be copied up to top layer" + ); + + // Verify the attribute was set on the top layer file + let result = overlayfs.getxattr(ctx, file4_entry.inode, &middle_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!(value, middle_xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", 
e), + _ => panic!("Unexpected result from getxattr"), + } + + // Verify the middle layer file still exists and is unchanged (no xattr) + assert!( + middle_layer_file.exists(), + "Original file should still exist in middle layer" + ); + let result = overlayfs.getxattr(ctx, file4_entry.inode, &middle_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!( + value, middle_xattr_value, + "Xattr should be accessible through overlay" + ); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // Try to read the xattr directly from the middle layer file (should not exist) + let middle_layer_path = CString::new(middle_layer_file.to_str().unwrap()).unwrap(); + let mut buf = vec![0; 100]; + #[cfg(target_os = "macos")] + let res = unsafe { + #[cfg(target_os = "macos")] + libc::getxattr( + middle_layer_path.as_ptr(), + middle_xattr_name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + 0, + 0, + ) + }; + + #[cfg(target_os = "linux")] + let res = unsafe { + libc::getxattr( + middle_layer_path.as_ptr(), + middle_xattr_name.as_ptr(), + buf.as_mut_ptr() as *mut libc::c_void, + buf.len(), + ) + }; + + assert!(res < 0, "Xattr should not exist on middle layer file"); + let err = io::Error::last_os_error(); + assert!( + err.raw_os_error().unwrap() == libc::ENODATA, + "Expected ENODATA when reading xattr from middle layer file" + ); + + // ---------- Test xattrs on nested directories ---------- + + // Look up dir2/dir4 + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = overlayfs.lookup(ctx, 1, &dir2_name)?; + + let dir4_name = CString::new("dir4").unwrap(); + let dir4_entry = overlayfs.lookup(ctx, dir2_entry.inode, &dir4_name)?; + + // Set xattr on a nested directory + let dir_xattr_name = CString::new("user.dir_attr").unwrap(); + let dir_xattr_value = b"directory_attribute"; + overlayfs.setxattr(ctx, dir4_entry.inode, &dir_xattr_name, 
dir_xattr_value, 0)?; + + // Verify the attribute was set + let result = overlayfs.getxattr(ctx, dir4_entry.inode, &dir_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!(value, dir_xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // ---------- Test xattrs on file in deeply nested directory ---------- + + // Get file in nested directory (dir2/dir4/file6.txt) + let file6_name = CString::new("file6.txt").unwrap(); + let file6_entry = overlayfs.lookup(ctx, dir4_entry.inode, &file6_name)?; + + // Set xattr on the nested file + let nested_xattr_name = CString::new("user.nested_attr").unwrap(); + let nested_xattr_value = b"nested_file_value"; + overlayfs.setxattr( + ctx, + file6_entry.inode, + &nested_xattr_name, + nested_xattr_value, + 0, + )?; + + // Verify the attribute was set + let result = overlayfs.getxattr(ctx, file6_entry.inode, &nested_xattr_name, 100); + match result { + Ok(GetxattrReply::Value(value)) => { + assert_eq!(value, nested_xattr_value); + } + Err(e) => panic!("Expected GetxattrReply::Value, got error: {:?}", e), + _ => panic!("Unexpected result from getxattr"), + } + + // ---------- Test error cases ---------- + + // Test getxattr on non-existent attribute + let nonexistent_attr = CString::new("user.nonexistent").unwrap(); + let result = overlayfs.getxattr(ctx, file6_entry.inode, &nonexistent_attr, 100); + match result { + Err(e) => { + let err_code = e.raw_os_error().unwrap(); + assert!( + err_code == LINUX_ENODATA, + "Expected ENODATA, got: {}", + err_code + ); + } + Ok(_) => panic!("Expected error for non-existent attribute"), + } + + // Test getxattr with buffer too small + let result = overlayfs.getxattr(ctx, file6_entry.inode, &nested_xattr_name, 5); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ERANGE, + "Expected ERANGE error" + ); + } + Ok(_) => panic!("Expected ERANGE 
error for small buffer"), + } + + // Test removexattr on non-existent attribute + let result = overlayfs.removexattr(ctx, file6_entry.inode, &nonexistent_attr); + match result { + Err(e) => { + let err_code = e.raw_os_error().unwrap(); + assert!( + err_code == LINUX_ENODATA, + "Expected ENODATA, got: {}", + err_code + ); + } + Ok(_) => panic!("Expected error for non-existent attribute"), + } + + // Test setxattr with the XATTR_CREATE flag on an attribute that already exists (should fail with EEXIST) + let result = overlayfs.setxattr( + ctx, + file6_entry.inode, + &nested_xattr_name, + nested_xattr_value, + bindings::LINUX_XATTR_CREATE as u32, // XATTR_CREATE - should fail on existing attr + ); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + libc::EEXIST, + "Expected EEXIST error" + ); + } + Ok(_) => panic!("Expected EEXIST error for XATTR_CREATE on existing attribute"), + } + + // ---------- Test disabling xattr functionality ---------- + + // Create a new overlayfs with xattr disabled + let mut cfg_no_xattr = Config::default(); + cfg_no_xattr.xattr = false; + cfg_no_xattr.layers = temp_dirs + .iter() + .map(|dir| dir.path().to_path_buf()) + .collect(); + + let overlayfs_no_xattr = OverlayFs::new(cfg_no_xattr)?; + + overlayfs_no_xattr.init(FsOptions::empty())?; + + // Look up a file again + let dir1_entry = overlayfs_no_xattr.lookup(ctx, 1, &dir1_name)?; + let file5_entry = overlayfs_no_xattr.lookup(ctx, dir1_entry.inode, &file5_name)?; + + // All xattr operations should return ENOSYS + let result = overlayfs_no_xattr.setxattr(ctx, file5_entry.inode, &xattr_name, b"test", 0); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + let result = overlayfs_no_xattr.getxattr(ctx, file5_entry.inode, &xattr_name, 100); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), +
LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + let result = overlayfs_no_xattr.listxattr(ctx, file5_entry.inode, 100); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + let result = overlayfs_no_xattr.removexattr(ctx, file5_entry.inode, &xattr_name); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error().unwrap(), + LINUX_ENOSYS, + "Expected ENOSYS error" + ); + } + Ok(_) => panic!("Expected ENOSYS error when xattr is disabled"), + } + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/misc.rs b/src/devices/src/virtio/fs/tests/overlayfs/misc.rs new file mode 100644 index 000000000..219e69f2d --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/misc.rs @@ -0,0 +1,546 @@ +use std::{ffi::CString, fs, io, os::unix::fs::PermissionsExt, path::PathBuf}; + +use tempfile::TempDir; + +use crate::virtio::{ + fs::filesystem::{Context, FileSystem}, + fuse::FsOptions, + overlayfs::{Config, OverlayFs}, +}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_copy_up_complex() -> io::Result<()> { + // Create test layers with complex structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (mode 0644) + // - dir1/subdir/ + // - dir1/subdir/bottom_file (mode 0644) + // - dir1/symlink -> file1 + // - dir2/ + // - dir2/file2 (mode 0600) + // Layer 1 (middle): + // - dir3/ + // - dir3/middle_file (mode 0666) + // - dir3/nested/ + // - dir3/nested/data (mode 0644) + // Layer 2 (top - initially empty): + // (empty - will be populated by copy_up operations) + let layers = vec![ + vec![ + ("dir1", true, 0o755), + 
("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/bottom_file", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file2", false, 0o600), + ], + vec![ + ("dir3", true, 0o755), + ("dir3/middle_file", false, 0o666), + ("dir3/nested", true, 0o755), + ("dir3/nested/data", false, 0o644), + ], + vec![], // Empty top layer + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Create symlink in bottom layer + let symlink_path = temp_dirs[0].path().join("dir1").join("symlink"); + std::os::unix::fs::symlink("file1", &symlink_path)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test 1: Copy up a regular file from bottom layer + // First lookup dir1/file1 to get its path_inodes + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let file1_name = CString::new("file1").unwrap(); + let (_, path_inodes) = fs.do_lookup(dir1_entry.inode, &file1_name)?; + + // Perform copy_up + fs.copy_up(&path_inodes)?; + + // Verify the file was copied up correctly + let top_file1_path = temp_dirs[2].path().join("dir1").join("file1"); + let metadata = fs::metadata(&top_file1_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o644); + assert!(top_file1_path.exists()); + + // Test 2: Copy up a directory with nested content + let dir3_name = CString::new("dir3").unwrap(); + let dir3_entry = fs.lookup(Context::default(), 1, &dir3_name)?; + let nested_name = CString::new("nested").unwrap(); + let (nested_entry, nested_path_inodes) = fs.do_lookup(dir3_entry.inode, &nested_name)?; + + // Copy up the nested directory + fs.copy_up(&nested_path_inodes)?; + + // Verify the directory structure was copied + let top_nested_path = temp_dirs[2].path().join("dir3").join("nested"); + assert!(top_nested_path.exists()); + assert!(top_nested_path.is_dir()); + let metadata = fs::metadata(&top_nested_path)?; + 
assert_eq!(metadata.permissions().mode() & 0o777, 0o755); + + // Test 3: Copy up a file from the middle layer + let middle_file_name = CString::new("middle_file").unwrap(); + let (_, middle_file_path_inodes) = fs.do_lookup(dir3_entry.inode, &middle_file_name)?; + + // Perform copy_up + fs.copy_up(&middle_file_path_inodes)?; + + // Verify the file was copied up correctly + let top_middle_file_path = temp_dirs[2].path().join("dir3").join("middle_file"); + let metadata = fs::metadata(&top_middle_file_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o666); + assert!(top_middle_file_path.exists()); + + // Test 4: Copy up a nested file + let data_name = CString::new("data").unwrap(); + let (_, data_path_inodes) = fs.do_lookup(nested_entry.inode, &data_name)?; + + // Perform copy_up + fs.copy_up(&data_path_inodes)?; + + // Verify the nested file was copied up correctly + let top_data_path = temp_dirs[2].path().join("dir3").join("nested").join("data"); + let metadata = fs::metadata(&top_data_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o644); + assert!(top_data_path.exists()); + + // Test 5: Verify parent directories are created as needed + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + let file2_name = CString::new("file2").unwrap(); + let (_, file2_path_inodes) = fs.do_lookup(dir2_entry.inode, &file2_name)?; + + // Perform copy_up + fs.copy_up(&file2_path_inodes)?; + + // Verify the directory structure + let top_dir2_path = temp_dirs[2].path().join("dir2"); + assert!(top_dir2_path.exists()); + assert!(top_dir2_path.is_dir()); + let top_file2_path = top_dir2_path.join("file2"); + let metadata = fs::metadata(&top_file2_path)?; + assert_eq!(metadata.permissions().mode() & 0o777, 0o600); + assert!(top_file2_path.exists()); + + // Test 6: Copy up a symbolic link + let symlink_name = CString::new("symlink").unwrap(); + let (_, symlink_path_inodes) = fs.do_lookup(dir1_entry.inode, 
&symlink_name)?; + + // Perform copy_up + fs.copy_up(&symlink_path_inodes)?; + + // Verify the symlink was copied up correctly + let top_symlink_path = temp_dirs[2].path().join("dir1").join("symlink"); + assert!(top_symlink_path.exists()); + assert!(fs::symlink_metadata(&top_symlink_path)? + .file_type() + .is_symlink()); + + // Read the symlink target + let target = fs::read_link(&top_symlink_path)?; + assert_eq!(target.to_str().unwrap(), "file1"); + + Ok(()) +} + +#[test] +fn test_copy_up_with_content() -> io::Result<()> { + // Create test layers with files containing specific content: + // Layer 0 (bottom): + // - file1 (contains "bottom layer content") + // - dir1/nested_file1 (contains "nested bottom content") + // Layer 1 (middle): + // - file2 (contains "middle layer content") + // - dir1/nested_file2 (contains "nested middle content") + // Layer 2 (top): + // - file3 (contains "top layer content") + // - dir1/nested_file3 (contains "nested top content") + + // Create temporary directories for each layer + let temp_dirs: Vec = vec![ + TempDir::new().unwrap(), + TempDir::new().unwrap(), + TempDir::new().unwrap(), + ]; + + // Create directory structure in each layer + for dir in &temp_dirs { + fs::create_dir_all(dir.path().join("dir1"))?; + } + + // Create files with content in bottom layer + fs::write(temp_dirs[0].path().join("file1"), "bottom layer content")?; + fs::write( + temp_dirs[0].path().join("dir1").join("nested_file1"), + "nested bottom content", + )?; + + // Create files with content in middle layer + fs::write(temp_dirs[1].path().join("file2"), "middle layer content")?; + fs::write( + temp_dirs[1].path().join("dir1").join("nested_file2"), + "nested middle content", + )?; + + // Create files with content in top layer + fs::write(temp_dirs[2].path().join("file3"), "top layer content")?; + fs::write( + temp_dirs[2].path().join("dir1").join("nested_file3"), + "nested top content", + )?; + + // Set permissions + for dir in &temp_dirs { + 
fs::set_permissions(dir.path().join("dir1"), fs::Permissions::from_mode(0o755)).ok(); + } + fs::set_permissions( + temp_dirs[0].path().join("file1"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[0].path().join("dir1").join("nested_file1"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[1].path().join("file2"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[1].path().join("dir1").join("nested_file2"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[2].path().join("file3"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + fs::set_permissions( + temp_dirs[2].path().join("dir1").join("nested_file3"), + fs::Permissions::from_mode(0o644), + ) + .ok(); + + // Create layer paths + let layer_paths: Vec = temp_dirs.iter().map(|d| d.path().to_path_buf()).collect(); + + // Create the overlayfs + let cfg = Config { + layers: layer_paths, + ..Default::default() + }; + let fs = OverlayFs::new(cfg)?; + let ctx = Context::default(); + + // Test 1: Open file1 from bottom layer with write access (should trigger copy-up) + let file1_name = CString::new("file1").unwrap(); + let (_, path_inodes) = fs.do_lookup(1, &file1_name)?; + fs.copy_up(&path_inodes)?; + + // Verify file1 was copied up to the top layer with correct content + let top_file1 = temp_dirs[2].path().join("file1"); + assert!(top_file1.exists()); + let content = fs::read_to_string(&top_file1)?; + assert_eq!(content, "bottom layer content"); + + // Test 2: Open nested_file1 from bottom layer with write access + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let nested_file1_name = CString::new("nested_file1").unwrap(); + let (_, path_inodes) = fs.do_lookup(dir1_entry.inode, &nested_file1_name)?; + fs.copy_up(&path_inodes)?; + + // Verify nested_file1 was copied up to the top layer with correct content + let 
top_nested_file1 = temp_dirs[2].path().join("dir1").join("nested_file1"); + assert!(top_nested_file1.exists()); + let content = fs::read_to_string(&top_nested_file1)?; + assert_eq!(content, "nested bottom content"); + + // Test 3: Open file2 from middle layer with write access + let file2_name = CString::new("file2").unwrap(); + let (_, path_inodes) = fs.do_lookup(1, &file2_name)?; + fs.copy_up(&path_inodes)?; + + // Verify file2 was copied up to the top layer with correct content + let top_file2 = temp_dirs[2].path().join("file2"); + assert!(top_file2.exists()); + let content = fs::read_to_string(&top_file2)?; + assert_eq!(content, "middle layer content"); + + // Test 4: Open file3 from top layer (no copy-up needed) + let file3_name = CString::new("file3").unwrap(); + let (_, path_inodes) = fs.do_lookup(1, &file3_name)?; + fs.copy_up(&path_inodes)?; + + // Verify file3 content is unchanged + let content = fs::read_to_string(temp_dirs[2].path().join("file3"))?; + assert_eq!(content, "top layer content"); + + // Clean up + fs.destroy(); + + Ok(()) +} + +#[test] +fn test_link_basic() -> io::Result<()> { + // Create test layers with simple structure: + // Layer 0 (bottom): + // - file1 + // Layer 1 (top): + // - dir1/ + let layers = vec![vec![("file1", false, 0o644)], vec![("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Create hard link from file1 to dir1/link1 + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let link1_name = CString::new("link1").unwrap(); + let link1_entry = fs.link(ctx, file1_entry.inode, dir1_entry.inode, &link1_name)?; + + // Verify the link was created + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir1/link1").exists()); + + // Verify the link has the same inode number as the 
original file + let updated_file1_entry = fs.lookup(ctx, 1, &file1_name)?; + assert_eq!(link1_entry.attr.st_ino, updated_file1_entry.attr.st_ino); + assert_eq!(link1_entry.attr.st_nlink, updated_file1_entry.attr.st_nlink); + + Ok(()) +} + +#[test] +fn test_link_multiple_layers() -> io::Result<()> { + // Create test layers with multiple files: + // Layer 0 (bottom): + // - file1 + // - dir1/ + // - dir1/file2 + // Layer 1 (middle): + // - file3 + // Layer 2 (top): + // - dir2/ + let layers = vec![ + vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("dir1/file2", false, 0o644), + ], + vec![("file3", false, 0o644)], + vec![("dir2", true, 0o755)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Create links to files from different layers + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(ctx, 1, &file3_name)?; + + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + + // Create links in top layer + let link1_name = CString::new("link1").unwrap(); + let link2_name = CString::new("link2").unwrap(); + + let link1_entry = fs.link(ctx, file1_entry.inode, dir2_entry.inode, &link1_name)?; + let link2_entry = fs.link(ctx, file3_entry.inode, dir2_entry.inode, &link2_name)?; + + // Verify the links were created in the top layer + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir2/link1").exists()); + assert!(top_layer.join("dir2/link2").exists()); + + // Verify source files were copied up + assert!(top_layer.join("file1").exists()); + assert!(top_layer.join("file3").exists()); + + // Verify link attributes + let updated_file1_entry = fs.lookup(ctx, 1, &file1_name)?; + let updated_file3_entry = fs.lookup(ctx, 1, &file3_name)?; + assert_eq!(link1_entry.attr.st_ino, updated_file1_entry.attr.st_ino); + 
assert_eq!(link2_entry.attr.st_ino, updated_file3_entry.attr.st_ino); + + Ok(()) +} + +#[test] +fn test_link_errors() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): + // - file1 + // - dir1/ + let layers = vec![vec![("file1", false, 0o644), ("dir1", true, 0o755)]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Test linking to invalid parent + let invalid_name = CString::new("link1").unwrap(); + assert!(fs + .link(ctx, file1_entry.inode, 999999, &invalid_name) + .is_err()); + + // Test linking with invalid source inode + assert!(fs + .link(ctx, 999999, dir1_entry.inode, &invalid_name) + .is_err()); + + // Test linking with invalid name + let invalid_name = CString::new("../link1").unwrap(); + assert!(fs + .link(ctx, file1_entry.inode, dir1_entry.inode, &invalid_name) + .is_err()); + + // Test linking a symlink (should error) + // Create a symlink in the bottom layer + let symlink_path = temp_dirs[0].path().join("symlink"); + std::os::unix::fs::symlink("file1", &symlink_path)?; + + // Initialize filesystem to detect the new symlink + fs.init(FsOptions::empty())?; + + // Get the symlink's inode + let symlink_name = CString::new("symlink").unwrap(); + let symlink_entry = fs.lookup(ctx, 1, &symlink_name)?; + + // Try to create a hard link to the symlink (should fail) + let link_name = CString::new("link_to_symlink").unwrap(); + assert!(fs + .link(ctx, symlink_entry.inode, dir1_entry.inode, &link_name) + .is_err()); + + Ok(()) +} + +#[test] +fn test_link_nested() -> io::Result<()> { + // Create test layers with nested structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 + // - dir1/subdir/ + // - dir1/subdir/file2 + // Layer 1 (top): + // - dir2/ + // - dir2/subdir/ + let layers = 
vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![("dir2", true, 0o755), ("dir2/subdir", true, 0o755)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Create links to nested files + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, dir1_entry.inode, &file1_name)?; + + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(ctx, dir1_entry.inode, &subdir_name)?; + + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(ctx, subdir_entry.inode, &file2_name)?; + + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, 1, &dir2_name)?; + + let dir2_subdir_entry = fs.lookup(ctx, dir2_entry.inode, &subdir_name)?; + + // Create links in different locations + let link1_name = CString::new("link1").unwrap(); + let link2_name = CString::new("link2").unwrap(); + + let link1_entry = fs.link(ctx, file1_entry.inode, dir2_entry.inode, &link1_name)?; + let link2_entry = fs.link(ctx, file2_entry.inode, dir2_subdir_entry.inode, &link2_name)?; + + // Verify the links were created + let top_layer = temp_dirs.last().unwrap().path(); + assert!(top_layer.join("dir2/link1").exists()); + assert!(top_layer.join("dir2/subdir/link2").exists()); + + // Verify source files were copied up + assert!(top_layer.join("dir1/file1").exists()); + assert!(top_layer.join("dir1/subdir/file2").exists()); + + // Verify link attributes + let updated_file1_entry = fs.lookup(ctx, dir1_entry.inode, &file1_name)?; + let updated_file2_entry = fs.lookup(ctx, subdir_entry.inode, &file2_name)?; + assert_eq!(link1_entry.attr.st_ino, updated_file1_entry.attr.st_ino); + assert_eq!(link2_entry.attr.st_ino, updated_file2_entry.attr.st_ino); + + Ok(()) 
+} + +#[test] +fn test_link_existing_name() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): + // - file1 + // - dir1/ + // - dir1/existing + let layers = vec![vec![ + ("file1", false, 0o644), + ("dir1", true, 0o755), + ("dir1/existing", false, 0o644), + ]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, 1, &file1_name)?; + + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Try to create a link with an existing name + let existing_name = CString::new("existing").unwrap(); + assert!(fs + .link(ctx, file1_entry.inode, dir1_entry.inode, &existing_name) + .is_err()); + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/mod.rs b/src/devices/src/virtio/fs/tests/overlayfs/mod.rs new file mode 100644 index 000000000..85d6ea594 --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/mod.rs @@ -0,0 +1,214 @@ +#[cfg(test)] +mod create; + +#[cfg(test)] +mod lookup; + +#[cfg(test)] +mod metadata; + +#[cfg(test)] +mod misc; + +#[cfg(test)] +mod open; + +#[cfg(test)] +mod read; + +#[cfg(test)] +mod remove; + +#[cfg(test)] +mod write; + +//-------------------------------------------------------------------------------------------------- +// Trait Implementations +//-------------------------------------------------------------------------------------------------- + +impl Default for crate::virtio::fs::filesystem::Context { + fn default() -> Self { + Self { + uid: 0, + gid: 0, + pid: 0, + } + } +} + +//-------------------------------------------------------------------------------------------------- +// Modules: Helper +//-------------------------------------------------------------------------------------------------- + +#[cfg(test)] +mod helper { + use std::{ + fs::{self, File}, + io, + os::unix::fs::PermissionsExt, + process::Command, + }; + + use 
crate::virtio::{ + fs::filesystem::{ZeroCopyReader, ZeroCopyWriter}, + fs::overlayfs::{Config, OverlayFs}, + }; + + use tempfile::TempDir; + + //-------------------------------------------------------------------------------------------------- + // Types + //-------------------------------------------------------------------------------------------------- + + pub(super) struct TestContainer(pub(super) Vec); + + //-------------------------------------------------------------------------------------------------- + // Trait Implementations + //-------------------------------------------------------------------------------------------------- + + impl io::Write for TestContainer { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.0.extend_from_slice(buf); + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } + } + + impl ZeroCopyWriter for TestContainer { + fn write_from(&mut self, f: &File, count: usize, off: u64) -> io::Result { + use std::os::unix::fs::FileExt; + + // Pre-allocate space in our vector to avoid reallocations + let original_len = self.0.len(); + self.0.resize(original_len + count, 0); + + // Read directly into our vector's buffer + let bytes_read = f.read_at(&mut self.0[original_len..original_len + count], off)?; + + // Adjust the size to match what was actually read + self.0.truncate(original_len + bytes_read); + + if bytes_read == 0 && count > 0 { + return Err(io::Error::new( + io::ErrorKind::UnexpectedEof, + "unexpected EOF", + )); + } + + Ok(bytes_read) + } + } + + impl io::Read for TestContainer { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let available = self.0.len(); + if available == 0 { + return Ok(0); + } + + let amt = std::cmp::min(buf.len(), available); + buf[..amt].copy_from_slice(&self.0[..amt]); + Ok(amt) + } + } + + impl ZeroCopyReader for TestContainer { + fn read_to(&mut self, f: &File, count: usize, off: u64) -> io::Result { + use std::os::unix::fs::FileExt; + + let available = self.0.len(); + 
if available == 0 { + return Ok(0); + } + + let to_write = std::cmp::min(count, available); + let written = f.write_at(&self.0[..to_write], off)?; + Ok(written) + } + } + + //-------------------------------------------------------------------------------------------------- + // Functions + //-------------------------------------------------------------------------------------------------- + + // Helper function to create a temporary directory with specified files + pub(super) fn setup_test_layer(files: &[(&str, bool, u32)]) -> io::Result { + let dir = TempDir::new().unwrap(); + + for (path, is_dir, mode) in files { + let full_path = dir.path().join(path); + if let Some(parent) = full_path.parent() { + fs::create_dir_all(parent)?; + } + + if *is_dir { + fs::create_dir(&full_path)?; + } else { + File::create(&full_path)?; + } + + fs::set_permissions(&full_path, fs::Permissions::from_mode(*mode))?; + } + + Ok(dir) + } + + // Helper function to create an overlayfs with specified layers + pub(super) fn create_overlayfs( + layers: Vec>, + ) -> io::Result<(OverlayFs, Vec)> { + let mut temp_dirs = Vec::new(); + let mut layer_paths = Vec::new(); + + for layer in layers { + let temp_dir = setup_test_layer(&layer)?; + layer_paths.push(temp_dir.path().to_path_buf()); + temp_dirs.push(temp_dir); + } + + let cfg = Config { + layers: layer_paths, + ..Default::default() + }; + + let overlayfs = OverlayFs::new(cfg)?; + Ok((overlayfs, temp_dirs)) + } + + // Debug utility to print the directory structure of each layer using tree command + pub(super) fn debug_print_layers(temp_dirs: &[TempDir], show_perms: bool) -> io::Result<()> { + if Command::new("tree").arg("--version").output().is_err() { + println!("tree command is not accessible. 
please install it to see the layer directory structures."); + return Ok(()); + } + println!("\n=== Layer Directory Structures ==="); + + for (i, dir) in temp_dirs.iter().enumerate() { + println!("\nLayer {}: {}", i, dir.path().display()); + + let path = dir.path(); + let mut tree_cmd = Command::new("tree"); + tree_cmd.arg("-a"); // show hidden files + if show_perms { + tree_cmd.arg("-p"); + } + let output = tree_cmd.arg(path).output()?; + + if output.status.success() { + println!("{}", String::from_utf8_lossy(&output.stdout)); + } else { + println!( + "Error running tree command: {}", + String::from_utf8_lossy(&output.stderr) + ); + } + } + + println!("================================\n"); + + Ok(()) + } +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/open.rs b/src/devices/src/virtio/fs/tests/overlayfs/open.rs new file mode 100644 index 000000000..7d4916c68 --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/open.rs @@ -0,0 +1,378 @@ +use std::{ffi::CString, io}; + +use crate::virtio::fs::filesystem::{Context, Extensions, FileSystem}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_open_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open the file with read-only flags + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + 
Ok(()) +} + +#[test] +fn test_open_directory() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Open the directory + let (handle, _opts) = fs.open( + ctx, + entry.inode, + (libc::O_RDONLY | libc::O_DIRECTORY) as u32, + )?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_open_nonexistent() -> io::Result<()> { + // Create a simple overlayfs with a single layer + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to open a non-existent inode + let result = fs.open(ctx, 999, libc::O_RDONLY as u32); + + // Verify it fails with ENOENT + assert!(result.is_err()); + assert_eq!(result.unwrap_err().raw_os_error(), Some(libc::EBADF)); + + Ok(()) +} +#[test] +fn test_open_with_copy_up() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 + // Layer 1 (top): empty + let layers = vec![vec![("file1", false, 0o644)], vec![]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open the file with write flags, which should trigger copy-up + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDWR as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Verify the file was copied up to the top layer + let top_layer_file = temp_dirs[1].path().join("file1"); + 
assert!(top_layer_file.exists()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_open_whiteout() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 + // Layer 1 (top): .wh.file1 (whiteout for file1) + let layers = vec![ + vec![("file1", false, 0o644)], + vec![(".wh.file1", false, 0o000)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to lookup the file (should fail because it's whited out) + let file_name = CString::new("file1").unwrap(); + let result = fs.lookup(ctx, 1, &file_name); + + // Verify lookup fails + assert!(result.is_err()); + + let non_existent_inode = 999; // Use a high number that shouldn't exist + let open_result = fs.open(ctx, non_existent_inode, libc::O_RDONLY as u32); + assert!(open_result.is_err()); + + Ok(()) +} + +#[test] +fn test_open_and_release_multiple_times() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open and close the file multiple times + for _ in 0..5 { + // Open the file + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + } + + // Verify we can still open the file after multiple open/release cycles + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_open_with_different_flags() -> 
io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the file to get its inode + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Test different open flags + let flags = [ + libc::O_RDONLY, + libc::O_WRONLY, + libc::O_RDWR, + libc::O_RDONLY | libc::O_NONBLOCK, + libc::O_WRONLY | libc::O_APPEND, + ]; + + for flag in flags.iter() { + // Open the file with the current flag + let (handle, _opts) = fs.open(ctx, entry.inode, *flag as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + } + + Ok(()) +} + +#[test] +fn test_opendir_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Open the directory + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_opendir_nonexistent() -> io::Result<()> { + // Create a simple overlayfs with a single layer + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to open a non-existent inode + let result = fs.opendir(ctx, 999, libc::O_RDONLY as u32); + + // Verify it fails with EBADF + match result { + Err(e) => { + 
assert_eq!(e.raw_os_error(), Some(libc::EBADF)); + } + Ok(_) => panic!("Expected error for non-existent inode"), + } + + Ok(()) +} + +#[test] +fn test_opendir_whiteout() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): dir1/ + // Layer 1 (top): .wh.dir1 (whiteout for dir1) + let layers = vec![ + vec![("dir1", true, 0o755)], + vec![(".wh.dir1", false, 0o000)], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Try to lookup the directory (should fail because it's whited out) + let dir_name = CString::new("dir1").unwrap(); + let result = fs.lookup(ctx, 1, &dir_name); + + // Verify lookup fails with ENOENT + match result { + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::ENOENT)); + } + Ok(_) => panic!("Expected error for whited-out directory"), + } + + Ok(()) +} + +#[test] +fn test_opendir_with_copy_up() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): dir1/ + // Layer 1 (top): empty + let layers = vec![vec![("dir1", true, 0o755)], vec![]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // First open the directory normally + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + // Trigger copy-up by creating a new file in the directory + let new_file = CString::new("newfile").unwrap(); + fs.mkdir(ctx, entry.inode, &new_file, 0o755, 0, Extensions::default())?; + + // Verify the directory was copied up to the top layer + let top_layer_dir = temp_dirs[1].path().join("dir1"); + assert!(top_layer_dir.exists()); + assert!(top_layer_dir.is_dir()); + + // Verify we can still open the directory after copy-up + let (handle, _opts) = fs.opendir(ctx, entry.inode, 
libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + + Ok(()) +} + +#[test] +fn test_opendir_and_release_multiple_times() -> io::Result<()> { + // Single-layer overlayfs with one directory that is opened and closed repeatedly + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Open and close the directory multiple times + for _ in 0..5 { + // Open the directory + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Close the directory handle with releasedir, the counterpart of opendir + fs.releasedir(ctx, entry.inode, 0, handle.unwrap())?; + } + + // Verify we can still open the directory after multiple open/release cycles + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + assert!(handle.is_some()); + fs.releasedir(ctx, entry.inode, 0, handle.unwrap())?; + + Ok(()) +} + +#[test] +fn test_opendir_with_different_flags() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory + let layers = vec![vec![("dir1", true, 0o755)]]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup the directory to get its inode + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + + // Test different open flags - only use read-only flags since directories can't be opened for writing + let flags = [ + libc::O_RDONLY | libc::O_DIRECTORY, + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_NONBLOCK, + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_CLOEXEC, + libc::O_RDONLY | libc::O_DIRECTORY | libc::O_NONBLOCK | libc::O_CLOEXEC, + ]; + + for flag in flags.iter() {
+ // Open the directory with the current flag + let (handle, _opts) = fs.opendir(ctx, entry.inode, *flag as u32)?; + + // Verify we got a valid handle + assert!(handle.is_some()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle.unwrap(), false, false, None)?; + } + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/read.rs b/src/devices/src/virtio/fs/tests/overlayfs/read.rs new file mode 100644 index 000000000..715dd236b --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/read.rs @@ -0,0 +1,1265 @@ +use std::{ffi::CString, fs, io}; + +use crate::virtio::{ + fs::filesystem::{Context, FileSystem}, + fuse::FsOptions, + overlayfs::tests::helper::TestContainer, +}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_readlink_basic() -> io::Result<()> { + // Create test layers: + // Lower layer: target_file, link -> target_file + let layers = vec![vec![ + ("target_file", false, 0o644), + // Note: symlinks will be created separately below + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Create symlink in bottom layer + let symlink_path = temp_dirs[0].path().join("link"); + std::os::unix::fs::symlink("target_file", &symlink_path)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink + let link_name = CString::new("link").unwrap(); + let link_entry = fs.lookup(Context::default(), 1, &link_name)?; + let target = fs.readlink(Context::default(), link_entry.inode)?; + + assert_eq!(target, b"target_file"); + + Ok(()) +} + +#[test] +fn test_readlink_multiple_layers() -> io::Result<()> { + // Create test layers: + // Lower layer: target1, link1 -> target1 + // Middle layer: target2, link2 -> target2 + // Upper layer: target3, link3 
-> target3 + let layers = vec![ + vec![("target1", false, 0o644)], + vec![("target2", false, 0o644)], + vec![("target3", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + // Create symlinks in each layer + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link1"))?; + std::os::unix::fs::symlink("target2", temp_dirs[1].path().join("link2"))?; + std::os::unix::fs::symlink("target3", temp_dirs[2].path().join("link3"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink for symlink in bottom layer + let link1_name = CString::new("link1").unwrap(); + let link1_entry = fs.lookup(Context::default(), 1, &link1_name)?; + let target1 = fs.readlink(Context::default(), link1_entry.inode)?; + assert_eq!(target1, b"target1"); + + // Test readlink for symlink in middle layer + let link2_name = CString::new("link2").unwrap(); + let link2_entry = fs.lookup(Context::default(), 1, &link2_name)?; + let target2 = fs.readlink(Context::default(), link2_entry.inode)?; + assert_eq!(target2, b"target2"); + + // Test readlink for symlink in top layer + let link3_name = CString::new("link3").unwrap(); + let link3_entry = fs.lookup(Context::default(), 1, &link3_name)?; + let target3 = fs.readlink(Context::default(), link3_entry.inode)?; + assert_eq!(target3, b"target3"); + + Ok(()) +} + +#[test] +fn test_readlink_shadowed() -> io::Result<()> { + // Create test layers: + // Lower layer: target1, link -> target1 + // Upper layer: link -> target2 (shadows lower layer's link) + let layers = vec![ + vec![("target1", false, 0o644)], + vec![("target2", false, 0o644)], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Create symlinks + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link"))?; + std::os::unix::fs::symlink("target2", temp_dirs[1].path().join("link"))?; + + // Initialize 
filesystem + fs.init(FsOptions::empty())?; + + // Test readlink - should get the symlink from upper layer + let link_name = CString::new("link").unwrap(); + let link_entry = fs.lookup(Context::default(), 1, &link_name)?; + let target = fs.readlink(Context::default(), link_entry.inode)?; + + assert_eq!(target, b"target2", "Should read symlink from upper layer"); + + Ok(()) +} + +#[test] +fn test_readlink_nested() -> io::Result<()> { + // Create test layers with nested directory structure: + // Lower layer: + // - dir1/target1 + // - dir1/link1 -> target1 + // - dir2/target2 + // - dir2/subdir/link2 -> ../target2 + let layers = vec![vec![ + ("dir1", true, 0o755), + ("dir1/target1", false, 0o644), + ("dir2", true, 0o755), + ("dir2/target2", false, 0o644), + ("dir2/subdir", true, 0o755), + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + // Create symlinks + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("dir1/link1"))?; + std::os::unix::fs::symlink("../target2", temp_dirs[0].path().join("dir2/subdir/link2"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Test readlink for simple symlink in directory + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(Context::default(), 1, &dir1_name)?; + let link1_name = CString::new("link1").unwrap(); + let link1_entry = fs.lookup(Context::default(), dir1_entry.inode, &link1_name)?; + let target1 = fs.readlink(Context::default(), link1_entry.inode)?; + assert_eq!(target1, b"target1"); + + // Test readlink for symlink with relative path + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(Context::default(), 1, &dir2_name)?; + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(Context::default(), dir2_entry.inode, &subdir_name)?; + let link2_name = CString::new("link2").unwrap(); + let link2_entry = fs.lookup(Context::default(), subdir_entry.inode, 
&link2_name)?; + let target2 = fs.readlink(Context::default(), link2_entry.inode)?; + assert_eq!(target2, b"../target2"); + + Ok(()) +} + +#[test] +fn test_readlink_errors() -> io::Result<()> { + // Create test layers: + // Lower layer: regular_file, directory (no symlink is created, so every readlink below must fail) + let layers = vec![vec![ + ("regular_file", false, 0o644), + ("directory", true, 0o755), + ]]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + helper::debug_print_layers(&temp_dirs, false)?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Case 1: readlink on a regular file; the test accepts either EINVAL or ENOENT + let file_name = CString::new("regular_file").unwrap(); + let file_entry = fs.lookup(Context::default(), 1, &file_name)?; + let result = fs.readlink(Context::default(), file_entry.inode); + match result { + Err(e) => { + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EINVAL || code == libc::ENOENT, + "Reading link of regular file should return either EINVAL or ENOENT, got {}", + code + ); + } + Ok(_) => panic!("Expected error for regular file"), + } + + // Case 2: readlink on a directory; the test accepts either EINVAL or ENOENT + let dir_name = CString::new("directory").unwrap(); + let dir_entry = fs.lookup(Context::default(), 1, &dir_name)?; + let result = fs.readlink(Context::default(), dir_entry.inode); + match result { + Err(e) => { + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EINVAL || code == libc::ENOENT, + "Reading link of directory should return either EINVAL or ENOENT, got {}", + code + ); + } + Ok(_) => panic!("Expected error for directory"), + } + + // Case 3: readlink on inode 999999, which no lookup in this test returned; must fail with EBADF + let result = fs.readlink(Context::default(), 999999); + match result { + Err(e) => { + assert_eq!( + e.raw_os_error(), + Some(libc::EBADF), + "Reading link with invalid inode should return EBADF" + ); + } + Ok(_) => panic!("Expected error for invalid inode"), + } + + Ok(()) +} + +#[test] +fn test_readlink_whiteout() -> io::Result<()> {
+ // Create test layers: + // Lower layer: target1, link1 -> target1 + // Upper layer: .wh.link1 (whiteout for link1) + let layers = vec![ + vec![("target1", false, 0o644)], + vec![(".wh.link1", false, 0o644)], // Whiteout file + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Create symlink in bottom layer + std::os::unix::fs::symlink("target1", temp_dirs[0].path().join("link1"))?; + + // Initialize filesystem + fs.init(FsOptions::empty())?; + + // Try to lookup whited-out symlink (should fail) + let link_name = CString::new("link1").unwrap(); + match fs.lookup(Context::default(), 1, &link_name) { + Ok(_) => panic!("Expected lookup of whited-out symlink to fail"), + Err(e) => { + assert_eq!( + e.raw_os_error(), + Some(libc::ENOENT), + "Looking up whited-out symlink should return ENOENT" + ); + } + } + + Ok(()) +} + +#[test] +fn test_read_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the entire content + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, entry.inode, handle, &mut writer, 100, 0, None, 0)?; + + assert_eq!(bytes_read, 13); // Length of "Hello, World!" 
+ assert_eq!(&writer.0, b"Hello, World!"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_with_offset() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read with offset + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read( + ctx, + entry.inode, + handle, + &mut writer, + 100, + 7, // Start at offset 7 (after "Hello, ") + None, + 0, + )?; + + assert_eq!(bytes_read, 6); // Length of "World!" 
+ assert_eq!(&writer.0, b"World!"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_partial() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read only first 5 bytes + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read( + ctx, + entry.inode, + handle, + &mut writer, + 5, // Only read 5 bytes + 0, + None, + 0, + )?; + + assert_eq!(bytes_read, 5); + assert_eq!(&writer.0, b"Hello"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_whiteout() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 with content + // Layer 1 (top): .wh.file1 (whiteout for file1) + let layers = vec![ + vec![("file1", false, 0o644)], + vec![(".wh.file1", false, 0o000)], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file in bottom layer + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup must fail with ENOENT (not just any error) because the whiteout hides file1 + let err = fs.lookup(ctx, 1, &CString::new("file1").unwrap()).err(); + assert_eq!(err.and_then(|e| e.raw_os_error()), Some(libc::ENOENT)); + + Ok(()) +} + +#[test] +fn test_read_after_copy_up() -> io::Result<()> { + // Create test layers: + // Layer 0 (bottom): file1 with content + // Layer 1 (top): empty + let layers = vec![vec![("file1",
false, 0o644)], vec![]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file in bottom layer + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Open with write flag to trigger copy-up + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDWR as u32)?; + let handle = handle.unwrap(); + + // Verify the file was copied up + assert!(temp_dirs[1].path().join("file1").exists()); + + // Read the content after copy-up + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, entry.inode, handle, &mut writer, 100, 0, None, 0)?; + + assert_eq!(bytes_read, 13); + assert_eq!(&writer.0, b"Hello, World!"); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_invalid_handle() -> io::Result<()> { + // Single-layer overlayfs with one file; `_temp_dirs` (not `_`) keeps the returned dirs alive + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Read from the root inode with a handle that was never returned by open + let mut writer = TestContainer(Vec::new()); + let result = fs.read( + ctx, + 1, + 999, // Invalid handle + &mut writer, + 100, + 0, + None, + 0, + ); + + assert!(result.is_err()); + assert_eq!(result.unwrap_err().raw_os_error(), Some(libc::EBADF)); + + Ok(()) +} + +#[test] +fn test_read_multiple_times() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file + let file_name = CString::new("file1").unwrap(); +
let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the file multiple times with different offsets + let test_cases: Vec<(u64, u32, &[u8])> = + vec![(0, 5, b"Hello"), (7, 5, b"World"), (12, 1, b"!")]; + + for (offset, size, expected) in test_cases { + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, entry.inode, handle, &mut writer, size, offset, None, 0)?; + + assert_eq!(bytes_read, expected.len()); + assert_eq!(&writer.0, expected); + } + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_nested_directories() -> io::Result<()> { + // Create test layers with nested structure: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (content: "bottom file1") + // - dir1/subdir/ + // - dir1/subdir/file2 (content: "bottom file2") + // Layer 1 (middle): + // - dir1/file3 (content: "middle file3") + // - dir1/subdir/file4 (content: "middle file4") + // Layer 2 (top): + // - dir1/file1 (content: "top file1") + // - dir1/subdir/file5 (content: "top file5") + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file3", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file4", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file5", false, 0o644), + ], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write content to files in different layers + std::fs::write(temp_dirs[0].path().join("dir1/file1"), b"bottom file1")?; + std::fs::write( + temp_dirs[0].path().join("dir1/subdir/file2"), + b"bottom file2", + )?; + std::fs::write(temp_dirs[1].path().join("dir1/file3"), b"middle file3")?; + std::fs::write( + 
temp_dirs[1].path().join("dir1/subdir/file4"), + b"middle file4", + )?; + std::fs::write(temp_dirs[2].path().join("dir1/file1"), b"top file1")?; + std::fs::write(temp_dirs[2].path().join("dir1/subdir/file5"), b"top file5")?; + + let ctx = Context::default(); + + // First lookup dir1 + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Test 1: Read file1 (should get content from top layer) + let file1_name = CString::new("file1").unwrap(); + let file1_entry = fs.lookup(ctx, dir1_entry.inode, &file1_name)?; + let (handle, _) = fs.open(ctx, file1_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file1_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 9); + assert_eq!(&writer.0, b"top file1"); + fs.release(ctx, file1_entry.inode, 0, handle, false, false, None)?; + + // Test 2: Read file3 (from middle layer) + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(ctx, dir1_entry.inode, &file3_name)?; + let (handle, _) = fs.open(ctx, file3_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file3_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 12); + assert_eq!(&writer.0, b"middle file3"); + fs.release(ctx, file3_entry.inode, 0, handle, false, false, None)?; + + // Lookup subdir + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(ctx, dir1_entry.inode, &subdir_name)?; + + // Test 3: Read file2 (from bottom layer) + let file2_name = CString::new("file2").unwrap(); + let file2_entry = fs.lookup(ctx, subdir_entry.inode, &file2_name)?; + let (handle, _) = fs.open(ctx, file2_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = 
fs.read(ctx, file2_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 12); + assert_eq!(&writer.0, b"bottom file2"); + fs.release(ctx, file2_entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_read_with_whiteouts_and_opaque_dirs() -> io::Result<()> { + // Create test layers with whiteouts and opaque directories: + // Layer 0 (bottom): + // - dir1/ + // - dir1/file1 (content: "file1") + // - dir1/subdir/ + // - dir1/subdir/file2 (content: "file2") + // Layer 1 (middle): + // - dir1/ + // - dir1/.wh.file1 (whiteout file1) + // - dir1/subdir/ + // - dir1/subdir/.wh..wh..opq (opaque dir) + // - dir1/subdir/file3 (content: "file3") + // Layer 2 (top): + // - dir1/ + // - dir1/file4 (content: "file4") + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh.file1", false, 0o000), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/.wh..wh..opq", false, 0o000), + ("dir1/subdir/file3", false, 0o644), + ], + vec![("dir1", true, 0o755), ("dir1/file4", false, 0o644)], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write content to files + std::fs::write(temp_dirs[0].path().join("dir1/file1"), b"file1")?; + std::fs::write(temp_dirs[0].path().join("dir1/subdir/file2"), b"file2")?; + std::fs::write(temp_dirs[1].path().join("dir1/subdir/file3"), b"file3")?; + std::fs::write(temp_dirs[2].path().join("dir1/file4"), b"file4")?; + + let ctx = Context::default(); + + // First lookup dir1 + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + // Test 1: Try to read whited-out file1 (should fail) + let file1_name = CString::new("file1").unwrap(); + assert!(fs.lookup(ctx, dir1_entry.inode, &file1_name).is_err()); + + // Test 2: Read file4 from top layer + let file4_name = CString::new("file4").unwrap(); + let 
file4_entry = fs.lookup(ctx, dir1_entry.inode, &file4_name)?; + let (handle, _) = fs.open(ctx, file4_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file4_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 5); + assert_eq!(&writer.0, b"file4"); + fs.release(ctx, file4_entry.inode, 0, handle, false, false, None)?; + + // Lookup subdir + let subdir_name = CString::new("subdir").unwrap(); + let subdir_entry = fs.lookup(ctx, dir1_entry.inode, &subdir_name)?; + + // Test 3: Try to read file2 through opaque directory (should fail) + let file2_name = CString::new("file2").unwrap(); + assert!(fs.lookup(ctx, subdir_entry.inode, &file2_name).is_err()); + + // Test 4: Read file3 through opaque directory (should succeed) + let file3_name = CString::new("file3").unwrap(); + let file3_entry = fs.lookup(ctx, subdir_entry.inode, &file3_name)?; + let (handle, _) = fs.open(ctx, file3_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut writer = TestContainer(Vec::new()); + let bytes_read = fs.read(ctx, file3_entry.inode, handle, &mut writer, 100, 0, None, 0)?; + assert_eq!(bytes_read, 5); + assert_eq!(&writer.0, b"file3"); + fs.release(ctx, file3_entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} + +#[test] +fn test_readdir_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a directory with files + let layers = vec![vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ]]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let 
mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries + assert!(entries.contains(&"file1".to_string())); + assert!(entries.contains(&"file2".to_string())); + assert_eq!(entries.len(), 2); + + Ok(()) +} + +#[test] +fn test_readdir_with_offset() -> io::Result<()> { + // Create an overlayfs with multiple layers containing overlapping directories and files + // Layer 0 (lowest): Some initial files + // Layer 1 (middle): Some additional files and modifications + // Layer 2 (top): More files and potential whiteouts + let layers = vec![ + // Layer 0 (lowest) + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("dir1/common", false, 0o644), + ], + // Layer 1 (middle) + vec![ + ("dir1", true, 0o755), + ("dir1/file3", false, 0o644), + ("dir1/file4", false, 0o644), + ("dir1/common", false, 0o644), // This overlays the one in layer 0 + ], + // Layer 2 (top) + vec![ + ("dir1", true, 0o755), + ("dir1/file5", false, 0o644), + ("dir1/file6", false, 0o644), + ("dir1/file7", false, 0o644), + ], + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the first batch of directory entries and save the offset + let mut entries = Vec::new(); + let mut last_offset = 0; + fs.readdir( + ctx, + entry.inode, + handle, + 1024, // Small buffer to force multiple reads + 0, + |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + last_offset = dir_entry.offset; + Ok(0) + }, + )?; + + // Read the second batch of directory entries starting from the last 
offset + let mut more_entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, last_offset, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + more_entries.push(name); + Ok(1) + })?; + + // Verify that we got all entries between the two reads + let all_entries: Vec<_> = entries + .into_iter() + .chain(more_entries.into_iter()) + .collect(); + + assert!(all_entries.contains(&"file1".to_string())); + assert!(all_entries.contains(&"file2".to_string())); + assert!(all_entries.contains(&"file3".to_string())); + assert!(all_entries.contains(&"file4".to_string())); + assert!(all_entries.contains(&"file5".to_string())); + assert!(all_entries.contains(&"file6".to_string())); + assert!(all_entries.contains(&"file7".to_string())); + assert!(all_entries.contains(&"common".to_string())); + + // Verify we have the right number of entries + assert_eq!(all_entries.len(), 8); + + Ok(()) +} + +#[test] +fn test_readdir_empty_directory() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty directory + let layers = vec![vec![("empty_dir", true, 0o755)]]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("empty_dir").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(0) + })?; + + // Verify the entries (should be empty since "." and ".." 
are handled by the kernel) + assert_eq!(entries.len(), 0); + + Ok(()) +} + +#[test] +fn test_readdir_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers: + // Layer 0 (bottom): dir1 with file1, file2, file3 + // Layer 1 (top): dir1 with file2 whited out + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("dir1/file3", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh.file2", false, 0o644), // Whiteout for file2 + ], + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Exactly "file1" and "file3" must be listed; "file2" is whited out (sorted for a stable compare) + entries.sort(); + assert_eq!(entries, ["file1", "file3"]); + // Close the directory handle so the test does not leave it open + fs.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +#[test] +fn test_readdir_multiple_layers() -> io::Result<()> { + let layers = vec![ + vec![("dir1", true, 0o755), ("dir1/file1", false, 0o644)], + vec![ + ("dir1", true, 0o755), + ("dir1/file2", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file1", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file3", false, 0o644), + ("dir2/file2", false, 0o644), + ("dir3", true, 0o755), + ("dir3/file1", false, 0o644), + ], + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the dir1 + let entry = fs.lookup(ctx, 1,
&CString::new("dir1").unwrap())?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries (should include "file1", "file2", and "file3") + assert!(entries.contains(&"file1".to_string())); + assert!(entries.contains(&"file2".to_string())); + assert!(entries.contains(&"file3".to_string())); + assert_eq!(entries.len(), 3); + + // Lookup and open the dir2 + let entry = fs.lookup(ctx, 1, &CString::new("dir2").unwrap())?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries (should include "file1", and "file2") + assert!(entries.contains(&"file1".to_string())); + assert!(entries.contains(&"file2".to_string())); + assert_eq!(entries.len(), 2); + + // Lookup and open the dir3 + let entry = fs.lookup(ctx, 1, &CString::new("dir3").unwrap())?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Verify the entries (should include "file1") + assert!(entries.contains(&"file1".to_string())); + assert_eq!(entries.len(), 1); + + Ok(()) +} + +#[test] +fn test_readdir_opaque_marker() -> io::Result<()> { + // Create an overlayfs with three layers: + // Layer 0 (bottom): dir1 with 
file1, file2, file3 + // Layer 1 (middle): dir1 with opaque marker, file4, file5 + // Layer 2 (top): dir1 with file5 (shadows middle), file6, file7 + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("dir1/file3", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/.wh..wh..opq", false, 0o644), // Opaque marker for dir1 + ("dir1/file4", false, 0o644), + ("dir1/file5", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/file5", false, 0o644), // Shadows file5 from layer 1 + ("dir1/file6", false, 0o644), + ("dir1/file7", false, 0o644), + ], + ]; + + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Sort entries for consistent comparison + entries.sort(); + + // Due to the opaque marker in the middle layer, we should only see: + // - files from the top layer (file5, file6, file7) + // - files from the middle layer that aren't shadowed by the top (file4) + // - NO files from the bottom layer (file1, file2, file3 should be hidden) + let expected_entries = vec![ + "file4".to_string(), + "file5".to_string(), + "file6".to_string(), + "file7".to_string(), + ]; + + assert_eq!(entries, expected_entries, "Unexpected directory entries"); + + // Release the directory handle + fs.releasedir(ctx, entry.inode, 0, handle)?; + + // Additional test: Create a second directory with opaque marker in top layer + let layers2 = vec![ + vec![ + ("dir2", true, 0o755), + ("dir2/bottom1", false, 
0o644), + ("dir2/bottom2", false, 0o644), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/middle1", false, 0o644), + ("dir2/middle2", false, 0o644), + ], + vec![ + ("dir2", true, 0o755), + ("dir2/.wh..wh..opq", false, 0o644), // Opaque marker in top layer + ("dir2/top1", false, 0o644), + ], + ]; + + let (fs2, _temp_dirs2) = helper::create_overlayfs(layers2)?; + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir2").unwrap(); + let entry = fs2.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs2.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs2.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Sort entries for consistent comparison + entries.sort(); + + // With opaque marker in the top layer, we should only see: + // - files from the top layer (top1) + // - NO files from middle or bottom layers + assert_eq!( + entries, + vec!["top1".to_string()], + "Unexpected entries in dir2" + ); + + // Release the directory handle + fs2.releasedir(ctx, entry.inode, 0, handle)?; + + Ok(()) +} + +#[test] +fn test_readdir_shadow() -> io::Result<()> { + // Create an overlayfs with three layers with shadowing: + // Layer 0 (bottom): dir1 with common, only_bottom, shadowed1, shadowed2 + // Layer 1 (middle): dir1 with common, only_middle, shadowed1 + // Layer 2 (top): dir1 with common, only_top, shadowed2 + // + // Each file has different content to verify proper shadowing + let layers = vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/common", false, 0o644), + ("dir1/only_bottom", false, 0o644), + ("dir1/shadowed1", false, 0o644), + ("dir1/shadowed2", false, 0o644), + ], + vec![ + ("dir1", true, 0o755), + ("dir1/common", false, 0o644), + ("dir1/only_middle", false, 0o644), + ("dir1/shadowed1", false, 0o644), + ], + 
vec![ + ("dir1", true, 0o755), + ("dir1/common", false, 0o644), + ("dir1/only_top", false, 0o644), + ("dir1/shadowed2", false, 0o644), + ], + ]; + + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write different content to each layer's files + // Bottom layer + fs::write( + temp_dirs[0].path().join("dir1/common"), + "bottom layer common content", + )?; + fs::write( + temp_dirs[0].path().join("dir1/only_bottom"), + "only in bottom layer", + )?; + fs::write( + temp_dirs[0].path().join("dir1/shadowed1"), + "shadowed1 bottom content", + )?; + fs::write( + temp_dirs[0].path().join("dir1/shadowed2"), + "shadowed2 bottom content", + )?; + + // Middle layer + fs::write( + temp_dirs[1].path().join("dir1/common"), + "middle layer common content", + )?; + fs::write( + temp_dirs[1].path().join("dir1/only_middle"), + "only in middle layer", + )?; + fs::write( + temp_dirs[1].path().join("dir1/shadowed1"), + "shadowed1 middle content", + )?; + + // Top layer + fs::write( + temp_dirs[2].path().join("dir1/common"), + "top layer common content", + )?; + fs::write( + temp_dirs[2].path().join("dir1/only_top"), + "only in top layer", + )?; + fs::write( + temp_dirs[2].path().join("dir1/shadowed2"), + "shadowed2 top content", + )?; + + let ctx = Context::default(); + + // Lookup and open the directory + let dir_name = CString::new("dir1").unwrap(); + let entry = fs.lookup(ctx, 1, &dir_name)?; + let (handle, _opts) = fs.opendir(ctx, entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + // Read the directory entries + let mut entries = Vec::new(); + fs.readdir(ctx, entry.inode, handle, 4096, 0, |dir_entry| { + let name = String::from_utf8_lossy(dir_entry.name).to_string(); + entries.push(name); + Ok(1) + })?; + + // Sort entries for consistent comparison + entries.sort(); + + // Release the directory handle + fs.releasedir(ctx, entry.inode, 0, handle)?; + + // We should see all unique filenames across layers + // Each file should appear exactly once + 
let expected_entries = vec![ + "common".to_string(), + "only_bottom".to_string(), + "only_middle".to_string(), + "only_top".to_string(), + "shadowed1".to_string(), + "shadowed2".to_string(), + ]; + + assert_eq!(entries, expected_entries, "Unexpected directory entries"); + + // Now verify the content of each file to check shadowing + + // 1. common file - should have top layer content + let common_entry = fs.lookup(ctx, entry.inode, &CString::new("common").unwrap())?; + let (handle, _) = fs.open(ctx, common_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + common_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "top layer common content", + "common file should have top layer content" + ); + fs.release(ctx, common_entry.inode, 0, handle, false, false, None)?; + + // 2. shadowed1 file - should have middle layer content (shadowed by middle over bottom) + let shadowed1_entry = fs.lookup(ctx, entry.inode, &CString::new("shadowed1").unwrap())?; + let (handle, _) = fs.open(ctx, shadowed1_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + shadowed1_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "shadowed1 middle content", + "shadowed1 file should have middle layer content" + ); + fs.release(ctx, shadowed1_entry.inode, 0, handle, false, false, None)?; + + // 3. 
shadowed2 file - should have top layer content (shadowed by top over bottom) + let shadowed2_entry = fs.lookup(ctx, entry.inode, &CString::new("shadowed2").unwrap())?; + let (handle, _) = fs.open(ctx, shadowed2_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + shadowed2_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "shadowed2 top content", + "shadowed2 file should have top layer content" + ); + fs.release(ctx, shadowed2_entry.inode, 0, handle, false, false, None)?; + + // 4. only_bottom file - should exist and have bottom layer content + let only_bottom_entry = fs.lookup(ctx, entry.inode, &CString::new("only_bottom").unwrap())?; + let (handle, _) = fs.open(ctx, only_bottom_entry.inode, libc::O_RDONLY as u32)?; + let handle = handle.unwrap(); + + let mut container = TestContainer(Vec::new()); + fs.read( + ctx, + only_bottom_entry.inode, + handle, + &mut container, + 1024, + 0, + None, + 0, + )?; + assert_eq!( + String::from_utf8_lossy(&container.0), + "only in bottom layer", + "only_bottom file should have bottom layer content" + ); + fs.release(ctx, only_bottom_entry.inode, 0, handle, false, false, None)?; + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/remove.rs b/src/devices/src/virtio/fs/tests/overlayfs/remove.rs new file mode 100644 index 000000000..69c33f5e4 --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/remove.rs @@ -0,0 +1,508 @@ +use std::{ffi::CString, io}; + +use crate::virtio::fs::filesystem::{Context, FileSystem}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_unlink_basic() -> io::Result<()> { + // Create a simple overlayfs with a single 
layer containing a file + let (fs, temp_dirs) = helper::create_overlayfs(vec![vec![("file1.txt", false, 0o644)]])?; + let ctx = Context::default(); + + // Lookup the file to get its parent inode (root) and verify it exists + let file_name = CString::new("file1.txt").unwrap(); + let _ = fs.lookup(ctx, 1, &file_name)?; + + // Unlink the file + fs.unlink(ctx, 1, &file_name)?; + + // Verify the file is gone + match fs.lookup(ctx, 1, &file_name) { + Ok(_) => panic!("File still exists after unlink"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify the file is physically removed from the filesystem + assert!(!temp_dirs[0].path().join("file1.txt").exists()); + + Ok(()) +} + +#[test] +fn test_unlink_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers: + // - Lower layer: contains file1.txt + // - Upper layer: empty + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![("file1.txt", false, 0o644)], // lower layer + vec![], // upper layer + ])?; + let ctx = Context::default(); + + // Lookup the file to verify it exists + let file_name = CString::new("file1.txt").unwrap(); + let _ = fs.lookup(ctx, 1, &file_name)?; + + // Unlink the file - this should create a whiteout in the upper layer + fs.unlink(ctx, 1, &file_name)?; + + // Verify the file appears to be gone through the overlayfs + match fs.lookup(ctx, 1, &file_name) { + Ok(_) => panic!("File still exists after unlink"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify the original file still exists in the lower layer + assert!(temp_dirs[0].path().join("file1.txt").exists()); + + // Verify a whiteout was created in the upper layer + assert!(temp_dirs[1].path().join(".wh.file1.txt").exists()); + + Ok(()) +} + +#[test] +fn test_unlink_multiple_layers() -> io::Result<()> { + // Create an overlayfs with three layers, each containing different files + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![("lower.txt", false, 0o644)], 
// lowest layer + vec![("middle.txt", false, 0o644)], // middle layer + vec![("upper.txt", false, 0o644)], // upper layer + ])?; + let ctx = Context::default(); + + // Test unlinking a file from each layer + for file in &["lower.txt", "middle.txt", "upper.txt"] { + let file_name = CString::new(*file).unwrap(); + + // Verify file exists before unlink + fs.lookup(ctx, 1, &file_name)?; + + // Unlink the file + fs.unlink(ctx, 1, &file_name)?; + + // Verify file appears gone through overlayfs + match fs.lookup(ctx, 1, &file_name) { + Ok(_) => panic!("File {} still exists after unlink", file), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + } + + // Verify physical state of layers: + // - Files in lower layers should still exist + // - File in top layer should be gone + // - Whiteouts should exist in top layer for lower files + assert!(temp_dirs[0].path().join("lower.txt").exists()); + assert!(temp_dirs[1].path().join("middle.txt").exists()); + assert!(!temp_dirs[2].path().join("upper.txt").exists()); + assert!(temp_dirs[2].path().join(".wh.lower.txt").exists()); + assert!(temp_dirs[2].path().join(".wh.middle.txt").exists()); + + Ok(()) +} + +#[test] +fn test_unlink_nested_files() -> io::Result<()> { + // Create an overlayfs with nested directory structure + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir1/subdir", true, 0o755), + ("dir1/subdir/file2.txt", false, 0o644), + ], + vec![], // empty upper layer + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Lookup and unlink nested files + let dir1_name = CString::new("dir1").unwrap(); + let subdir_name = CString::new("subdir").unwrap(); + let file1_name = CString::new("file1.txt").unwrap(); + let file2_name = CString::new("file2.txt").unwrap(); + + // Get directory inodes + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let subdir_entry = fs.lookup(ctx, dir1_entry.inode, 
&subdir_name)?; + + // Unlink file2.txt from subdir + fs.unlink(ctx, subdir_entry.inode, &file2_name)?; + + // Verify file2.txt is gone but file1.txt still exists + match fs.lookup(ctx, subdir_entry.inode, &file2_name) { + Ok(_) => panic!("file2.txt still exists after unlink"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + fs.lookup(ctx, dir1_entry.inode, &file1_name)?; // should succeed + + helper::debug_print_layers(&temp_dirs, false)?; + + // Verify whiteout was created in correct location + assert!(temp_dirs[1] + .path() + .join("dir1/subdir/.wh.file2.txt") + .exists()); + + Ok(()) +} + +#[test] +fn test_unlink_errors() -> io::Result<()> { + // Create a basic overlayfs; `_temp_dirs` (not `_`) keeps the layer dirs alive for the whole test + let (fs, _temp_dirs) = helper::create_overlayfs(vec![vec![("file1.txt", false, 0o644)]])?; + let ctx = Context::default(); + + // Test: Try to unlink non-existent file + let nonexistent = CString::new("nonexistent.txt").unwrap(); + match fs.unlink(ctx, 1, &nonexistent) { + Ok(_) => panic!("Unlink succeeded on non-existent file"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Test: Try to unlink with invalid parent inode + let file_name = CString::new("file1.txt").unwrap(); + match fs.unlink(ctx, 999999, &file_name) { + Ok(_) => panic!("Unlink succeeded with invalid parent inode"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EBADF)), + } + + // Test: Try to unlink with invalid name (containing path traversal) + let invalid_name = CString::new("../file1.txt").unwrap(); + match fs.unlink(ctx, 1, &invalid_name) { + Ok(_) => panic!("Unlink succeeded with invalid name"), + Err(e) => { + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EPERM || code == libc::ENOENT, + "Expected EPERM or ENOENT error for path traversal, got {}", + code + ); + } + } + + Ok(()) +} + +#[test] +fn test_unlink_complex_layers() -> io::Result<()> { + // Create an overlayfs with complex layer structure: + // - Lower layer: base files + // - Middle layer: some
files deleted, some added + // - Upper layer: more modifications + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + // lower layer + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ("dir1/file2.txt", false, 0o644), + ("dir2", true, 0o755), + ("dir2/file3.txt", false, 0o644), + ], + vec![ + // middle layer + ("dir1/new_file.txt", false, 0o644), + ("dir2/file4.txt", false, 0o644), + // Whiteout in middle layer for file3.txt in dir2 - placed in dir2 directory + ("dir2/.wh.file3.txt", false, 0o000), + ], + vec![ + // upper layer + ("dir3", true, 0o755), + ("dir3/file5.txt", false, 0o644), + ], + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Test 1: Unlink a file that exists in the top layer + let dir3_name = CString::new("dir3").unwrap(); + let file5_name = CString::new("file5.txt").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + fs.unlink(ctx, dir3_entry.inode, &file5_name)?; + assert!(!temp_dirs[2].path().join("dir3/file5.txt").exists()); + + // Test 2: Unlink a file from middle layer + let dir1_name = CString::new("dir1").unwrap(); + let new_file_name = CString::new("new_file.txt").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + fs.unlink(ctx, dir1_entry.inode, &new_file_name)?; + // Expect a whiteout created in the top layer for new_file.txt + assert!(temp_dirs[2].path().join("dir1/.wh.new_file.txt").exists()); + + // Test 3: Unlink a file from lowest layer + let file1_name = CString::new("file1.txt").unwrap(); + fs.unlink(ctx, dir1_entry.inode, &file1_name)?; + // Expect a whiteout in the top layer but the original file remains in lower layer + assert!(temp_dirs[2].path().join("dir1/.wh.file1.txt").exists()); + assert!(temp_dirs[0].path().join("dir1/file1.txt").exists()); + + // Test 4: Unlink a file from lowest layer that is already whiteouted + let file2_name = CString::new("file2.txt").unwrap(); + // First unlink to create the whiteout + fs.unlink(ctx, 
dir1_entry.inode, &file2_name)?; + assert!(temp_dirs[2].path().join("dir1/.wh.file2.txt").exists()); + // Second attempt should fail with ENOENT + match fs.unlink(ctx, dir1_entry.inode, &file2_name) { + Ok(_) => panic!("Unlink succeeded on already whiteouted file"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + Ok(()) +} + +#[test] +fn test_rmdir_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty directory + let (fs, temp_dirs) = helper::create_overlayfs(vec![vec![("empty_dir", true, 0o755)]])?; + let ctx = Context::default(); + + // Lookup the directory to verify it exists + let dir_name = CString::new("empty_dir").unwrap(); + let _ = fs.lookup(ctx, 1, &dir_name)?; + + // Remove the directory + fs.rmdir(ctx, 1, &dir_name)?; + + // Verify the directory is gone + match fs.lookup(ctx, 1, &dir_name) { + Ok(_) => panic!("Directory still exists after rmdir"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify the directory is physically removed from the filesystem + assert!(!temp_dirs[0].path().join("empty_dir").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers: + // - Lower layer: contains empty_dir + // - Upper layer: empty + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![("empty_dir", true, 0o755)], // lower layer + vec![], // upper layer + ])?; + let ctx = Context::default(); + + // Lookup the directory to verify it exists + let dir_name = CString::new("empty_dir").unwrap(); + let _ = fs.lookup(ctx, 1, &dir_name)?; + + // Remove the directory - this should create a whiteout in the upper layer + fs.rmdir(ctx, 1, &dir_name)?; + + // Verify the directory appears to be gone through the overlayfs + match fs.lookup(ctx, 1, &dir_name) { + Ok(_) => panic!("Directory still exists after rmdir"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Verify the original 
directory still exists in the lower layer + assert!(temp_dirs[0].path().join("empty_dir").exists()); + + // Verify a whiteout was created in the upper layer + assert!(temp_dirs[1].path().join(".wh.empty_dir").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_multiple_layers() -> io::Result<()> { + // Create an overlayfs with three layers, each containing different directories + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![("lower_dir", true, 0o755)], // lowest layer + vec![("middle_dir", true, 0o755)], // middle layer + vec![("upper_dir", true, 0o755)], // upper layer + ])?; + let ctx = Context::default(); + + // Test removing a directory from each layer + for dir in &["lower_dir", "middle_dir", "upper_dir"] { + let dir_name = CString::new(*dir).unwrap(); + + // Verify directory exists before removal + fs.lookup(ctx, 1, &dir_name)?; + + // Remove the directory + fs.rmdir(ctx, 1, &dir_name)?; + + // Verify directory appears gone through overlayfs + match fs.lookup(ctx, 1, &dir_name) { + Ok(_) => panic!("Directory {} still exists after rmdir", dir), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + } + + // Verify physical state of layers: + // - Directories in lower layers should still exist + // - Directory in top layer should be gone + // - Whiteouts should exist in top layer for lower directories + assert!(temp_dirs[0].path().join("lower_dir").exists()); + assert!(temp_dirs[1].path().join("middle_dir").exists()); + assert!(!temp_dirs[2].path().join("upper_dir").exists()); + assert!(temp_dirs[2].path().join(".wh.lower_dir").exists()); + assert!(temp_dirs[2].path().join(".wh.middle_dir").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_nested_dirs() -> io::Result<()> { + // Create an overlayfs with nested directory structure + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + ("dir1", true, 0o755), + ("dir1/subdir1", true, 0o755), + ("dir1/subdir2", true, 0o755), + ("dir1/subdir2/nested", true, 0o755), + ], + vec![], 
// empty upper layer + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Lookup and remove nested directories + let dir1_name = CString::new("dir1").unwrap(); + let subdir2_name = CString::new("subdir2").unwrap(); + let nested_name = CString::new("nested").unwrap(); + + // Get directory inodes + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + let subdir2_entry = fs.lookup(ctx, dir1_entry.inode, &subdir2_name)?; + + // Remove nested directory + fs.rmdir(ctx, subdir2_entry.inode, &nested_name)?; + + // Verify nested is gone but subdir1 still exists + match fs.lookup(ctx, subdir2_entry.inode, &nested_name) { + Ok(_) => panic!("nested directory still exists after rmdir"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + let subdir1_name = CString::new("subdir1").unwrap(); + fs.lookup(ctx, dir1_entry.inode, &subdir1_name)?; // should succeed + + // Verify whiteout was created in correct location + assert!(temp_dirs[1].path().join("dir1/subdir2/.wh.nested").exists()); + + Ok(()) +} + +#[test] +fn test_rmdir_errors() -> io::Result<()> { + // Create an overlayfs with a directory containing a file + let (fs, _temp_dirs) = helper::create_overlayfs(vec![vec![ + ("dir1", true, 0o755), + ("dir1/file1.txt", false, 0o644), + ]])?; + let ctx = Context::default(); + + // Test: Try to remove non-existent directory + let nonexistent = CString::new("nonexistent").unwrap(); + match fs.rmdir(ctx, 1, &nonexistent) { + Ok(_) => panic!("rmdir succeeded on non-existent directory"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOENT)), + } + + // Test: Try to remove with invalid parent inode + let dir_name = CString::new("dir1").unwrap(); + match fs.rmdir(ctx, 999999, &dir_name) { + Ok(_) => panic!("rmdir succeeded with invalid parent inode"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::EBADF)), + } + + // Test: Try to remove non-empty directory + match fs.rmdir(ctx, 1, &dir_name) { + Ok(_) => panic!("rmdir 
succeeded on non-empty directory"), + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::ENOTEMPTY)); + } + } + + // Test: Try to remove with invalid name (containing path traversal) + let invalid_name = CString::new("../dir1").unwrap(); + match fs.rmdir(ctx, 1, &invalid_name) { + Ok(_) => panic!("rmdir succeeded with invalid name"), + Err(e) => { + let code = e.raw_os_error().unwrap(); + assert!( + code == libc::EPERM || code == libc::ENOENT, + "Expected EPERM or ENOENT error for path traversal, got {}", + code + ); + } + } + + // Test: Try to remove a file using rmdir + let file_name = CString::new("file1.txt").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir_name)?; + match fs.rmdir(ctx, dir1_entry.inode, &file_name) { + Ok(_) => panic!("rmdir succeeded on a file"), + Err(e) => assert_eq!(e.raw_os_error(), Some(libc::ENOTDIR)), + } + + Ok(()) +} + +#[test] +fn test_rmdir_complex_layers() -> io::Result<()> { + // Create an overlayfs with complex layer structure: + // - Lower layer: base directories + // - Middle layer: some directories deleted, some added + // - Upper layer: more modifications + let (fs, temp_dirs) = helper::create_overlayfs(vec![ + vec![ + // lower layer + ("dir1", true, 0o755), + ("dir1/subdir1", true, 0o755), + ("dir2", true, 0o755), + ("dir2/subdir2", true, 0o755), + ], + vec![ + // middle layer + ("dir1/new_dir", true, 0o755), + ("dir2/subdir3", true, 0o755), + // Whiteout in middle layer for subdir2 in dir2 + ("dir2/.wh.subdir2", false, 0o000), + ], + vec![ + // upper layer + ("dir3", true, 0o755), + ("dir3/subdir4", true, 0o755), + ], + ])?; + helper::debug_print_layers(&temp_dirs, false)?; + let ctx = Context::default(); + + // Test 1: Remove a directory that exists in the top layer + let dir3_name = CString::new("dir3").unwrap(); + let subdir4_name = CString::new("subdir4").unwrap(); + let dir3_entry = fs.lookup(ctx, 1, &dir3_name)?; + fs.rmdir(ctx, dir3_entry.inode, &subdir4_name)?; + 
assert!(!temp_dirs[2].path().join("dir3/subdir4").exists()); + + // Test 2: Remove a directory from middle layer + let dir1_name = CString::new("dir1").unwrap(); + let new_dir_name = CString::new("new_dir").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + fs.rmdir(ctx, dir1_entry.inode, &new_dir_name)?; + // Expect a whiteout created in the top layer for new_dir + assert!(temp_dirs[2].path().join("dir1/.wh.new_dir").exists()); + + // Test 3: Remove a directory from lowest layer + let subdir1_name = CString::new("subdir1").unwrap(); + fs.rmdir(ctx, dir1_entry.inode, &subdir1_name)?; + // Expect a whiteout in the top layer but the original directory remains in lower layer + assert!(temp_dirs[2].path().join("dir1/.wh.subdir1").exists()); + assert!(temp_dirs[0].path().join("dir1/subdir1").exists()); + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/tests/overlayfs/write.rs b/src/devices/src/virtio/fs/tests/overlayfs/write.rs new file mode 100644 index 000000000..a823f8837 --- /dev/null +++ b/src/devices/src/virtio/fs/tests/overlayfs/write.rs @@ -0,0 +1,428 @@ +use std::{ffi::CString, io}; + +use crate::virtio::{fs::filesystem::{Context, FileSystem}, overlayfs::tests::helper::TestContainer}; + +use super::helper; + +//-------------------------------------------------------------------------------------------------- +// Tests +//-------------------------------------------------------------------------------------------------- + +#[test] +fn test_write_basic() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, (libc::O_WRONLY | libc::O_TRUNC) as u32)?; + let handle = 
handle.unwrap(); + + // Write content to the file + let content = b"Hello, World!"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(file_content, content); + + Ok(()) +} + +#[test] +fn test_write_with_offset() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file with initial content + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some initial content to the file + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_WRONLY as u32)?; + let handle = handle.unwrap(); + + // Write content at an offset + let content = b"Rusty"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + content.len() as u32, + 7, + None, + false, + false, + 0, + )?; + + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(&file_content, b"Hello, Rusty!"); + + Ok(()) +} + +#[test] +fn test_write_partial() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing an empty file + let layers = vec![vec![("file1", 
false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, (libc::O_WRONLY | libc::O_TRUNC) as u32)?; + let handle = handle.unwrap(); + + // Write content to the file, but request to write more than we have + let content = b"Hello, World!"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + 100, + 0, + None, + false, + false, + 0, + )?; + + // Should only write what's available + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(file_content, content); + + Ok(()) +} + +#[test] +fn test_write_whiteout() -> io::Result<()> { + // Create an overlayfs with two layers, where the top layer has a whiteout for file1 + let layers = vec![ + vec![("file1", false, 0o644)], + vec![(".wh.file1", false, 0o644)], // Whiteout for file1 + ]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file (should fail because it's whited out) + let file_name = CString::new("file1").unwrap(); + let lookup_result = fs.lookup(ctx, 1, &file_name); + assert!(lookup_result.is_err()); + + Ok(()) +} + +#[test] +fn test_write_after_copy_up() -> io::Result<()> { + // Create an overlayfs with two layers, where file1 exists in the lower layer + let layers = vec![vec![("file1", false, 0o644)], vec![]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + // Write some initial content to the file in the lower layer + std::fs::write(temp_dirs[0].path().join("file1"), b"Hello, World!")?; + + 
let ctx = Context::default(); + + // Lookup and open the file with write permissions (should trigger copy-up) + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, libc::O_WRONLY as u32)?; + let handle = handle.unwrap(); + + // Write new content to the file + let content = b"Hello, Rusty!"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly to the upper layer + let file_content = std::fs::read(temp_dirs[1].path().join("file1"))?; + assert_eq!(file_content, content); + + // The lower layer should remain unchanged + let lower_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(lower_content, b"Hello, World!"); + + Ok(()) +} + +#[test] +fn test_write_invalid_handle() -> io::Result<()> { + // Create a simple overlayfs with a single layer containing a file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, _temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup the file + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + + // Try to write with an invalid handle + let invalid_handle = 12345; + let mut reader = TestContainer(b"Hello".to_vec()); + let result = fs.write( + ctx, + entry.inode, + invalid_handle, + &mut reader, + 5, + 0, + None, + false, + false, + 0, + ); + + // Should fail with EBADF + match result { + Err(e) => { + assert_eq!(e.raw_os_error(), Some(libc::EBADF)); + } + Ok(_) => panic!("Expected error for invalid handle"), + } + + Ok(()) +} + +#[test] +fn test_write_multiple_times() -> io::Result<()> { + // Create a simple 
overlayfs with a single layer containing an empty file + let layers = vec![vec![("file1", false, 0o644)]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup and open the file with write permissions + let file_name = CString::new("file1").unwrap(); + let entry = fs.lookup(ctx, 1, &file_name)?; + let (handle, _opts) = fs.open(ctx, entry.inode, (libc::O_WRONLY | libc::O_TRUNC) as u32)?; + let handle = handle.unwrap(); + + // Write content to the file in multiple operations + let content1 = b"Hello, "; + let mut reader1 = TestContainer(content1.to_vec()); + let bytes_written1 = fs.write( + ctx, + entry.inode, + handle, + &mut reader1, + content1.len() as u32, + 0, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written1, content1.len()); + + let content2 = b"World!"; + let mut reader2 = TestContainer(content2.to_vec()); + let bytes_written2 = fs.write( + ctx, + entry.inode, + handle, + &mut reader2, + content2.len() as u32, + bytes_written1 as u64, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written2, content2.len()); + + // Release the handle + fs.release(ctx, entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_content = std::fs::read(temp_dirs[0].path().join("file1"))?; + assert_eq!(file_content, b"Hello, World!"); + + Ok(()) +} + +#[test] +fn test_write_nested_directories() -> io::Result<()> { + // Create an overlayfs with nested directories + let layers = vec![vec![ + ("dir1", true, 0o755), + ("dir1/dir2", true, 0o755), + ("dir1/dir2/file1", false, 0o644), + ]]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Lookup the nested directories and file + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let dir2_name = CString::new("dir2").unwrap(); + let dir2_entry = fs.lookup(ctx, dir1_entry.inode, &dir2_name)?; + + let file_name = 
CString::new("file1").unwrap(); + let file_entry = fs.lookup(ctx, dir2_entry.inode, &file_name)?; + + // Open the file with write permissions + let (handle, _opts) = fs.open( + ctx, + file_entry.inode, + (libc::O_WRONLY | libc::O_TRUNC) as u32, + )?; + let handle = handle.unwrap(); + + // Write content to the file + let content = b"Nested file content"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + file_entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written, content.len()); + + // Release the handle + fs.release(ctx, file_entry.inode, 0, handle, false, false, None)?; + + // Verify the content was written correctly + let file_path = temp_dirs[0].path().join("dir1").join("dir2").join("file1"); + let file_content = std::fs::read(file_path)?; + assert_eq!(file_content, content); + + Ok(()) +} + +#[test] +fn test_write_with_whiteouts_and_opaque_dirs() -> io::Result<()> { + // Create an overlayfs with multiple layers, whiteouts, and opaque directories + let layers = vec![ + // Lower layer + vec![ + ("dir1", true, 0o755), + ("dir1/file1", false, 0o644), + ("dir1/file2", false, 0o644), + ("file3", false, 0o644), + ], + // Upper layer with whiteout for file2 and opaque dir1 + vec![ + ("dir1", true, 0o755), + ("dir1/.wh..wh..opq", false, 0o644), // Opaque dir marker + ("dir1/file4", false, 0o644), // New file in opaque dir + (".wh.file3", false, 0o644), // Whiteout for file3 + ], + ]; + let (fs, temp_dirs) = helper::create_overlayfs(layers)?; + + let ctx = Context::default(); + + // Test 1: Write to file4 in opaque directory + let dir1_name = CString::new("dir1").unwrap(); + let dir1_entry = fs.lookup(ctx, 1, &dir1_name)?; + + let file4_name = CString::new("file4").unwrap(); + let file4_entry = fs.lookup(ctx, dir1_entry.inode, &file4_name)?; + + let (handle, _opts) = fs.open(ctx, file4_entry.inode, libc::O_WRONLY as u32)?; + let handle = handle.unwrap(); 
+ + let content = b"File in opaque dir"; + let mut reader = TestContainer(content.to_vec()); + let bytes_written = fs.write( + ctx, + file4_entry.inode, + handle, + &mut reader, + content.len() as u32, + 0, + None, + false, + false, + 0, + )?; + assert_eq!(bytes_written, content.len()); + + fs.release(ctx, file4_entry.inode, 0, handle, false, false, None)?; + + // Verify content + let file_path = temp_dirs[1].path().join("dir1").join("file4"); + let file_content = std::fs::read(file_path)?; + assert_eq!(file_content, content); + + // Test 2: Try to access file1 through opaque directory (should fail) + let file1_name = CString::new("file1").unwrap(); + assert!(fs.lookup(ctx, dir1_entry.inode, &file1_name).is_err()); + + // Test 3: Try to access file3 (should fail due to whiteout) + let file3_name = CString::new("file3").unwrap(); + assert!(fs.lookup(ctx, 1, &file3_name).is_err()); + + Ok(()) +} diff --git a/src/devices/src/virtio/fs/worker.rs b/src/devices/src/virtio/fs/worker.rs index a0b93e08d..b95d2019b 100644 --- a/src/devices/src/virtio/fs/worker.rs +++ b/src/devices/src/virtio/fs/worker.rs @@ -15,8 +15,10 @@ use vm_memory::GuestMemoryMmap; use super::super::{FsError, Queue, VIRTIO_MMIO_INT_VRING}; use super::defs::{HPQ_INDEX, REQ_INDEX}; use super::descriptor_utils::{Reader, Writer}; -use super::passthrough::{self, PassthroughFs}; -use super::server::Server; +use super::server::FsImplServer; +use super::overlayfs::OverlayFs; +use super::passthrough::PassthroughFs; +use super::{FsImpl, FsImplConfig}; use crate::legacy::GicV3; use crate::virtio::VirtioShmRegion; @@ -30,7 +32,7 @@ pub struct FsWorker { mem: GuestMemoryMmap, shm_region: Option, - server: Server, + server: FsImplServer, stop_fd: EventFd, #[cfg(target_os = "macos")] map_sender: Option>, @@ -47,10 +49,19 @@ impl FsWorker { irq_line: Option, mem: GuestMemoryMmap, shm_region: Option, - passthrough_cfg: passthrough::Config, + fs_config: FsImplConfig, stop_fd: EventFd, #[cfg(target_os = "macos")] 
map_sender: Option>, ) -> Self { + let server = match fs_config { + FsImplConfig::Passthrough(passthrough_cfg) => FsImplServer::new(FsImpl::Passthrough( + PassthroughFs::new(passthrough_cfg).unwrap(), + )), + FsImplConfig::Overlayfs(overlayfs_cfg) => { + FsImplServer::new(FsImpl::Overlayfs(OverlayFs::new(overlayfs_cfg).unwrap())) + } + }; + Self { queues, queue_evts, @@ -58,10 +69,9 @@ impl FsWorker { interrupt_evt, intc, irq_line, - mem, shm_region, - server: Server::new(PassthroughFs::new(passthrough_cfg).unwrap()), + server, stop_fd, #[cfg(target_os = "macos")] map_sender, diff --git a/src/devices/src/virtio/vsock/device.rs b/src/devices/src/virtio/vsock/device.rs index 01df1317d..4386cfd0e 100644 --- a/src/devices/src/virtio/vsock/device.rs +++ b/src/devices/src/virtio/vsock/device.rs @@ -6,11 +6,13 @@ // found in the THIRD-PARTY file. use std::collections::HashMap; +use std::net::Ipv4Addr; use std::path::PathBuf; use std::result; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; +use ipnetwork::Ipv4Network; use utils::byte_order; use utils::eventfd::EventFd; use vm_memory::GuestMemoryMmap; @@ -20,6 +22,7 @@ use super::super::{ ActivateError, ActivateResult, DeviceState, Queue as VirtQueue, VirtioDevice, VsockError, VIRTIO_MMIO_INT_VRING, }; +use super::ip_filter::IpFilterConfig; use super::muxer::VsockMuxer; use super::packet::VsockPacket; use super::{defs, defs::uapi}; @@ -60,6 +63,9 @@ impl Vsock { host_port_map: Option>, queues: Vec, unix_ipc_port_map: Option>, + ip: Option, + subnet: Option, + scope: u8, ) -> super::Result { let mut queue_events = Vec::new(); for _ in 0..queues.len() { @@ -82,6 +88,11 @@ impl Vsock { interrupt_evt.try_clone().unwrap(), interrupt_status.clone(), unix_ipc_port_map, + IpFilterConfig { + ip, + subnet, + scope, + }, ), queue_rx, queue_tx, @@ -104,12 +115,15 @@ impl Vsock { cid: u64, host_port_map: Option>, unix_ipc_port_map: Option>, + ip: Option, + subnet: Option, + reach: u8, ) -> super::Result { 
let queues: Vec = defs::QUEUE_SIZES .iter() .map(|&max_size| VirtQueue::new(max_size)) .collect(); - Self::with_queues(cid, host_port_map, queues, unix_ipc_port_map) + Self::with_queues(cid, host_port_map, queues, unix_ipc_port_map, ip, subnet, reach) } pub fn id(&self) -> &str { diff --git a/src/devices/src/virtio/vsock/ip_filter.rs b/src/devices/src/virtio/vsock/ip_filter.rs new file mode 100644 index 000000000..94bf39ff6 --- /dev/null +++ b/src/devices/src/virtio/vsock/ip_filter.rs @@ -0,0 +1,93 @@ +use ipnetwork::Ipv4Network; +use std::net::Ipv4Addr; + +//-------------------------------------------------------------------------------------------------- +// Types +//-------------------------------------------------------------------------------------------------- + +/// Configuration for IP-based filtering in the Vsock Muxer. +#[derive(Clone, Debug)] +pub struct IpFilterConfig { + /// Defines the scope of allowed connections/bindings. + /// 0: None (Block all IP communication) + /// 1: Group (Allow within `subnet` if specified, otherwise behaves like scope 0) + /// 2: Public (Allow public IPs, bind only to `ip` if specified) + /// 3: Any (Allow any IP, bind only to `ip` if specified) + pub scope: u8, + + /// If specified, binding/listening is ONLY allowed on this specific IP address + /// (ignored if scope is 0). + pub ip: Option, + + /// The allowed subnet for Scope 1 (Group). Optional - if not provided when scope is 1, + /// all connections will be blocked (same as scope 0). + pub subnet: Option, +} + +//-------------------------------------------------------------------------------------------------- +// Methods +//-------------------------------------------------------------------------------------------------- + +impl IpFilterConfig { + /// Checks if the configuration is logically valid. 
+ pub fn is_valid(&self) -> bool { + match self.scope { + 0 | 1 | 2 | 3 => true, // All valid scopes (subnet is optional for scope 1) + _ => false, // Invalid scope number + } + } + + /// Checks if an IP address is considered private. + /// (Includes loopback, private ranges, link-local, broadcast, documentation, shared CGN) + fn is_private(ip: Ipv4Addr) -> bool { + ip.is_loopback() + || ip.is_private() + || ip.is_link_local() + || ip.is_broadcast() + || ip.is_documentation() + || match ip.octets() { + [100, b, _, _] if b >= 64 && b <= 127 => true, // Shared Address Space (RFC 6598) + _ => false, + } + } + + /// Checks if connecting to a given destination IP is allowed by the filter rules. + pub fn is_allowed_connect(&self, dest_ip: Ipv4Addr) -> bool { + match self.scope { + 0 => false, // Scope 0: Deny all connections + 1 => { + // Scope 1: Group - Allow connection only if dest_ip is within the specified subnet + // If no subnet is specified, behaves like scope 0 (deny all) + self.subnet.map_or(false, |subnet| subnet.contains(dest_ip)) + } + 2 => { + // Scope 2: Public - Allow connection only if dest_ip is NOT private + !Self::is_private(dest_ip) + } + 3 => true, // Scope 3: Any - Allow connection to any IP + _ => false, // Invalid scope + } + } + + /// Checks if binding to a given IP is allowed by the filter rules. + pub fn is_allowed_bind(&self, bind_ip: Ipv4Addr) -> bool { + if self.scope == 0 { + return false; // Scope 0: Deny all binding + } + + // Rule: "if ip specified, only the ip can be bound to or listened on." 
+ if let Some(allowed_bind_ip) = self.ip { + return bind_ip == allowed_bind_ip; + } + + // No specific IP specified, check based on scope rules for the bind_ip itself + match self.scope { + // Scope 1: Group - Allow binding within the subnet if no specific IP given + // If no subnet is specified, behaves like scope 0 (deny all) + 1 => self.subnet.map_or(false, |subnet| subnet.contains(bind_ip)), + // Scope 2 & 3: Any & Public - Allow binding to any IP if no specific IP given + 2 | 3 => true, + _ => false, // Invalid scope (scope 0 already handled) + } + } +} diff --git a/src/devices/src/virtio/vsock/mod.rs b/src/devices/src/virtio/vsock/mod.rs index 49917c5bf..b3916c472 100644 --- a/src/devices/src/virtio/vsock/mod.rs +++ b/src/devices/src/virtio/vsock/mod.rs @@ -19,6 +19,7 @@ mod tcp; mod timesync; mod udp; mod unix; +mod ip_filter; pub use self::defs::uapi::VIRTIO_ID_VSOCK as TYPE_VSOCK; pub use self::device::Vsock; diff --git a/src/devices/src/virtio/vsock/muxer.rs b/src/devices/src/virtio/vsock/muxer.rs index 6d027dcd0..75e947bd8 100644 --- a/src/devices/src/virtio/vsock/muxer.rs +++ b/src/devices/src/virtio/vsock/muxer.rs @@ -27,6 +27,8 @@ use vm_memory::GuestMemoryMmap; use std::net::Ipv4Addr; +use super::ip_filter::IpFilterConfig; + pub type ProxyMap = Arc>>>>; /// A muxer RX queue item. @@ -112,6 +114,7 @@ pub struct VsockMuxer { proxy_map: ProxyMap, reaper_sender: Option>, unix_ipc_port_map: Option>, + ip_filter: IpFilterConfig, } impl VsockMuxer { @@ -121,7 +124,12 @@ impl VsockMuxer { interrupt_evt: EventFd, interrupt_status: Arc, unix_ipc_port_map: Option>, + ip_filter: IpFilterConfig, ) -> Self { + if !ip_filter.is_valid() { + warn!("Invalid IpFilterConfig provided during VsockMuxer creation: {:?}. 
Scope value must be between 0 and 3.", ip_filter); + } + VsockMuxer { cid, host_port_map, @@ -136,6 +144,7 @@ impl VsockMuxer { proxy_map: Arc::new(RwLock::new(HashMap::new())), reaper_sender: None, unix_ipc_port_map, + ip_filter, } } @@ -321,6 +330,15 @@ impl VsockMuxer { fn process_connect(&self, pkt: &VsockPacket) { debug!("vsock: proxy connect request"); if let Some(req) = pkt.read_connect_req() { + if !self.check_destination_ip(req.addr) { + warn!( + "vsock: connect filtered: connection from guest:{}:{} to host:{} denied by IP filter rules", + pkt.src_cid(), pkt.src_port(), req.addr + ); + self.send_connect_rsp(pkt.src_port(), pkt.dst_port(), -libc::ECONNREFUSED); + return; + } + let id = (req.peer_port as u64) << 32 | defs::TSI_PROXY_PORT as u64; debug!("vsock: proxy connect request: id={}", id); let update = self @@ -333,6 +351,9 @@ impl VsockMuxer { if let Some(update) = update { self.process_proxy_update(id, update); } + } else { + warn!("vsock: could not parse connect request buffer for filtering"); + self.send_connect_rsp(pkt.src_port(), pkt.dst_port(), -libc::EINVAL); } } @@ -354,6 +375,17 @@ impl VsockMuxer { fn process_sendto_addr(&self, pkt: &VsockPacket) { debug!("vsock: new DGRAM sendto addr: src={}", pkt.src_port()); if let Some(req) = pkt.read_sendto_addr() { + if !self.check_destination_ip(req.addr) { + warn!( + "vsock: sendto_addr filtered: send from guest:{}:{} to host:{} denied by IP filter rules", + pkt.src_cid(), pkt.src_port(), req.addr + ); + + // Send error response back to the guest + self.send_sendto_addr_error_rsp(pkt.src_port(), -libc::ECONNREFUSED); + return; + } + let id = (req.peer_port as u64) << 32 | defs::TSI_PROXY_PORT as u64; debug!("vsock: new DGRAM sendto addr: id={}", id); let update = self @@ -380,6 +412,15 @@ impl VsockMuxer { fn process_listen_request(&self, pkt: &VsockPacket) { debug!("vsock: DGRAM listen request: src={}", pkt.src_port()); if let Some(req) = pkt.read_listen_req() { + if !self.check_bind_ip(req.addr) { 
+ warn!( + "vsock: listen filtered: attempt to listen on host:{} from guest:{}:{} denied by IP filter rules", + req.addr, pkt.src_cid(), pkt.src_port() + ); + self.send_listen_rsp(pkt.src_port(), pkt.dst_port(), -libc::EACCES); + return; + } + let id = (req.peer_port as u64) << 32 | defs::TSI_PROXY_PORT as u64; debug!("vsock: DGRAM listen request: id={}", id); let update = self @@ -668,4 +709,82 @@ impl VsockMuxer { } Ok(()) } + + #[inline] + fn check_destination_ip(&self, dest_ip: Ipv4Addr) -> bool { + self.ip_filter.is_allowed_connect(dest_ip) + } + + #[inline] + fn check_bind_ip(&self, bind_ip: Ipv4Addr) -> bool { + self.ip_filter.is_allowed_bind(bind_ip) + } + + // Helper function to send different types of responses back to the guest + fn send_response(&self, rx: MuxerRx) { + // Get references to the needed components + let mem = match self.mem.as_ref() { + Some(m) => m, + None => { + error!("vsock: cannot send response: mem is None"); + return; + } + }; + let queue = match self.queue.as_ref() { + Some(q) => q, + None => { + error!("vsock: cannot send response: queue is None"); + return; + } + }; + + // Send the response to the guest + push_packet(self.cid, rx, &self.rxq, queue, mem); + } + + // Helper function for sending sendto_addr error responses + fn send_sendto_addr_error_rsp(&self, peer_port: u32, result: i32) { + debug!( + "vsock: sending sendto_addr error response: peer_port={}, result={}", + peer_port, result + ); + + // This response goes to the control port (DGRAM) + let rx = MuxerRx::ConnResponse { + local_port: defs::TSI_SENDTO_ADDR, + peer_port, + result, + }; + self.send_response(rx); + } + + fn send_connect_rsp(&self, local_port: u32, peer_port: u32, result: i32) { + debug!( + "vsock: sending connect response: local_port={}, peer_port={}, result={}", + local_port, peer_port, result + ); + + // This response goes to the control port (DGRAM) + let rx = MuxerRx::ConnResponse { + local_port: defs::TSI_CONNECT, // TSI_CONNECT = 1025 + peer_port, + 
result, + }; + self.send_response(rx); + } + + fn send_listen_rsp(&self, local_port: u32, peer_port: u32, result: i32) { + debug!( + "vsock: sending listen response: local_port={}, peer_port={}, result={}", + local_port, peer_port, result + ); + + // This response goes to the control port (DGRAM) + let rx = MuxerRx::ListenResponse { + local_port: defs::TSI_LISTEN, // TSI_LISTEN = 1029 + peer_port, + result, + }; + self.send_response(rx); + } } diff --git a/src/imago/Cargo.toml b/src/imago/Cargo.toml index 75b041d8c..3f169a9fe 100644 --- a/src/imago/Cargo.toml +++ b/src/imago/Cargo.toml @@ -47,25 +47,20 @@ version = "0.8" features = ["std"] [dependencies.serde] -version = "1.0" -features = ["derive"] +workspace = true [dependencies.tokio] -version = "1" -features = [ - "rt", - "sync", -] +workspace = true [dependencies.tracing] version = "0.1" [dependencies.vm-memory] -version = "0.16" optional = true +workspace = true [build-dependencies.rustc_version] version = "0.4.0" [target."cfg(unix)".dependencies.libc] -version = "0.2" +workspace = true diff --git a/src/kernel/Cargo.toml b/src/kernel/Cargo.toml index e8e96d88c..dc72320b8 100644 --- a/src/kernel/Cargo.toml +++ b/src/kernel/Cargo.toml @@ -4,6 +4,6 @@ version = "0.1.0" edition = "2021" [dependencies] -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true utils = { path = "../utils" } diff --git a/src/libkrun/Cargo.toml b/src/libkrun/Cargo.toml index e12347513..e6ab26106 100644 --- a/src/libkrun/Cargo.toml +++ b/src/libkrun/Cargo.toml @@ -18,9 +18,10 @@ virgl_resource_map2 = [] [dependencies] crossbeam-channel = "0.5" env_logger = "0.9.0" -libc = ">=0.2.39" +libc.workspace = true log = "0.4.0" once_cell = "1.4.1" +ipnetwork = "0.21" devices = { path = "../devices" } polly = { path = "../polly" } diff --git a/src/libkrun/build.rs b/src/libkrun/build.rs index a3ccc2288..936b183aa 100644 --- a/src/libkrun/build.rs +++ b/src/libkrun/build.rs @@ -2,7 +2,7 @@ fn main() { 
#[cfg(target_os = "macos")] println!("cargo:rustc-link-lib=framework=Hypervisor"); #[cfg(target_os = "macos")] - println!("cargo:rustc-link-search=/opt/homebrew/lib"); + println!("cargo:rustc-link-search=/usr/local/lib"); #[cfg(all(not(feature = "tee"), not(feature = "efi")))] println!("cargo:rustc-link-lib=krunfw"); #[cfg(feature = "tee")] diff --git a/src/libkrun/src/lib.rs b/src/libkrun/src/lib.rs index 9af22c511..a06ccbd24 100644 --- a/src/libkrun/src/lib.rs +++ b/src/libkrun/src/lib.rs @@ -1,6 +1,21 @@ #[macro_use] extern crate log; +use crossbeam_channel::unbounded; +use devices::virtio::fs::FsImplShare; +#[cfg(feature = "net")] +use devices::virtio::net::device::VirtioNetBackend; +#[cfg(feature = "blk")] +use devices::virtio::CacheType; +use env_logger::Env; +#[cfg(target_os = "macos")] +use hvf::MemoryMapping; +use ipnetwork::Ipv4Network; +#[cfg(not(feature = "efi"))] +use libc::size_t; +use libc::{c_char, c_int}; +use once_cell::sync::Lazy; +use polly::event_manager::EventManager; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::convert::TryInto; @@ -8,6 +23,7 @@ use std::env; use std::ffi::CStr; #[cfg(target_os = "linux")] use std::ffi::CString; +use std::net::Ipv4Addr; #[cfg(target_os = "linux")] use std::os::fd::AsRawFd; use std::os::fd::RawFd; @@ -15,23 +31,6 @@ use std::path::PathBuf; use std::slice; use std::sync::atomic::{AtomicI32, Ordering}; use std::sync::Mutex; - -#[cfg(target_os = "macos")] -use crossbeam_channel::unbounded; -#[cfg(feature = "blk")] -use devices::virtio::block::ImageType; -#[cfg(feature = "net")] -use devices::virtio::net::device::VirtioNetBackend; -#[cfg(feature = "blk")] -use devices::virtio::CacheType; -use env_logger::Env; -#[cfg(target_os = "macos")] -use hvf::MemoryMapping; -#[cfg(not(feature = "efi"))] -use libc::size_t; -use libc::{c_char, c_int}; -use once_cell::sync::Lazy; -use polly::event_manager::EventManager; use utils::eventfd::EventFd; use vmm::resources::VmResources; #[cfg(feature 
= "blk")] @@ -62,6 +61,9 @@ const INIT_PATH: &str = "/init.krun"; #[derive(Default)] struct TsiConfig { port_map: Option>, + ip: Option, + subnet: Option, + scope: u8, } enum NetworkConfig { @@ -404,14 +406,76 @@ pub unsafe extern "C" fn krun_set_root(ctx_id: u32, c_root_path: *const c_char) }; let fs_id = "/dev/root".to_string(); - let shared_dir = root_path.to_string(); + let fs_share = FsImplShare::Passthrough(root_path.to_string()); match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); + + // Check if root device is already set + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + cfg.vmr.add_fs_device(FsDeviceConfig { fs_id, - shared_dir, + fs_share, + // Default to a conservative 512 MB window. + shm_size: Some(1 << 29), + }); + } + Entry::Vacant(_) => return -libc::ENOENT, + } + + KRUN_SUCCESS +} + +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +#[cfg(not(feature = "tee"))] +pub unsafe extern "C" fn krun_set_overlayfs_root( + ctx_id: u32, + c_root_layers: *const *const c_char, +) -> i32 { + let mut layers = Vec::new(); + let layers_array: &[*const c_char] = slice::from_raw_parts(c_root_layers, MAX_ARGS); + + for item in layers_array.iter().take(MAX_ARGS) { + if item.is_null() { + break; + } else { + let layer_path = match CStr::from_ptr(*item).to_str() { + Ok(path) => path, + Err(_) => return -libc::EINVAL, + }; + layers.push(PathBuf::from(layer_path)); + } + } + + // Need at least one layer + if layers.is_empty() { + return -libc::EINVAL; + } + + let fs_id = "/dev/root".to_string(); + let fs_share = FsImplShare::Overlayfs(layers); + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + let cfg = ctx_cfg.get_mut(); + + // Check if root device is already set + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + + cfg.vmr.add_fs_device(FsDeviceConfig { + fs_id, + fs_share, // Default to a 
conservative 512 MB window. shm_size: Some(1 << 29), }); @@ -442,9 +506,18 @@ pub unsafe extern "C" fn krun_add_virtiofs( match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); + + // Check if a device with the same tag already exists + let fs_id = tag.to_string(); + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + cfg.vmr.add_fs_device(FsDeviceConfig { - fs_id: tag.to_string(), - shared_dir: path.to_string(), + fs_id, + fs_share: FsImplShare::Passthrough(path.to_string()), shm_size: None, }); } @@ -475,9 +548,18 @@ pub unsafe extern "C" fn krun_add_virtiofs2( match CTX_MAP.lock().unwrap().entry(ctx_id) { Entry::Occupied(mut ctx_cfg) => { let cfg = ctx_cfg.get_mut(); + + // Check if a device with the same tag already exists + let fs_id = tag.to_string(); + for device in &cfg.vmr.fs { + if device.fs_id == fs_id { + return -libc::EEXIST; + } + } + cfg.vmr.add_fs_device(FsDeviceConfig { - fs_id: tag.to_string(), - shared_dir: path.to_string(), + fs_id, + fs_share: FsImplShare::Passthrough(path.to_string()), shm_size: Some(shm_size.try_into().unwrap()), }); } @@ -752,6 +834,77 @@ pub unsafe extern "C" fn krun_set_port_map(ctx_id: u32, c_port_map: *const *cons KRUN_SUCCESS } +#[allow(clippy::missing_safety_doc)] +#[no_mangle] +pub unsafe extern "C" fn krun_set_tsi_scope( + ctx_id: u32, + c_ip: *const c_char, + c_subnet: *const c_char, + scope: u8, +) -> i32 { + if scope > 3 { + error!("Invalid scope value: {}. 
Must be 0, 1, 2, or 3.", scope); + return -libc::EINVAL; + } + + let ip = if c_ip.is_null() { + None + } else { + match CStr::from_ptr(c_ip).to_str() { + Ok(s) if !s.is_empty() => { + // Parse IP format directly + match s.parse::() { + Ok(addr) => Some(addr), + Err(_) => { + error!("Invalid IP address format provided: {}", s); + return -libc::EINVAL; + } + } + } + Ok(_) => None, // Treat empty string as None + Err(_) => return -libc::EINVAL, + } + }; + + let subnet = if c_subnet.is_null() { + None + } else { + match CStr::from_ptr(c_subnet).to_str() { + Ok(s) if !s.is_empty() => { + // Parse Subnet format directly + match s.parse::() { + Ok(net) => Some(net), + Err(_) => { + error!("Invalid subnet format provided: {}", s); + return -libc::EINVAL; + } + } + } + Ok(_) => None, // Treat empty string as None + Err(_) => return -libc::EINVAL, + } + }; + + match CTX_MAP.lock().unwrap().entry(ctx_id) { + Entry::Occupied(mut ctx_cfg) => { + let cfg = ctx_cfg.get_mut(); + match &mut cfg.net_cfg { + NetworkConfig::Tsi(tsi_config) => { + tsi_config.ip = ip; + tsi_config.subnet = subnet; + tsi_config.scope = scope; + KRUN_SUCCESS + } + _ => { + error!("krun_set_tsi_scope is only supported for TSI network mode"); + -libc::ENOTSUP + } + } + } + Entry::Vacant(_) => -libc::ENOENT, + } +} + #[allow(clippy::missing_safety_doc)] #[no_mangle] pub unsafe extern "C" fn krun_set_rlimits(ctx_id: u32, c_rlimits: *const *const c_char) -> i32 { @@ -1176,6 +1329,9 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { guest_cid: 3, host_port_map: None, unix_ipc_port_map: None, + ip: None, + subnet: None, + scope: 0, }; if let Some(ref map) = ctx_cfg.unix_ipc_port_map { @@ -1187,6 +1343,9 @@ pub extern "C" fn krun_start_enter(ctx_id: u32) -> i32 { NetworkConfig::Tsi(tsi_cfg) => { vsock_config.host_port_map = tsi_cfg.port_map; vsock_set = true; + vsock_config.ip = tsi_cfg.ip; + vsock_config.subnet = tsi_cfg.subnet; + vsock_config.scope = tsi_cfg.scope; } NetworkConfig::VirtioNetPasst(_fd) 
=> { #[cfg(feature = "net")] diff --git a/src/smbios/Cargo.toml b/src/smbios/Cargo.toml index de9836ec0..863971c62 100644 --- a/src/smbios/Cargo.toml +++ b/src/smbios/Cargo.toml @@ -4,4 +4,4 @@ version = "0.1.0" edition = "2021" [dependencies] -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true diff --git a/src/utils/Cargo.toml b/src/utils/Cargo.toml index e4ecd3420..29b1ef222 100644 --- a/src/utils/Cargo.toml +++ b/src/utils/Cargo.toml @@ -7,6 +7,6 @@ edition = "2021" [dependencies] bitflags = "1.2.0" env_logger = "0.9.0" -libc = ">=0.2.85" +libc.workspace = true log = "0.4.0" -vmm-sys-util = ">=0.11" +vmm-sys-util.workspace = true diff --git a/src/vmm/Cargo.toml b/src/vmm/Cargo.toml index 25ed38d72..04f9b1843 100644 --- a/src/vmm/Cargo.toml +++ b/src/vmm/Cargo.toml @@ -16,9 +16,9 @@ snd = [] [dependencies] crossbeam-channel = "0.5" env_logger = "0.9.0" -libc = ">=0.2.39" +libc.workspace = true log = "0.4.0" -vm-memory = { version = ">=0.13", features = ["backend-mmap"] } +vm-memory.workspace = true arch = { path = "../arch" } devices = { path = "../devices" } @@ -36,16 +36,17 @@ serde_json = { version = "1.0.64", optional = true } sev = { version = "4.0.0", features = ["openssl"], optional = true } curl = { version = "0.4", optional = true } nix = "0.24.1" +ipnetwork = "0.21" [target.'cfg(target_arch = "x86_64")'.dependencies] cpuid = { path = "../cpuid" } [target.'cfg(target_os = "linux")'.dependencies] -kvm-bindings = { version = ">=0.10", features = ["fam-wrappers"] } -kvm-ioctls = ">=0.17" +kvm-bindings.workspace = true +kvm-ioctls.workspace = true [target.'cfg(target_os = "macos")'.dependencies] hvf = { path = "../hvf" } [dev-dependencies] -vmm-sys-util = ">=0.11" +vmm-sys-util.workspace = true diff --git a/src/vmm/src/builder.rs b/src/vmm/src/builder.rs index 8eda82a10..f5a94154e 100644 --- a/src/vmm/src/builder.rs +++ b/src/vmm/src/builder.rs @@ -1203,7 +1203,7 @@ fn attach_fs_devices( for (i, config) in 
fs_devs.iter().enumerate() { let fs = Arc::new(Mutex::new( - devices::virtio::Fs::new(config.fs_id.clone(), config.shared_dir.clone()).unwrap(), + devices::virtio::Fs::new(config.fs_id.clone(), config.fs_share.clone()).unwrap(), )); let id = format!("{}{}", String::from(fs.lock().unwrap().id()), i); diff --git a/src/vmm/src/linux/vstate.rs b/src/vmm/src/linux/vstate.rs index 777268806..1a2ec92f9 100644 --- a/src/vmm/src/linux/vstate.rs +++ b/src/vmm/src/linux/vstate.rs @@ -864,7 +864,7 @@ impl Vcpu { /// Registers a signal handler which makes use of TLS and kvm immediate exit to /// kick the vcpu running on the current thread, if there is one. pub fn register_kick_signal_handler() { - extern "C" fn handle_signal(_: c_int, _: *mut siginfo_t, _: *mut c_void) { + extern "C" fn handle_signal(_: c_int, _: *mut nix::libc::siginfo_t, _: *mut nix::libc::c_void) { // This is safe because it's temporarily aliasing the `Vcpu` object, but we are // only reading `vcpu.fd` which does not change for the lifetime of the `Vcpu`. unsafe { @@ -1175,9 +1175,11 @@ impl Vcpu { self.fd .set_sregs(&state.sregs) .map_err(Error::VcpuSetSregs)?; - self.fd - .set_xsave(&state.xsave) - .map_err(Error::VcpuSetXsave)?; + unsafe { + self.fd + .set_xsave(&state.xsave) + .map_err(Error::VcpuSetXsave)?; + } self.fd.set_xcrs(&state.xcrs).map_err(Error::VcpuSetXcrs)?; self.fd .set_debug_regs(&state.debug_regs) diff --git a/src/vmm/src/signal_handler.rs b/src/vmm/src/signal_handler.rs index 9a6067bfc..f8c6a0a57 100644 --- a/src/vmm/src/signal_handler.rs +++ b/src/vmm/src/signal_handler.rs @@ -23,7 +23,7 @@ static CONSOLE_SIGINT_FD: AtomicI32 = AtomicI32::new(-1); /// /// Increments the `seccomp.num_faults` metric, logs an error message and terminates the process /// with a specific exit code. 
-extern "C" fn sigsys_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigsys_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. let si_signo = unsafe { (*info).si_signo }; let si_code = unsafe { (*info).si_code }; @@ -52,7 +52,7 @@ extern "C" fn sigsys_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_v /// Signal handler for `SIGBUS` and `SIGSEGV`. /// /// Logs an error message and terminates the process with a specific exit code. -extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. let si_signo = unsafe { (*info).si_signo }; let si_code = unsafe { (*info).si_code }; @@ -80,7 +80,7 @@ extern "C" fn sigbus_sigsegv_handler(num: c_int, info: *mut siginfo_t, _unused: }; } -extern "C" fn sigwinch_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigwinch_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. let si_signo = unsafe { (*info).si_signo }; @@ -95,7 +95,7 @@ extern "C" fn sigwinch_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c let _ = unsafe { libc::write(console_fd, &val as *const _ as *const c_void, 8) }; } -extern "C" fn sigint_handler(num: c_int, info: *mut siginfo_t, _unused: *mut c_void) { +extern "C" fn sigint_handler(num: c_int, info: *mut nix::libc::siginfo_t, _unused: *mut nix::libc::c_void) { // Safe because we're just reading some fields from a supposedly valid argument. 
let si_signo = unsafe { (*info).si_signo }; diff --git a/src/vmm/src/vmm_config/fs.rs b/src/vmm/src/vmm_config/fs.rs index cc7995021..f5690f56c 100644 --- a/src/vmm/src/vmm_config/fs.rs +++ b/src/vmm/src/vmm_config/fs.rs @@ -1,6 +1,8 @@ +use devices::virtio::fs::FsImplShare; + #[derive(Clone, Debug)] pub struct FsDeviceConfig { pub fs_id: String, - pub shared_dir: String, + pub fs_share: FsImplShare, pub shm_size: Option, } diff --git a/src/vmm/src/vmm_config/vsock.rs b/src/vmm/src/vmm_config/vsock.rs index 5aafe8582..8b909475f 100644 --- a/src/vmm/src/vmm_config/vsock.rs +++ b/src/vmm/src/vmm_config/vsock.rs @@ -3,9 +3,12 @@ use std::collections::HashMap; use std::fmt; +use std::net::Ipv4Addr; use std::path::PathBuf; use std::sync::{Arc, Mutex}; +use ipnetwork::Ipv4Network; + use devices::virtio::{Vsock, VsockError}; type MutexVsock = Arc>; @@ -30,7 +33,7 @@ type Result = std::result::Result; /// This struct represents the strongly typed equivalent of the json body /// from vsock related requests. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub struct VsockDeviceConfig { /// ID of the vsock device. pub vsock_id: String, @@ -40,6 +43,12 @@ pub struct VsockDeviceConfig { pub host_port_map: Option>, /// An optional map of guest port to host UNIX domain sockets for IPC. pub unix_ipc_port_map: Option>, + /// Optional static IP address for TSI. + pub ip: Option, + /// Optional subnet for TSI. + pub subnet: Option, + /// Scope for TSI (0-3). + pub scope: u8, } struct VsockWrapper { @@ -78,6 +87,9 @@ impl VsockBuilder { u64::from(cfg.guest_cid), cfg.host_port_map, cfg.unix_ipc_port_map, + cfg.ip, + cfg.subnet, + cfg.scope, ) .map_err(VsockConfigError::CreateVsockDevice) } @@ -115,6 +127,9 @@ pub(crate) mod tests { guest_cid: 3, host_port_map: None, unix_ipc_port_map: None, + ip: None, + subnet: None, + scope: 0, } }