From 2620f141202c188f282666e30523eee931977f1f Mon Sep 17 00:00:00 2001 From: mox692 Date: Thu, 8 May 2025 21:53:44 +0900 Subject: [PATCH 01/22] rt: add infrastructure code for io_uring --- .github/workflows/ci.yml | 42 +++++- Cargo.toml | 1 + spellcheck.dic | 6 +- tokio/Cargo.toml | 6 + tokio/src/io/mod.rs | 4 +- tokio/src/io/poll_evented.rs | 2 +- tokio/src/macros/cfg.rs | 111 ++++++++++++++++ tokio/src/runtime/driver.rs | 21 ++- tokio/src/runtime/driver/op.rs | 161 +++++++++++++++++++++++ tokio/src/runtime/io/driver.rs | 85 +++++++++--- tokio/src/runtime/io/driver/uring.rs | 188 +++++++++++++++++++++++++++ tokio/src/runtime/io/mod.rs | 5 +- tokio/src/runtime/io/registration.rs | 2 +- tokio/src/runtime/mod.rs | 2 +- tokio/src/util/linked_list.rs | 2 +- tokio/src/util/mod.rs | 4 +- 16 files changed, 604 insertions(+), 38 deletions(-) create mode 100644 tokio/src/runtime/driver/op.rs create mode 100644 tokio/src/runtime/io/driver/uring.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f6680967ed2..2362d043eab 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -350,6 +350,40 @@ jobs: # the unstable cfg to RustDoc RUSTDOCFLAGS: --cfg tokio_unstable --cfg tokio_taskdump + test-unstable-uring: + name: test tokio full --cfg tokio_unstable_uring + needs: basics + runs-on: ${{ matrix.os }} + strategy: + matrix: + include: + - os: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust ${{ env.rust_stable }} + uses: dtolnay/rust-toolchain@stable + with: + toolchain: ${{ env.rust_stable }} + + - name: Install cargo-nextest + uses: taiki-e/install-action@v2 + with: + tool: cargo-nextest + + - uses: Swatinem/rust-cache@v2 + # Run `tokio` with the "tokio_unstable_uring" cfg flag.
+ - name: test tokio full --cfg tokio_unstable_uring + run: | + set -euxo pipefail + cargo nextest run --all-features + cargo test --doc --all-features + working-directory: tokio + env: + RUSTFLAGS: --cfg tokio_unstable_uring -Dwarnings + # in order to run doctests for unstable features, we must also pass + # the unstable cfg to RustDoc + RUSTDOCFLAGS: --cfg tokio_unstable_uring + check-unstable-mt-counters: name: check tokio full --internal-mt-counters needs: basics @@ -702,7 +736,7 @@ jobs: # Try with unstable feature flags - { name: "--unstable", rustflags: "--cfg tokio_unstable -Dwarnings" } # Try with unstable and taskdump feature flags - - { name: "--unstable --taskdump", rustflags: "--cfg tokio_unstable -Dwarnings --cfg tokio_taskdump" } + - { name: "--unstable --taskdump", rustflags: "--cfg tokio_unstable -Dwarnings --cfg tokio_taskdump --cfg tokio_unstable_uring" } steps: - uses: actions/checkout@v4 - name: Install Rust ${{ env.rust_nightly }} @@ -765,7 +799,7 @@ jobs: cargo hack check --all-features --ignore-private - name: "check --all-features --unstable -Z minimal-versions" env: - RUSTFLAGS: --cfg tokio_unstable --cfg tokio_taskdump -Dwarnings + RUSTFLAGS: --cfg tokio_unstable --cfg tokio_taskdump --cfg tokio_unstable_uring -Dwarnings run: | # Remove dev-dependencies from Cargo.toml to prevent the next `cargo update` # from determining minimal versions based on dev-dependencies. 
@@ -817,8 +851,8 @@ jobs: run: - os: windows-latest - os: ubuntu-latest - RUSTFLAGS: --cfg tokio_taskdump - RUSTDOCFLAGS: --cfg tokio_taskdump + RUSTFLAGS: --cfg tokio_taskdump --cfg tokio_unstable_uring + RUSTDOCFLAGS: --cfg tokio_taskdump --cfg tokio_unstable_uring steps: - uses: actions/checkout@v4 diff --git a/Cargo.toml b/Cargo.toml index 618b310e32c..f6c04346deb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,4 +29,5 @@ unexpected_cfgs = { level = "warn", check-cfg = [ 'cfg(tokio_no_tuning_tests)', 'cfg(tokio_taskdump)', 'cfg(tokio_unstable)', + 'cfg(tokio_unstable_uring)', ] } diff --git a/spellcheck.dic b/spellcheck.dic index 39b98a7ee86..a8140e3b7e2 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -302 +306 & + < @@ -70,6 +70,9 @@ connectionless coroutines cpu cpus +cqe +CQE +cqe's customizable Customizable datagram @@ -287,6 +290,7 @@ unsets Unsets unsynchronized untrusted +uring usecases Valgrind Varghese diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index 44eb58d40ed..921b63dcce9 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -103,6 +103,12 @@ socket2 = { version = "0.5.5", optional = true, features = ["all"] } [target.'cfg(tokio_unstable)'.dependencies] tracing = { version = "0.1.29", default-features = false, features = ["std"], optional = true } # Not in full +[target.'cfg(all(tokio_unstable_uring, target_os = "linux"))'.dependencies] +io-uring = { version = "0.7.6", default-features = false } +libc = { version = "0.2.168" } +mio = { version = "1.0.1", default-features = false, features = ["os-poll", "os-ext"] } +slab = "0.4.9" + # Currently unstable. The API exposed by these features may be broken at any time. # Requires `--cfg tokio_unstable` to enable. 
[target.'cfg(tokio_taskdump)'.dependencies] diff --git a/tokio/src/io/mod.rs b/tokio/src/io/mod.rs index dc2c4309e66..6178188324f 100644 --- a/tokio/src/io/mod.rs +++ b/tokio/src/io/mod.rs @@ -214,11 +214,11 @@ pub use self::read_buf::ReadBuf; #[doc(no_inline)] pub use std::io::{Error, ErrorKind, Result, SeekFrom}; -cfg_io_driver_impl! { +cfg_io_driver_impl_or_uring! { pub(crate) mod interest; pub(crate) mod ready; - cfg_net! { + cfg_net_or_uring! { pub use interest::Interest; pub use ready::Ready; } diff --git a/tokio/src/io/poll_evented.rs b/tokio/src/io/poll_evented.rs index 0e34fbe3c22..8fabf9d6964 100644 --- a/tokio/src/io/poll_evented.rs +++ b/tokio/src/io/poll_evented.rs @@ -9,7 +9,7 @@ use std::ops::Deref; use std::panic::{RefUnwindSafe, UnwindSafe}; use std::task::ready; -cfg_io_driver! { +cfg_io_driver_or_uring! { /// Associates an I/O resource that implements the [`std::io::Read`] and/or /// [`std::io::Write`] traits with the reactor that drives it. /// diff --git a/tokio/src/macros/cfg.rs b/tokio/src/macros/cfg.rs index c9bd644bd02..d01fac65c3f 100644 --- a/tokio/src/macros/cfg.rs +++ b/tokio/src/macros/cfg.rs @@ -131,6 +131,36 @@ macro_rules! cfg_io_driver { } } +macro_rules! cfg_io_driver_or_uring { + ($($item:item)*) => { + $( + #[cfg(any( + feature = "net", + all(unix, feature = "process"), + all(unix, feature = "signal"), + all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ) + ))] + #[cfg_attr(docsrs, doc(cfg(any( + feature = "net", + all(unix, feature = "process"), + all(unix, feature = "signal"), + all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ) + ))))] + $item + )* + } +} + macro_rules! cfg_io_driver_impl { ( $( $item:item )* ) => { $( @@ -157,6 +187,44 @@ macro_rules! cfg_not_io_driver { } } +macro_rules! 
cfg_io_driver_impl_or_uring { + ( $( $item:item )* ) => { + $( + #[cfg(any( + feature = "net", + all(unix, feature = "process"), + all(unix, feature = "signal"), + all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ) + ))] + $item + )* + } +} + +macro_rules! cfg_not_io_driver_impl_or_uring { + ( $( $item:item )* ) => { + $( + #[cfg(not(any( + feature = "net", + all(unix, feature = "process"), + all(unix, feature = "signal"), + all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ) + )))] + $item + )* + } +} + macro_rules! cfg_io_readiness { ($($item:item)*) => { $( @@ -279,6 +347,35 @@ macro_rules! cfg_net { } } +macro_rules! cfg_net_or_uring { + ($($item:item)*) => { + $( + #[cfg(any( + feature = "net", + all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ) + ))] + #[cfg_attr( + docsrs, + doc(cfg(any( + feature = "net", + all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ) + ))) + )] + $item + )* + } +} + macro_rules! cfg_net_unix { ($($item:item)*) => { $( @@ -616,3 +713,17 @@ macro_rules! cfg_metrics_variant { } } } + +macro_rules! cfg_tokio_unstable_uring { + ($($item:item)*) => { + $( + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + $item + )* + }; +} diff --git a/tokio/src/runtime/driver.rs b/tokio/src/runtime/driver.rs index 3b84a86699b..7f1fe4cf4c5 100644 --- a/tokio/src/runtime/driver.rs +++ b/tokio/src/runtime/driver.rs @@ -84,7 +84,7 @@ impl Handle { self.io.unpark(); } - cfg_io_driver! { + cfg_io_driver_or_uring! { #[track_caller] pub(crate) fn io(&self) -> &crate::runtime::io::Handle { self.io @@ -121,7 +121,7 @@ impl Handle { // ===== io driver ===== -cfg_io_driver! { +cfg_io_driver_impl_or_uring! { pub(crate) type IoDriver = crate::runtime::io::Driver; #[derive(Debug)] @@ -196,7 +196,7 @@ cfg_io_driver! { } } -cfg_not_io_driver! 
{ +cfg_not_io_driver_impl_or_uring! { pub(crate) type IoHandle = UnparkThread; #[derive(Debug)] @@ -226,6 +226,13 @@ cfg_not_io_driver! { false } } + cfg_io_driver_or_uring! { + impl IoHandle { + pub(crate) fn as_ref(&self) -> Option<&crate::runtime::io::Handle> { + todo!() + } + } + } } // ===== signal driver ===== @@ -244,7 +251,7 @@ cfg_signal_internal_and_unix! { cfg_not_signal_internal! { pub(crate) type SignalHandle = (); - cfg_io_driver! { + cfg_io_driver_impl_or_uring! { type SignalDriver = IoDriver; fn create_signal_driver(io_driver: IoDriver, _io_handle: &crate::runtime::io::Handle) -> io::Result<(SignalDriver, SignalHandle)> { @@ -264,7 +271,7 @@ cfg_process_driver! { } cfg_not_process_driver! { - cfg_io_driver! { + cfg_io_driver_impl_or_uring! { type ProcessDriver = SignalDriver; fn create_process_driver(signal_driver: SignalDriver) -> ProcessDriver { @@ -347,3 +354,7 @@ cfg_not_time! { (io_stack, ()) } } + +cfg_tokio_unstable_uring! { + pub(crate) mod op; +} diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs new file mode 100644 index 00000000000..023a85c79fe --- /dev/null +++ b/tokio/src/runtime/driver/op.rs @@ -0,0 +1,161 @@ +use crate::runtime::Handle; +use io_uring::cqueue; +use io_uring::squeue::Entry; +use std::future::Future; +use std::pin::Pin; +use std::task::Context; +use std::task::Poll; +use std::task::Waker; +use std::{io, mem}; + +#[derive(Debug)] +pub(crate) enum Lifecycle { + /// The operation has been submitted to uring and is currently in-flight + Submitted, + + /// The submitter is waiting for the completion of the operation + Waiting(Waker), + + /// The submitter no longer has interest in the operation result. The state + /// must be passed to the driver and held until the operation completes. 
+ Cancelled(#[allow(unused)] Box), + + /// The operation has completed with a single cqe result + Completed(io_uring::cqueue::Entry), +} + +pub(crate) enum State { + #[allow(dead_code)] + Initialize(Option), + Polled(usize), + Complete, +} + +pub(crate) struct Op { + // State of this Op + state: State, + // Per operation data. + data: Option, +} + +impl Op { + /// # Safety + /// + /// Callers must ensure that parameters of the entry (such as buffer) are valid and will + /// be valid for the entire duration of the operation, otherwise it may cause memory problems. + #[allow(dead_code)] + pub(crate) unsafe fn new(entry: Entry, data: T) -> Self { + Self { + data: Some(data), + state: State::Initialize(Some(entry)), + } + } + pub(crate) fn take_data(&mut self) -> Option { + self.data.take() + } +} + +impl Drop for Op { + fn drop(&mut self) { + match self.state { + // We've already dropped this Op. + State::Complete => (), + // We will cancel this Op. + State::Polled(index) => { + let handle = Handle::current(); + handle.inner.driver().io().cancel_op(self, index); + } + // This Op has not been polled yet. + // We don't need to do anything here. + State::Initialize(_) => (), + } + } +} + +/// A single CQE result +pub(crate) struct CqeResult { + #[allow(dead_code)] + pub(crate) result: io::Result, +} + +impl From for CqeResult { + fn from(cqe: cqueue::Entry) -> Self { + let res = cqe.result(); + let result = if res >= 0 { + Ok(res as u32) + } else { + Err(io::Error::from_raw_os_error(-res)) + }; + CqeResult { result } + } +} + +/// A trait that converts a CQE result into a usable value for each operation. 
+pub(crate) trait Completable { + type Output; + fn complete(self, cqe: CqeResult) -> io::Result; +} + +impl Future for Op { + type Output = io::Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let this = self.get_mut(); + let handle = Handle::current(); + let driver = &handle.inner.driver().io(); + + match &mut this.state { + State::Initialize(entry_opt) => { + let entry = entry_opt.take().expect("Entry must be present"); + let waker = cx.waker().clone(); + // SAFETY: entry is valid for the entire duration of the operation + let idx = unsafe { driver.register_op(entry, waker)? }; + this.state = State::Polled(idx); + Poll::Pending + } + + State::Polled(idx) => { + let mut ctx = driver.get_uring().lock(); + let lifecycle = ctx.ops.get_mut(*idx).expect("Lifecycle must be present"); + + match mem::replace(lifecycle, Lifecycle::Submitted) { + // Only replace the stored waker if it wouldn't wake the new one + Lifecycle::Waiting(prev) if !prev.will_wake(cx.waker()) => { + let waker = cx.waker().clone(); + *lifecycle = Lifecycle::Waiting(waker); + Poll::Pending + } + + Lifecycle::Waiting(prev) => { + *lifecycle = Lifecycle::Waiting(prev); + Poll::Pending + } + + Lifecycle::Completed(cqe) => { + // Clean up and complete the future + ctx.remove_op(*idx); + + this.state = State::Complete; + + let data = this + .take_data() + .expect("Data must be present on completion"); + Poll::Ready(data.complete(cqe.into())) + } + + Lifecycle::Submitted => { + unreachable!("Submitted lifecycle should never be seen here"); + } + + Lifecycle::Cancelled(_) => { + unreachable!("Cancelled lifecycle should never be seen here"); + } + } + } + + State::Complete => { + panic!("Future polled after completion"); + } + } + } +} diff --git a/tokio/src/runtime/io/driver.rs b/tokio/src/runtime/io/driver.rs index 1139cbf580c..76ced77a90d 100644 --- a/tokio/src/runtime/io/driver.rs +++ b/tokio/src/runtime/io/driver.rs @@ -2,6 +2,10 @@ cfg_signal_internal_and_unix! 
{ mod signal; } +cfg_tokio_unstable_uring! { + mod uring; + use uring::UringContext; +} use crate::io::interest::Interest; use crate::io::ready::Ready; @@ -45,6 +49,14 @@ pub(crate) struct Handle { waker: mio::Waker, pub(crate) metrics: IoDriverMetrics, + + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + pub(crate) uring_context: Mutex, } #[derive(Debug)] @@ -79,6 +91,9 @@ pub(super) enum Tick { const TOKEN_WAKEUP: mio::Token = mio::Token(0); const TOKEN_SIGNAL: mio::Token = mio::Token(1); +cfg_tokio_unstable_uring! { + pub(crate) const TOKEN_URING: mio::Token = mio::Token(2); +} fn _assert_kinds() { fn _assert() {} @@ -112,8 +127,25 @@ impl Driver { #[cfg(not(target_os = "wasi"))] waker, metrics: IoDriverMetrics::default(), + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + uring_context: Mutex::new(UringContext::new()), }; + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + { + handle.add_uring_source(Interest::READABLE)?; + } + Ok((driver, handle)) } @@ -162,25 +194,40 @@ impl Driver { for event in events.iter() { let token = event.token(); - if token == TOKEN_WAKEUP { - // Nothing to do, the event is used to unblock the I/O driver - } else if token == TOKEN_SIGNAL { - self.signal_ready = true; - } else { - let ready = Ready::from_mio(event); - let ptr = super::EXPOSE_IO.from_exposed_addr(token.0); - - // Safety: we ensure that the pointers used as tokens are not freed - // until they are both deregistered from mio **and** we know the I/O - // driver is not concurrently polling. The I/O driver holds ownership of - // an `Arc` so we can safely cast this to a ref. 
- let io: &ScheduledIo = unsafe { &*ptr }; - - io.set_readiness(Tick::Set, |curr| curr | ready); - io.wake(ready); - - ready_count += 1; - } + match token { + TOKEN_WAKEUP => { + // Nothing to do, the event is used to unblock the I/O driver + } + TOKEN_SIGNAL => { + self.signal_ready = true; + } + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + TOKEN_URING => { + let mut guard = handle.get_uring().lock(); + let ctx = &mut *guard; + ctx.dispatch_completions(); + } + _ => { + let ready = Ready::from_mio(event); + let ptr = super::EXPOSE_IO.from_exposed_addr(token.0); + + // Safety: we ensure that the pointers used as tokens are not freed + // until they are both deregistered from mio **and** we know the I/O + // driver is not concurrently polling. The I/O driver holds ownership of + // an `Arc` so we can safely cast this to a ref. + let io: &ScheduledIo = unsafe { &*ptr }; + + io.set_readiness(Tick::Set, |curr| curr | ready); + io.wake(ready); + + ready_count += 1; + } + }; } handle.metrics.incr_ready_count_by(ready_count); diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs new file mode 100644 index 00000000000..ce122aab6d6 --- /dev/null +++ b/tokio/src/runtime/io/driver/uring.rs @@ -0,0 +1,188 @@ +use io_uring::{squeue::Entry, IoUring}; +use mio::unix::SourceFd; +use slab::Slab; + +use crate::runtime::driver::op::{Lifecycle, Op}; +use crate::{io::Interest, loom::sync::Mutex}; + +use super::{Handle, TOKEN_URING}; + +use std::os::fd::AsRawFd; +use std::{io, mem, task::Waker}; + +const DEFAULT_RING_SIZE: u32 = 256; + +pub(crate) struct UringContext { + pub(crate) uring: io_uring::IoUring, + pub(crate) ops: slab::Slab, +} + +impl UringContext { + pub(crate) fn new() -> Self { + Self { + ops: Slab::new(), + // TODO: make configurable + uring: IoUring::new(DEFAULT_RING_SIZE).unwrap(), + } + } + + pub(crate) fn dispatch_completions(&mut self) { + let ops = &mut self.ops; + let cq = 
self.uring.completion(); + + for cqe in cq { + let idx = cqe.user_data() as usize; + + match ops.get_mut(idx) { + Some(Lifecycle::Waiting(waker)) => { + waker.wake_by_ref(); + *ops.get_mut(idx).unwrap() = Lifecycle::Completed(cqe); + } + Some(Lifecycle::Cancelled(_)) => { + // Op future was cancelled, so we discard the result. + // We just remove the entry from the slab. + ops.remove(idx); + } + Some(other) => { + panic!("unexpected lifecycle for slot {}: {:?}", idx, other); + } + None => { + panic!("no op at index {}", idx); + } + } + } + + // `cq`'s drop gets called here, updating the latest head pointer + } + + pub(crate) fn submit(&mut self) -> io::Result<()> { + loop { + // Errors from io_uring_enter: https://man7.org/linux/man-pages/man2/io_uring_enter.2.html#ERRORS + match self.uring.submit() { + Ok(_) => { + return Ok(()); + } + + // If the submission queue is full, we dispatch completions and try again. + Err(ref e) if e.raw_os_error() == Some(libc::EBUSY) => { + self.dispatch_completions(); + } + // For other errors, we currently return the error as is. + Err(e) => { + return Err(e); + } + } + } + } + + pub(crate) fn remove_op(&mut self, index: usize) -> Lifecycle { + self.ops.remove(index) + } +} + +/// Drop the driver, cancelling any in-progress ops and waiting for them to terminate. +impl Drop for UringContext { + fn drop(&mut self) { + // Make sure we flush the submission queue before dropping the driver. + while !self.uring.submission().is_empty() { + self.submit().expect("Internal error when dropping driver"); + } + + let mut cancel_ops = Slab::new(); + let mut keys_to_move = Vec::new(); + + for (key, lifecycle) in self.ops.iter() { + match lifecycle { + Lifecycle::Waiting(_) | Lifecycle::Submitted | Lifecycle::Cancelled(_) => { + // these should be cancelled + keys_to_move.push(key); + } + // We don't wait for completed ops. 
+ Lifecycle::Completed(_) => {} + } + } + + for key in keys_to_move { + let lifecycle = self.remove_op(key); + cancel_ops.insert(lifecycle); + } + + while !cancel_ops.is_empty() { + // Wait until at least one completion is available. + self.uring + .submit_and_wait(1) + .expect("Internal error when dropping driver"); + + for cqe in self.uring.completion() { + let idx = cqe.user_data() as usize; + cancel_ops.remove(idx); + } + } + } +} + +impl Handle { + #[allow(dead_code)] + pub(crate) fn add_uring_source(&self, interest: Interest) -> io::Result<()> { + // setup for io_uring + let uringfd = self.get_uring().lock().uring.as_raw_fd(); + let mut source = SourceFd(&uringfd); + self.registry + .register(&mut source, TOKEN_URING, interest.to_mio()) + } + + pub(crate) fn get_uring(&self) -> &Mutex { + &self.uring_context + } + + /// # Safety + /// + /// Callers must ensure that parameters of the entry (such as buffer) are valid and will + /// be valid for the entire duration of the operation, otherwise it may cause memory problems. + pub(crate) unsafe fn register_op(&self, entry: Entry, waker: Waker) -> io::Result { + let mut guard = self.get_uring().lock(); + let ctx = &mut *guard; + let index = ctx.ops.insert(Lifecycle::Waiting(waker)); + let entry = entry.user_data(index as u64); + + let submit_or_remove = |ctx: &mut UringContext| -> io::Result<()> { + if let Err(e) = ctx.submit() { + // Submission failed, remove the entry from the slab and return the error + ctx.remove_op(index); + return Err(e); + } + Ok(()) + }; + + // SAFETY: entry is valid for the entire duration of the operation + while unsafe { ctx.uring.submission().push(&entry).is_err() } { + // If the submission queue is full, flush it to the kernel + submit_or_remove(ctx)?; + } + + // Note: For now, we submit the entry immediately without utilizing batching. 
+ submit_or_remove(ctx)?; + + Ok(index) + } + + pub(crate) fn cancel_op(&self, op: &mut Op, index: usize) { + let mut guard = self.get_uring().lock(); + let ctx = &mut *guard; + let ops = &mut ctx.ops; + let Some(lifecycle) = ops.get_mut(index) else { + // The corresponding index doesn't exist anymore, so this Op is already complete. + return; + }; + + // This Op will be cancelled. Here, we don't remove the lifecycle from the slab to keep + // uring data alive until the operation completes. + + match mem::replace(lifecycle, Lifecycle::Cancelled(Box::new(op.take_data()))) { + Lifecycle::Submitted | Lifecycle::Waiting(_) => (), + // The driver saw the completion, but it was never polled; no further CQE will reclaim the slot, so free it now. + Lifecycle::Completed(_) => drop(ops.remove(index)), + prev => panic!("Unexpected state: {:?}", prev), + }; + } +} diff --git a/tokio/src/runtime/io/mod.rs b/tokio/src/runtime/io/mod.rs index 404359bf528..0dc30d70766 100644 --- a/tokio/src/runtime/io/mod.rs +++ b/tokio/src/runtime/io/mod.rs @@ -1,4 +1,7 @@ -#![cfg_attr(not(all(feature = "rt", feature = "net")), allow(dead_code))] +#![cfg_attr( + not(all(feature = "rt", feature = "net", tokio_unstable_uring)), + allow(dead_code) +)] mod driver; use driver::{Direction, Tick}; pub(crate) use driver::{Driver, Handle, ReadyEvent}; diff --git a/tokio/src/runtime/io/registration.rs b/tokio/src/runtime/io/registration.rs index c6e4e32cb71..b542a73cfd7 100644 --- a/tokio/src/runtime/io/registration.rs +++ b/tokio/src/runtime/io/registration.rs @@ -9,7 +9,7 @@ use std::io; use std::sync::Arc; use std::task::{ready, Context, Poll}; -cfg_io_driver! { +cfg_io_driver_impl_or_uring! { /// Associates an I/O resource with the reactor instance that drives it.
/// /// A registration represents an I/O resource registered with a Reactor such diff --git a/tokio/src/runtime/mod.rs b/tokio/src/runtime/mod.rs index 78a0114f48e..e450f5a0fca 100644 --- a/tokio/src/runtime/mod.rs +++ b/tokio/src/runtime/mod.rs @@ -327,7 +327,7 @@ mod driver; pub(crate) mod scheduler; -cfg_io_driver_impl! { +cfg_io_driver_or_uring! { pub(crate) mod io; } diff --git a/tokio/src/util/linked_list.rs b/tokio/src/util/linked_list.rs index 3650f87fbb0..36a6ac556c9 100644 --- a/tokio/src/util/linked_list.rs +++ b/tokio/src/util/linked_list.rs @@ -263,7 +263,7 @@ impl Default for LinkedList { // ===== impl DrainFilter ===== -cfg_io_driver_impl! { +cfg_io_driver_impl_or_uring! { pub(crate) struct DrainFilter<'a, T: Link, F> { list: &'a mut LinkedList, filter: F, diff --git a/tokio/src/util/mod.rs b/tokio/src/util/mod.rs index b57c6acfe97..9c634ee76a2 100644 --- a/tokio/src/util/mod.rs +++ b/tokio/src/util/mod.rs @@ -1,4 +1,4 @@ -cfg_io_driver! { +cfg_io_driver_impl_or_uring! { pub(crate) mod bit; } @@ -96,7 +96,7 @@ pub(crate) mod markers; pub(crate) mod cacheline; -cfg_io_driver_impl! { +cfg_io_driver_impl_or_uring! 
{ pub(crate) mod ptr_expose; } From 437e414c7e5b4612c68342d32b06340b579f40ce Mon Sep 17 00:00:00 2001 From: Motoyuki Kimura Date: Thu, 8 May 2025 21:55:29 +0900 Subject: [PATCH 02/22] fs: add io_uring `open` operation --- tokio/src/fs/open_options.rs | 104 +++++++++++++--- .../src/fs/open_options/uring_open_options.rs | 103 ++++++++++++++++ tokio/src/io/mod.rs | 4 + tokio/src/io/uring/mod.rs | 2 + tokio/src/io/uring/open.rs | 44 +++++++ tokio/src/io/uring/utils.rs | 6 + tokio/src/macros/cfg.rs | 14 +++ tokio/src/runtime/builder.rs | 33 +++++ tokio/src/runtime/mod.rs | 2 +- tokio/tests/fs_open_options.rs | 4 +- tokio/tests/fs_uring.rs | 116 ++++++++++++++++++ 11 files changed, 413 insertions(+), 19 deletions(-) create mode 100644 tokio/src/fs/open_options/uring_open_options.rs create mode 100644 tokio/src/io/uring/mod.rs create mode 100644 tokio/src/io/uring/open.rs create mode 100644 tokio/src/io/uring/utils.rs create mode 100644 tokio/tests/fs_uring.rs diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index e70e6aa0b6f..3ebc20fa6d4 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -1,8 +1,14 @@ -use crate::fs::{asyncify, File}; +use crate::fs::File; use std::io; use std::path::Path; +cfg_tokio_unstable_uring! { + mod uring_open_options; + use uring_open_options::UringOpenOptions; + use crate::runtime::driver::op::Op; +} + #[cfg(test)] mod mock_open_options; #[cfg(test)] @@ -10,8 +16,12 @@ use mock_open_options::MockOpenOptions as StdOpenOptions; #[cfg(not(test))] use std::fs::OpenOptions as StdOpenOptions; -#[cfg(unix)] -use std::os::unix::fs::OpenOptionsExt; +cfg_not_tokio_unstable_uring! 
{ + #[cfg(unix)] + use std::os::unix::fs::OpenOptionsExt; + use crate::fs::asyncify; +} + #[cfg(windows)] use std::os::windows::fs::OpenOptionsExt; @@ -79,7 +89,22 @@ use std::os::windows::fs::OpenOptionsExt; /// } /// ``` #[derive(Clone, Debug)] -pub struct OpenOptions(StdOpenOptions); +pub struct OpenOptions( + #[cfg(not(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + )))] + StdOpenOptions, + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + pub(crate) UringOpenOptions, +); impl OpenOptions { /// Creates a blank new set of options ready for configuration. @@ -99,7 +124,22 @@ impl OpenOptions { /// let future = options.read(true).open("foo.txt"); /// ``` pub fn new() -> OpenOptions { - OpenOptions(StdOpenOptions::new()) + OpenOptions( + #[cfg(not(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + )))] + StdOpenOptions::new(), + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + UringOpenOptions::new(), + ) } /// Sets the option for read access. @@ -386,17 +426,35 @@ impl OpenOptions { /// [`Other`]: std::io::ErrorKind::Other /// [`PermissionDenied`]: std::io::ErrorKind::PermissionDenied pub async fn open(&self, path: impl AsRef) -> io::Result { - let path = path.as_ref().to_owned(); - let opts = self.0.clone(); + self.open_inner(path).await + } - let std = asyncify(move || opts.open(path)).await?; - Ok(File::from_std(std)) + cfg_not_tokio_unstable_uring! 
{ + async fn open_inner(&self, path: impl AsRef) -> io::Result { + let path = path.as_ref().to_owned(); + let opts = self.0.clone(); + + let std = asyncify(move || opts.open(path)).await?; + Ok(File::from_std(std)) + } + + /// Returns a mutable reference to the underlying `std::fs::OpenOptions` + #[cfg(any(windows, unix))] + pub(super) fn as_inner_mut(&mut self) -> &mut StdOpenOptions { + &mut self.0 + } } - /// Returns a mutable reference to the underlying `std::fs::OpenOptions` - #[cfg(any(windows, unix))] - pub(super) fn as_inner_mut(&mut self) -> &mut StdOpenOptions { - &mut self.0 + cfg_tokio_unstable_uring! { + async fn open_inner(&self, path: impl AsRef) -> io::Result { + Op::open(path.as_ref(), self)?.await + } + + /// Returns a mutable reference to the underlying `UringOpenOptions` + #[cfg(any(windows, unix))] + pub(super) fn as_inner_mut(&mut self) -> &mut UringOpenOptions { + &mut self.0 + } } } @@ -649,9 +707,23 @@ cfg_windows! { } } -impl From for OpenOptions { - fn from(options: StdOpenOptions) -> OpenOptions { - OpenOptions(options) +cfg_not_tokio_unstable_uring! { + impl From for OpenOptions { + fn from(options: StdOpenOptions) -> OpenOptions { + OpenOptions(options) + } + } +} + +cfg_tokio_unstable_uring! { + impl From for OpenOptions { + fn from(_options: StdOpenOptions) -> OpenOptions { + // It's not straightforward to convert from std's OpenOptions to io_uring's one.
+ // * https://github.com/rust-lang/rust/issues/74943 + // * https://github.com/rust-lang/rust/issues/76801 + + panic!("Conversion from std's OpenOptions to io_uring's one is not supported") + } } } diff --git a/tokio/src/fs/open_options/uring_open_options.rs b/tokio/src/fs/open_options/uring_open_options.rs new file mode 100644 index 00000000000..98b47a76b66 --- /dev/null +++ b/tokio/src/fs/open_options/uring_open_options.rs @@ -0,0 +1,103 @@ +use std::io; + +#[derive(Debug, Clone)] +pub(crate) struct UringOpenOptions { + pub(crate) read: bool, + pub(crate) write: bool, + pub(crate) append: bool, + pub(crate) truncate: bool, + pub(crate) create: bool, + pub(crate) create_new: bool, + pub(crate) mode: libc::mode_t, + pub(crate) custom_flags: libc::c_int, +} + +impl UringOpenOptions { + pub(crate) fn new() -> Self { + Self { + read: false, + write: false, + append: false, + truncate: false, + create: false, + create_new: false, + mode: 0o666, + custom_flags: 0, + } + } + + pub(crate) fn append(&mut self, append: bool) -> &mut Self { + self.append = append; + self + } + + pub(crate) fn create(&mut self, create: bool) -> &mut Self { + self.create = create; + self + } + + pub(crate) fn create_new(&mut self, create_new: bool) -> &mut Self { + self.create_new = create_new; + self + } + + pub(crate) fn read(&mut self, read: bool) -> &mut Self { + self.read = read; + self + } + + pub(crate) fn write(&mut self, write: bool) -> &mut Self { + self.write = write; + self + } + + pub(crate) fn truncate(&mut self, truncate: bool) -> &mut Self { + self.truncate = truncate; + self + } + + pub(crate) fn mode(&mut self, mode: u32) -> &mut Self { + self.mode = mode as libc::mode_t; + self + } + + pub(crate) fn custom_flags(&mut self, flags: i32) -> &mut Self { + self.custom_flags = flags; + self + } + + pub(crate) fn access_mode(&self) -> io::Result { + match (self.read, self.write, self.append) { + (true, false, false) => Ok(libc::O_RDONLY), + (false, true, false) => 
Ok(libc::O_WRONLY), + (true, true, false) => Ok(libc::O_RDWR), + (false, _, true) => Ok(libc::O_WRONLY | libc::O_APPEND), + (true, _, true) => Ok(libc::O_RDWR | libc::O_APPEND), + (false, false, false) => Err(io::Error::from_raw_os_error(libc::EINVAL)), + } + } + + pub(crate) fn creation_mode(&self) -> io::Result { + match (self.write, self.append) { + (true, false) => {} + (false, false) => { + if self.truncate || self.create || self.create_new { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + } + (_, true) => { + if self.truncate && !self.create_new { + return Err(io::Error::from_raw_os_error(libc::EINVAL)); + } + } + } + + Ok(match (self.create, self.truncate, self.create_new) { + (false, false, false) => 0, + (true, false, false) => libc::O_CREAT, + (false, true, false) => libc::O_TRUNC, + (true, true, false) => libc::O_CREAT | libc::O_TRUNC, + (_, _, true) => libc::O_CREAT | libc::O_EXCL, + }) + } +} diff --git a/tokio/src/io/mod.rs b/tokio/src/io/mod.rs index 6178188324f..839e8b42061 100644 --- a/tokio/src/io/mod.rs +++ b/tokio/src/io/mod.rs @@ -293,3 +293,7 @@ cfg_io_blocking! { pub(crate) use crate::blocking::JoinHandle as Blocking; } } + +cfg_tokio_unstable_uring! 
{ + pub(crate) mod uring; +} diff --git a/tokio/src/io/uring/mod.rs b/tokio/src/io/uring/mod.rs new file mode 100644 index 00000000000..e5ac85af604 --- /dev/null +++ b/tokio/src/io/uring/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod open; +pub(crate) mod utils; diff --git a/tokio/src/io/uring/open.rs b/tokio/src/io/uring/open.rs new file mode 100644 index 00000000000..d9e88d0c642 --- /dev/null +++ b/tokio/src/io/uring/open.rs @@ -0,0 +1,44 @@ +use super::utils::cstr; +use crate::{ + fs::OpenOptions, + runtime::driver::op::{Completable, CqeResult, Op}, +}; +use io_uring::{opcode, types}; +use std::{ffi::CString, io, os::fd::FromRawFd, path::Path}; + +pub(crate) struct Open { + #[allow(dead_code)] + path: CString, +} + +impl Completable for Open { + type Output = crate::fs::File; + fn complete(self, cqe: CqeResult) -> io::Result { + let fd = cqe.result? as i32; + let file = unsafe { crate::fs::File::from_raw_fd(fd) }; + Ok(file) + } +} + +impl Op { + /// Submit a request to open a file. + pub(crate) fn open(path: &Path, options: &OpenOptions) -> io::Result> { + let inner_opt = options; + let path = cstr(path)?; + + let custom_flags = inner_opt.0.custom_flags; + let flags = libc::O_CLOEXEC + | options.0.access_mode()? + | options.0.creation_mode()? + | (custom_flags & !libc::O_ACCMODE); + + let open_op = opcode::OpenAt::new(types::Fd(libc::AT_FDCWD), path.as_ptr()) + .flags(flags) + .mode(inner_opt.0.mode) + .build(); + + // SAFETY: Parameters are valid for the entire duration of the operation + let op = unsafe { Op::new(open_op, Open { path }) }; + Ok(op) + } +} diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs new file mode 100644 index 00000000000..789b4e80032 --- /dev/null +++ b/tokio/src/io/uring/utils.rs @@ -0,0 +1,6 @@ +use std::{ffi::CString, io, path::Path}; + +pub(crate) fn cstr(p: &Path) -> io::Result { + use std::os::unix::ffi::OsStrExt; + Ok(CString::new(p.as_os_str().as_bytes())?) 
+} diff --git a/tokio/src/macros/cfg.rs b/tokio/src/macros/cfg.rs index d01fac65c3f..8e43aad5024 100644 --- a/tokio/src/macros/cfg.rs +++ b/tokio/src/macros/cfg.rs @@ -727,3 +727,17 @@ macro_rules! cfg_tokio_unstable_uring { )* }; } + +macro_rules! cfg_not_tokio_unstable_uring { + ($($item:item)*) => { + $( + #[cfg(not(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + )))] + $item + )* + }; +} diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs index 93c67c5b5c8..68a16772abc 100644 --- a/tokio/src/runtime/builder.rs +++ b/tokio/src/runtime/builder.rs @@ -338,6 +338,15 @@ impl Builder { all(unix, feature = "signal") ))] self.enable_io(); + + #[cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", + ))] + self.enable_uring(); + #[cfg(feature = "time")] self.enable_time(); @@ -1575,6 +1584,30 @@ cfg_time! { } } +cfg_tokio_unstable_uring! { + impl Builder { + /// Enables the `tokio-uring` driver. + /// + /// Doing this enables using io_uring operations on the runtime. + /// + /// # Examples + /// + /// ``` + /// use tokio::runtime; + /// + /// let rt = runtime::Builder::new_multi_thread() + /// .enable_uring() + /// .build() + /// .unwrap(); + /// ``` + pub fn enable_uring(&mut self) -> &mut Self { + // Currently, the uring flag is represented as `enable_io`. + self.enable_io = true; + self + } + } +} + cfg_test_util! { impl Builder { /// Controls if the runtime's clock starts paused or advancing. 
diff --git a/tokio/src/runtime/mod.rs b/tokio/src/runtime/mod.rs index e450f5a0fca..bd212ac09d4 100644 --- a/tokio/src/runtime/mod.rs +++ b/tokio/src/runtime/mod.rs @@ -323,7 +323,7 @@ pub(crate) mod context; pub(crate) mod park; -mod driver; +pub(crate) mod driver; pub(crate) mod scheduler; diff --git a/tokio/tests/fs_open_options.rs b/tokio/tests/fs_open_options.rs index 58d7de647e2..58982d679df 100644 --- a/tokio/tests/fs_open_options.rs +++ b/tokio/tests/fs_open_options.rs @@ -58,7 +58,7 @@ async fn open_options_mode() { let mode = format!("{:?}", OpenOptions::new().mode(0o644)); // TESTING HACK: use Debug output to check the stored data assert!( - mode.contains("mode: 420 ") || mode.contains("mode: 0o000644 "), + mode.contains("mode: 420") || mode.contains("mode: 0o000644"), "mode is: {mode}" ); } @@ -69,7 +69,7 @@ async fn open_options_custom_flags_linux() { // TESTING HACK: use Debug output to check the stored data assert!( format!("{:?}", OpenOptions::new().custom_flags(libc::O_TRUNC)) - .contains("custom_flags: 512,") + .contains("custom_flags: 512") ); } diff --git a/tokio/tests/fs_uring.rs b/tokio/tests/fs_uring.rs new file mode 100644 index 00000000000..4fb1a4fba07 --- /dev/null +++ b/tokio/tests/fs_uring.rs @@ -0,0 +1,116 @@ +#![cfg(all( + tokio_unstable_uring, + feature = "rt", + feature = "fs", + target_os = "linux", +))] + +use std::sync::mpsc; + +use tempfile::NamedTempFile; +use tokio::{ + fs::OpenOptions, + io::AsyncReadExt, + runtime::{Builder, Runtime}, + task::JoinSet, +}; + +fn multi_rt(n: usize) -> Box Runtime> { + Box::new(move || { + Builder::new_multi_thread() + .worker_threads(n) + .enable_all() + .build() + .unwrap() + }) +} + +fn current_rt() -> Box Runtime> { + Box::new(|| Builder::new_current_thread().enable_all().build().unwrap()) +} + +#[test] +fn all_tests() { + let rt_conbination = vec![current_rt(), multi_rt(1), multi_rt(8)]; + + for rt in rt_conbination { + shutdown_runtime_while_performing_io_uring_ops(rt()); + 
process_many_files(rt()); + } +} + +fn shutdown_runtime_while_performing_io_uring_ops(rt: Runtime) { + let (tx, rx) = mpsc::channel(); + let (done_tx, done_rx) = mpsc::channel(); + + rt.spawn(async { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + + let mut set = JoinSet::new(); + + // spawning a bunch of uring operations. + loop { + let path = path.clone(); + set.spawn(async move { + let mut opt = OpenOptions::new(); + opt.read(true); + opt.open(&path).await.unwrap(); + }); + } + }); + + std::thread::spawn(move || { + let rt: Runtime = rx.recv().unwrap(); + rt.shutdown_background(); + done_tx.send(()).unwrap(); + }); + + tx.send(rt).unwrap(); + + done_rx.recv().unwrap(); +} + +fn process_many_files(rt: Runtime) { + rt.block_on(async { + const NUM_FILES: usize = 512; + const FILE_SIZE: usize = 64; + + use rand::Rng; + use std::io::Write; + use tempfile::NamedTempFile; + + let mut files = Vec::with_capacity(NUM_FILES); + for _ in 0..NUM_FILES { + let mut tmp = NamedTempFile::new().unwrap(); + let mut data = vec![0u8; FILE_SIZE]; + rand::thread_rng().fill(&mut data[..]); + tmp.write_all(&data).unwrap(); + tmp.flush().unwrap(); + let path = tmp.path().to_path_buf(); + files.push((tmp, data, path)); + } + + let mut handles = Vec::with_capacity(NUM_FILES); + for (tmp, original, path) in files { + handles.push(tokio::spawn(async move { + let _keep_alive = tmp; + + let mut file = tokio::fs::OpenOptions::new() + .read(true) + .open(&path) + .await + .unwrap(); + let mut buf = vec![0u8; FILE_SIZE]; + + file.read_exact(&mut buf).await.unwrap(); + + assert_eq!(buf, original); + })); + } + + for h in handles { + h.await.unwrap(); + } + }); +} From 05239cc76b1a4b25a9befa1960d4dbf209532859 Mon Sep 17 00:00:00 2001 From: Motoyuki Kimura Date: Thu, 8 May 2025 21:55:40 +0900 Subject: [PATCH 03/22] make ci work --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml 
b/.github/workflows/ci.yml index 2362d043eab..090aff155d4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,8 @@ on: push: branches: ["master", "tokio-*.x"] pull_request: - branches: ["master", "tokio-*.x"] + # Temporarily commented out to run CI + #branches: ["master", "tokio-*.x"] name: CI From d0dc6b2acfa57338922ba020d097f478795299f2 Mon Sep 17 00:00:00 2001 From: mox692 Date: Sun, 11 May 2025 16:34:37 +0900 Subject: [PATCH 04/22] add benchmark --- benches/Cargo.toml | 1 + benches/fs.rs | 92 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 82 insertions(+), 11 deletions(-) diff --git a/benches/Cargo.toml b/benches/Cargo.toml index de39565b398..1c53dc78f4d 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -13,6 +13,7 @@ tokio = { version = "1.5.0", path = "../tokio", features = ["full"] } criterion = "0.5.1" rand = "0.8" rand_chacha = "0.3" +tempfile = "3.1.0" [dev-dependencies] tokio-util = { version = "0.7.0", path = "../tokio-util", features = ["full"] } diff --git a/benches/fs.rs b/benches/fs.rs index 2964afbd46e..97f9857f4cf 100644 --- a/benches/fs.rs +++ b/benches/fs.rs @@ -1,21 +1,40 @@ #![cfg(unix)] +use tokio::runtime::{Builder, Runtime}; +use tokio::task::JoinSet; use tokio_stream::StreamExt; -use tokio::fs::File; +use tokio::fs::{File, OpenOptions}; use tokio::io::AsyncReadExt; use tokio_util::codec::{BytesCodec, FramedRead /*FramedWrite*/}; -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{criterion_group, BenchmarkId}; +use std::path::PathBuf; +use std::time::Instant; +use tempfile::NamedTempFile; + +use criterion::{criterion_main, Criterion}; use std::fs::File as StdFile; use std::io::Read as StdRead; -fn rt() -> tokio::runtime::Runtime { - tokio::runtime::Builder::new_multi_thread() - .worker_threads(2) - .build() - .unwrap() +fn rt(worker_threads: usize) -> Runtime { + if worker_threads == 1 { + let mut builder = Builder::new_current_thread(); + 
#[cfg(tokio_unstable_uring)] + { + builder.enable_uring(); + } + builder.build().unwrap() + } else { + let mut builder = Builder::new_multi_thread(); + builder.worker_threads(worker_threads); + #[cfg(tokio_unstable_uring)] + { + builder.enable_uring(); + } + builder.build().unwrap() + } } const BLOCK_COUNT: usize = 1_000; @@ -24,7 +43,7 @@ const BUFFER_SIZE: usize = 4096; const DEV_ZERO: &str = "/dev/zero"; fn async_read_codec(c: &mut Criterion) { - let rt = rt(); + let rt = rt(2); c.bench_function("async_read_codec", |b| { b.iter(|| { @@ -44,7 +63,7 @@ fn async_read_codec(c: &mut Criterion) { } fn async_read_buf(c: &mut Criterion) { - let rt = rt(); + let rt = rt(2); c.bench_function("async_read_buf", |b| { b.iter(|| { @@ -66,7 +85,7 @@ fn async_read_buf(c: &mut Criterion) { } fn async_read_std_file(c: &mut Criterion) { - let rt = rt(); + let rt = rt(2); c.bench_function("async_read_std_file", |b| { b.iter(|| { @@ -102,11 +121,62 @@ fn sync_read(c: &mut Criterion) { }); } +fn create_tmp_files(num_files: usize) -> (Vec, Vec) { + let mut files = Vec::with_capacity(num_files); + for _ in 0..num_files { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + files.push((tmp, path)); + } + + files.into_iter().unzip() +} + +fn open_many_files(c: &mut Criterion) { + const NUM_FILES: usize = 512; + + let (_tmp_files, paths): (Vec, Vec) = create_tmp_files(NUM_FILES); + + let mut group = c.benchmark_group("open_many_files"); + + for &threads in &[1, 2, 4, 8, 16, 32] { + let rt = rt(threads); + + let paths = paths.clone(); + group.bench_with_input( + BenchmarkId::from_parameter(threads), + &threads, + move |b, &_threads| { + b.iter_custom(|iter| { + rt.block_on(async { + let mut set = JoinSet::new(); + + let start = Instant::now(); + + for i in 0..(iter as usize) { + let path = paths.get(i % NUM_FILES).unwrap().clone(); + set.spawn(async move { + let _file = OpenOptions::new().read(true).open(path).await.unwrap(); + }); + } + while let Some(Ok(_)) 
= set.join_next().await {} + + start.elapsed() + }) + }) + }, + ); + } + + group.finish(); +} + criterion_group!( file, async_read_std_file, async_read_buf, async_read_codec, - sync_read + sync_read, + open_many_files ); criterion_main!(file); From c491ccb19f5383e3e9e5a2fcaf8371e9440bccff Mon Sep 17 00:00:00 2001 From: mox692 Date: Thu, 22 May 2025 02:19:44 +0900 Subject: [PATCH 05/22] rt: add check for uring availability at runtime --- tokio/src/runtime/io/driver.rs | 11 ++- tokio/src/runtime/io/driver/uring.rs | 143 ++++++++++++++++++++++++--- 2 files changed, 136 insertions(+), 18 deletions(-) diff --git a/tokio/src/runtime/io/driver.rs b/tokio/src/runtime/io/driver.rs index fb496f140ff..1a49b474291 100644 --- a/tokio/src/runtime/io/driver.rs +++ b/tokio/src/runtime/io/driver.rs @@ -5,6 +5,7 @@ cfg_signal_internal_and_unix! { cfg_tokio_uring! { mod uring; use uring::UringContext; + use crate::loom::sync::atomic::AtomicUsize; } use crate::io::interest::Interest; @@ -52,6 +53,9 @@ pub(crate) struct Handle { #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] pub(crate) uring_context: Mutex, + + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] + pub(crate) uring_state: AtomicUsize, } #[derive(Debug)] @@ -121,13 +125,10 @@ impl Driver { metrics: IoDriverMetrics::default(), #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] uring_context: Mutex::new(UringContext::new()), + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] + uring_state: AtomicUsize::new(0), }; - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] - { - handle.add_uring_source(Interest::READABLE)?; - } - Ok((driver, handle)) } diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index 8a129bc494d..dc4cfb1b869 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -2,6 +2,7 @@ use 
io_uring::{squeue::Entry, IoUring}; use mio::unix::SourceFd; use slab::Slab; +use crate::loom::sync::atomic::Ordering; use crate::runtime::driver::op::{Cancellable, Lifecycle}; use crate::{io::Interest, loom::sync::Mutex}; @@ -12,8 +13,66 @@ use std::{io, mem, task::Waker}; const DEFAULT_RING_SIZE: u32 = 256; +#[repr(usize)] +#[derive(Debug, PartialEq, Eq)] +enum State { + Uninitialized = 0, + Initialized = 1, + Unsupported = 2, +} + +impl State { + fn as_usize(self) -> usize { + self as usize + } + + fn from_usize(value: usize) -> Self { + match value { + 0 => State::Uninitialized, + 1 => State::Initialized, + 2 => State::Unsupported, + _ => unreachable!("invalid Uring state: {}", value), + } + } +} + +/// A wrapper around `IoUring` that lazily initializes it. +struct LazyUring { + inner: Option, +} + +impl LazyUring { + fn new() -> Self { + Self { inner: None } + } + + /// Perform `io_uring_setup` system call. + /// + /// If the machine doesn't support io_uring, then this will return an + /// `ENOSYS` error. This returns `true` if the ring was initialized, + /// and `false` if it was already initialized. 
+ fn initialize(&mut self) -> io::Result { + if self.inner.is_some() { + // Already initialized + return Ok(false); + } + + self.inner.replace(IoUring::new(DEFAULT_RING_SIZE)?); + + Ok(true) + } + + fn as_mut(&mut self) -> Option<&mut IoUring> { + self.inner.as_mut() + } + + fn as_ref(&self) -> Option<&IoUring> { + self.inner.as_ref() + } +} + pub(crate) struct UringContext { - pub(crate) uring: io_uring::IoUring, + uring: LazyUring, pub(crate) ops: slab::Slab, } @@ -21,16 +80,30 @@ impl UringContext { pub(crate) fn new() -> Self { Self { ops: Slab::new(), - // TODO: make configurable - uring: IoUring::new(DEFAULT_RING_SIZE).unwrap(), + uring: LazyUring::new(), } } + pub(crate) fn ring(&self) -> &io_uring::IoUring { + self.uring.as_ref().expect("io_uring not initialized") + } + + pub(crate) fn ring_mut(&mut self) -> &mut io_uring::IoUring { + self.uring.as_mut().expect("io_uring not initialized") + } + + pub(crate) fn initialize(&mut self) -> io::Result { + self.uring.initialize() + } + pub(crate) fn dispatch_completions(&mut self) { let ops = &mut self.ops; - let cq = self.uring.completion(); + let Some(mut cq) = self.uring.inner.take() else { + // Uring is not initialized yet. + return; + }; - for cqe in cq { + for cqe in cq.completion() { let idx = cqe.user_data() as usize; match ops.get_mut(idx) { @@ -52,13 +125,15 @@ impl UringContext { } } + self.uring.inner.replace(cq); + // `cq`'s drop gets called here, updating the latest head pointer } pub(crate) fn submit(&mut self) -> io::Result<()> { loop { // Errors from io_uring_enter: https://man7.org/linux/man-pages/man2/io_uring_enter.2.html#ERRORS - match self.uring.submit() { + match self.ring().submit() { Ok(_) => { return Ok(()); } @@ -83,8 +158,13 @@ impl UringContext { /// Drop the driver, cancelling any in-progress ops and waiting for them to terminate. impl Drop for UringContext { fn drop(&mut self) { + if self.uring.inner.is_none() { + // Uring is not initialized or not Initialized. 
+ return; + } + // Make sure we flush the submission queue before dropping the driver. - while !self.uring.submission().is_empty() { + while !self.ring_mut().submission().is_empty() { self.submit().expect("Internal error when dropping driver"); } @@ -108,12 +188,12 @@ impl Drop for UringContext { } while !cancel_ops.is_empty() { - // Wait until at least one completion is available. - self.uring + // Wait until at least one completion is Initialized. + self.ring_mut() .submit_and_wait(1) .expect("Internal error when dropping driver"); - for cqe in self.uring.completion() { + for cqe in self.ring_mut().completion() { let idx = cqe.user_data() as usize; cancel_ops.remove(idx); } @@ -122,10 +202,11 @@ impl Drop for UringContext { } impl Handle { + // TODO: this should be delayed. #[allow(dead_code)] - pub(crate) fn add_uring_source(&self, interest: Interest) -> io::Result<()> { + fn add_uring_source(&self, interest: Interest) -> io::Result<()> { // setup for io_uring - let uringfd = self.get_uring().lock().uring.as_raw_fd(); + let uringfd = self.get_uring().lock().ring().as_raw_fd(); let mut source = SourceFd(&uringfd); self.registry .register(&mut source, TOKEN_WAKEUP, interest.to_mio()) @@ -135,11 +216,47 @@ impl Handle { &self.uring_context } + fn initialize_uring(&self) -> io::Result<()> { + self.add_uring_source(Interest::READABLE)?; + let mut guard = self.get_uring().lock(); + if guard.initialize()? { + self.set_state(State::Initialized); + } + + Ok(()) + } + + fn set_state(&self, state: State) { + self.uring_state.store(state.as_usize(), Ordering::Relaxed); + } + + fn get_state(&self) -> State { + State::from_usize(self.uring_state.load(Ordering::Relaxed)) + } + /// # Safety /// /// Callers must ensure that parameters of the entry (such as buffer) are valid and will /// be valid for the entire duration of the operation, otherwise it may cause memory problems. 
pub(crate) unsafe fn register_op(&self, entry: Entry, waker: Waker) -> io::Result { + match self.get_state() { + // This is the first uring operation, so we need to initialize it. + State::Uninitialized => { + self.initialize_uring().map_err(|e| { + if e.raw_os_error() == Some(libc::ENOSYS) { + self.set_state(State::Unsupported); + } + e + })?; + } + State::Unsupported => { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + _ => {} + } + + // Uring is initialized + let mut guard = self.get_uring().lock(); let ctx = &mut *guard; let index = ctx.ops.insert(Lifecycle::Waiting(waker)); @@ -155,7 +272,7 @@ impl Handle { }; // SAFETY: entry is valid for the entire duration of the operation - while unsafe { ctx.uring.submission().push(&entry).is_err() } { + while unsafe { ctx.ring_mut().submission().push(&entry).is_err() } { // If the submission queue is full, flush it to the kernel submit_or_remove(ctx)?; } From ea9d152ba94f023e496e9b32de7e06c2566f0edc Mon Sep 17 00:00:00 2001 From: mox692 Date: Thu, 22 May 2025 23:25:19 +0900 Subject: [PATCH 06/22] Remove LazyUring --- tokio/src/runtime/io/driver/uring.rs | 75 ++++++++++------------------ 1 file changed, 27 insertions(+), 48 deletions(-) diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index dc4cfb1b869..738b8e5e36b 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -36,43 +36,8 @@ impl State { } } -/// A wrapper around `IoUring` that lazily initializes it. -struct LazyUring { - inner: Option, -} - -impl LazyUring { - fn new() -> Self { - Self { inner: None } - } - - /// Perform `io_uring_setup` system call. - /// - /// If the machine doesn't support io_uring, then this will return an - /// `ENOSYS` error. This returns `true` if the ring was initialized, - /// and `false` if it was already initialized. 
- fn initialize(&mut self) -> io::Result { - if self.inner.is_some() { - // Already initialized - return Ok(false); - } - - self.inner.replace(IoUring::new(DEFAULT_RING_SIZE)?); - - Ok(true) - } - - fn as_mut(&mut self) -> Option<&mut IoUring> { - self.inner.as_mut() - } - - fn as_ref(&self) -> Option<&IoUring> { - self.inner.as_ref() - } -} - pub(crate) struct UringContext { - uring: LazyUring, + pub(crate) uring: Option, pub(crate) ops: slab::Slab, } @@ -80,7 +45,7 @@ impl UringContext { pub(crate) fn new() -> Self { Self { ops: Slab::new(), - uring: LazyUring::new(), + uring: None, } } @@ -92,18 +57,32 @@ impl UringContext { self.uring.as_mut().expect("io_uring not initialized") } + /// Perform `io_uring_setup` system call. + /// + /// If the machine doesn't support io_uring, then this will return an + /// `ENOSYS` error. This returns `true` if the ring was initialized, + /// and `false` if it was already initialized. pub(crate) fn initialize(&mut self) -> io::Result { - self.uring.initialize() + if self.uring.is_some() { + // Already initialized + return Ok(false); + } + + self.uring.replace(IoUring::new(DEFAULT_RING_SIZE)?); + + Ok(true) } pub(crate) fn dispatch_completions(&mut self) { let ops = &mut self.ops; - let Some(mut cq) = self.uring.inner.take() else { + let Some(mut uring) = self.uring.take() else { // Uring is not initialized yet. return; }; - for cqe in cq.completion() { + let cq = uring.completion(); + + for cqe in cq { let idx = cqe.user_data() as usize; match ops.get_mut(idx) { @@ -125,7 +104,7 @@ impl UringContext { } } - self.uring.inner.replace(cq); + self.uring.replace(uring); // `cq`'s drop gets called here, updating the latest head pointer } @@ -158,7 +137,7 @@ impl UringContext { /// Drop the driver, cancelling any in-progress ops and waiting for them to terminate. impl Drop for UringContext { fn drop(&mut self) { - if self.uring.inner.is_none() { + if self.uring.is_none() { // Uring is not initialized or not Initialized. 
return; } @@ -188,7 +167,7 @@ impl Drop for UringContext { } while !cancel_ops.is_empty() { - // Wait until at least one completion is Initialized. + // Wait until at least one completion is available. self.ring_mut() .submit_and_wait(1) .expect("Internal error when dropping driver"); @@ -220,17 +199,17 @@ impl Handle { self.add_uring_source(Interest::READABLE)?; let mut guard = self.get_uring().lock(); if guard.initialize()? { - self.set_state(State::Initialized); + self.set_uring_state(State::Initialized); } Ok(()) } - fn set_state(&self, state: State) { + fn set_uring_state(&self, state: State) { self.uring_state.store(state.as_usize(), Ordering::Relaxed); } - fn get_state(&self) -> State { + fn get_uring_state(&self) -> State { State::from_usize(self.uring_state.load(Ordering::Relaxed)) } @@ -239,12 +218,12 @@ impl Handle { /// Callers must ensure that parameters of the entry (such as buffer) are valid and will /// be valid for the entire duration of the operation, otherwise it may cause memory problems. pub(crate) unsafe fn register_op(&self, entry: Entry, waker: Waker) -> io::Result { - match self.get_state() { + match self.get_uring_state() { // This is the first uring operation, so we need to initialize it. 
State::Uninitialized => { self.initialize_uring().map_err(|e| { if e.raw_os_error() == Some(libc::ENOSYS) { - self.set_state(State::Unsupported); + self.set_uring_state(State::Unsupported); } e })?; From d65962622f3ed590a509468620e98a122ec176e8 Mon Sep 17 00:00:00 2001 From: mox692 Date: Sat, 24 May 2025 00:48:01 +0900 Subject: [PATCH 07/22] fallback when io_uring is not available --- tokio/src/fs/mod.rs | 3 + tokio/src/fs/open_options.rs | 189 +++++++++++------- .../src/fs/open_options/uring_open_options.rs | 19 ++ tokio/src/io/uring/open.rs | 12 +- tokio/src/runtime/io/driver/uring.rs | 29 ++- 5 files changed, 172 insertions(+), 80 deletions(-) diff --git a/tokio/src/fs/mod.rs b/tokio/src/fs/mod.rs index c1855c42aeb..7e0c35ba84a 100644 --- a/tokio/src/fs/mod.rs +++ b/tokio/src/fs/mod.rs @@ -237,6 +237,9 @@ pub use self::metadata::metadata; mod open_options; pub use self::open_options::OpenOptions; +cfg_tokio_uring! { + pub(crate) use self::open_options::UringOpenOptions; +} mod read; pub use self::read::read; diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index 99038399f42..dbeb2e026aa 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -1,11 +1,11 @@ -use crate::fs::File; +use crate::fs::{asyncify, File}; use std::io; use std::path::Path; cfg_tokio_uring! { mod uring_open_options; - use uring_open_options::UringOpenOptions; + pub(crate) use uring_open_options::UringOpenOptions; use crate::runtime::driver::op::Op; } @@ -16,12 +16,8 @@ use mock_open_options::MockOpenOptions as StdOpenOptions; #[cfg(not(test))] use std::fs::OpenOptions as StdOpenOptions; -cfg_not_tokio_uring! 
{ - #[cfg(unix)] - use std::os::unix::fs::OpenOptionsExt; - use crate::fs::asyncify; -} - +#[cfg(unix)] +use std::os::unix::fs::OpenOptionsExt; #[cfg(windows)] use std::os::windows::fs::OpenOptionsExt; @@ -89,12 +85,16 @@ use std::os::windows::fs::OpenOptionsExt; /// } /// ``` #[derive(Clone, Debug)] -pub struct OpenOptions( - #[cfg(not(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",)))] - StdOpenOptions, - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] - pub(crate) UringOpenOptions, -); +pub struct OpenOptions { + inner: Kind, +} + +#[derive(Debug, Clone)] +enum Kind { + Std(StdOpenOptions), + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Uring(UringOpenOptions), +} impl OpenOptions { /// Creates a blank new set of options ready for configuration. @@ -114,12 +114,12 @@ impl OpenOptions { /// let future = options.read(true).open("foo.txt"); /// ``` pub fn new() -> OpenOptions { - OpenOptions( - #[cfg(not(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",)))] - StdOpenOptions::new(), - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] - UringOpenOptions::new(), - ) + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + let inner = Kind::Uring(UringOpenOptions::new()); + #[cfg(not(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux")))] + let inner = Kind::Std(StdOpenOptions::new()); + + OpenOptions { inner } } /// Sets the option for read access. 
@@ -148,7 +148,15 @@ impl OpenOptions { /// } /// ``` pub fn read(&mut self, read: bool) -> &mut OpenOptions { - self.0.read(read); + match &mut self.inner { + Kind::Std(opts) => { + opts.read(read); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.read(read); + } + } self } @@ -178,7 +186,15 @@ impl OpenOptions { /// } /// ``` pub fn write(&mut self, write: bool) -> &mut OpenOptions { - self.0.write(write); + match &mut self.inner { + Kind::Std(opts) => { + opts.write(write); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.write(write); + } + } self } @@ -237,7 +253,15 @@ impl OpenOptions { /// } /// ``` pub fn append(&mut self, append: bool) -> &mut OpenOptions { - self.0.append(append); + match &mut self.inner { + Kind::Std(opts) => { + opts.append(append); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.append(append); + } + } self } @@ -270,7 +294,15 @@ impl OpenOptions { /// } /// ``` pub fn truncate(&mut self, truncate: bool) -> &mut OpenOptions { - self.0.truncate(truncate); + match &mut self.inner { + Kind::Std(opts) => { + opts.truncate(truncate); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.truncate(truncate); + } + } self } @@ -306,7 +338,15 @@ impl OpenOptions { /// } /// ``` pub fn create(&mut self, create: bool) -> &mut OpenOptions { - self.0.create(create); + match &mut self.inner { + Kind::Std(opts) => { + opts.create(create); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.create(create); + } + } self } @@ -349,7 +389,15 @@ impl OpenOptions { /// } /// ``` pub fn create_new(&mut self, create_new: bool) -> &mut OpenOptions { - self.0.create_new(create_new); + match &mut self.inner { + Kind::Std(opts) => { + 
opts.create_new(create_new); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.create_new(create_new); + } + } self } @@ -406,34 +454,37 @@ impl OpenOptions { /// [`Other`]: std::io::ErrorKind::Other /// [`PermissionDenied`]: std::io::ErrorKind::PermissionDenied pub async fn open(&self, path: impl AsRef) -> io::Result { - self.open_inner(path).await - } + match &self.inner { + Kind::Std(opts) => Self::std_open(opts, path).await, + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + let handle = crate::runtime::Handle::current(); + let driver_handle = handle.inner.driver().io(); - cfg_not_tokio_uring! { - async fn open_inner(&self, path: impl AsRef) -> io::Result { - let path = path.as_ref().to_owned(); - let opts = self.0.clone(); - - let std = asyncify(move || opts.open(path)).await?; - Ok(File::from_std(std)) - } - - /// Returns a mutable reference to the underlying `std::fs::OpenOptions` - #[cfg(any(windows, unix))] - pub(super) fn as_inner_mut(&mut self) -> &mut StdOpenOptions { - &mut self.0 + if driver_handle.uring_available_or_init() { + Op::open(path.as_ref(), opts)?.await + } else { + let opts = opts.clone().into(); + Self::std_open(&opts, path).await + } + } } } - cfg_tokio_uring! 
{ - async fn open_inner(&self, path: impl AsRef) -> io::Result { - Op::open(path.as_ref(), self)?.await - } + async fn std_open(opts: &StdOpenOptions, path: impl AsRef) -> io::Result { + let path = path.as_ref().to_owned(); + let opts = opts.clone(); - /// Returns a mutable reference to the underlying `std::fs::OpenOptions` - #[cfg(any(windows, unix))] - pub(super) fn as_inner_mut(&mut self) -> &mut UringOpenOptions { - &mut self.0 + let std = asyncify(move || opts.open(path)).await?; + Ok(File::from_std(std)) + } + + #[cfg(windows)] + pub(super) fn as_inner_mut(&mut self) -> &mut StdOpenOptions { + match &mut self.inner { + Kind::Std(ref mut opts) => opts, + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(_) => panic!("Should not be called on UringOpenOptions"), } } } @@ -466,7 +517,15 @@ feature! { /// } /// ``` pub fn mode(&mut self, mode: u32) -> &mut OpenOptions { - self.as_inner_mut().mode(mode); + match &mut self.inner { + Kind::Std(opts) => { + opts.mode(mode); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.mode(mode); + } + } self } @@ -497,7 +556,15 @@ feature! { /// } /// ``` pub fn custom_flags(&mut self, flags: i32) -> &mut OpenOptions { - self.as_inner_mut().custom_flags(flags); + match &mut self.inner { + Kind::Std(opts) => { + opts.custom_flags(flags); + } + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] + Kind::Uring(opts) => { + opts.custom_flags(flags); + } + } self } } @@ -687,22 +754,10 @@ cfg_windows! { } } -cfg_not_tokio_uring! { - impl From for OpenOptions { - fn from(options: StdOpenOptions) -> OpenOptions { - OpenOptions(options) - } - } -} - -cfg_tokio_uring! { - impl From for OpenOptions { - fn from(_options: StdOpenOptions) -> OpenOptions { - // It's not straitforward to convert from std's OpenOptions to io_uring's one. 
- // * https://github.com/rust-lang/rust/issues/74943 - // * https://github.com/rust-lang/rust/issues/76801 - - panic!("Conversion from std's OpenOptions to io_uring's one is not supported") +impl From for OpenOptions { + fn from(options: StdOpenOptions) -> OpenOptions { + OpenOptions { + inner: Kind::Std(options), } } } diff --git a/tokio/src/fs/open_options/uring_open_options.rs b/tokio/src/fs/open_options/uring_open_options.rs index 98b47a76b66..0140c7f4302 100644 --- a/tokio/src/fs/open_options/uring_open_options.rs +++ b/tokio/src/fs/open_options/uring_open_options.rs @@ -1,5 +1,10 @@ use std::io; +#[cfg(test)] +use super::mock_open_options::MockOpenOptions as StdOpenOptions; +#[cfg(not(test))] +use std::fs::OpenOptions as StdOpenOptions; + #[derive(Debug, Clone)] pub(crate) struct UringOpenOptions { pub(crate) read: bool, @@ -101,3 +106,17 @@ impl UringOpenOptions { }) } } + +impl From for StdOpenOptions { + fn from(value: UringOpenOptions) -> Self { + let mut std = StdOpenOptions::new(); + + std.read(value.read); + std.write(value.write); + std.append(value.append); + std.create(value.create); + std.create_new(value.create_new); + + std + } +} diff --git a/tokio/src/io/uring/open.rs b/tokio/src/io/uring/open.rs index 9897e2910e4..c7687ed8c37 100644 --- a/tokio/src/io/uring/open.rs +++ b/tokio/src/io/uring/open.rs @@ -1,6 +1,6 @@ use super::utils::cstr; use crate::{ - fs::OpenOptions, + fs::UringOpenOptions, runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}, }; use io_uring::{opcode, types}; @@ -28,19 +28,19 @@ impl Cancellable for Open { impl Op { /// Submit a request to open a file. - pub(crate) fn open(path: &Path, options: &OpenOptions) -> io::Result> { + pub(crate) fn open(path: &Path, options: &UringOpenOptions) -> io::Result> { let inner_opt = options; let path = cstr(path)?; - let custom_flags = inner_opt.0.custom_flags; + let custom_flags = inner_opt.custom_flags; let flags = libc::O_CLOEXEC - | options.0.access_mode()? 
- | options.0.creation_mode()? + | options.access_mode()? + | options.creation_mode()? | (custom_flags & !libc::O_ACCMODE); let open_op = opcode::OpenAt::new(types::Fd(libc::AT_FDCWD), path.as_ptr()) .flags(flags) - .mode(inner_opt.0.mode) + .mode(inner_opt.mode) .build(); // SAFETY: Parameters are valid for the entire duration of the operation diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index 738b8e5e36b..6d4a7b763c3 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -8,14 +8,14 @@ use crate::{io::Interest, loom::sync::Mutex}; use super::{Handle, TOKEN_WAKEUP}; -use std::os::fd::AsRawFd; +use std::os::fd::{AsRawFd, RawFd}; use std::{io, mem, task::Waker}; const DEFAULT_RING_SIZE: u32 = 256; #[repr(usize)] #[derive(Debug, PartialEq, Eq)] -enum State { +pub(crate) enum State { Uninitialized = 0, Initialized = 1, Unsupported = 2, @@ -182,10 +182,8 @@ impl Drop for UringContext { impl Handle { // TODO: this should be delayed. - #[allow(dead_code)] - fn add_uring_source(&self, interest: Interest) -> io::Result<()> { + fn add_uring_source(&self, uringfd: RawFd, interest: Interest) -> io::Result<()> { // setup for io_uring - let uringfd = self.get_uring().lock().ring().as_raw_fd(); let mut source = SourceFd(&uringfd); self.registry .register(&mut source, TOKEN_WAKEUP, interest.to_mio()) @@ -196,9 +194,9 @@ impl Handle { } fn initialize_uring(&self) -> io::Result<()> { - self.add_uring_source(Interest::READABLE)?; let mut guard = self.get_uring().lock(); if guard.initialize()? 
{ + self.add_uring_source(guard.ring().as_raw_fd(), Interest::READABLE)?; self.set_uring_state(State::Initialized); } @@ -209,10 +207,27 @@ impl Handle { self.uring_state.store(state.as_usize(), Ordering::Relaxed); } - fn get_uring_state(&self) -> State { + pub(crate) fn get_uring_state(&self) -> State { State::from_usize(self.uring_state.load(Ordering::Relaxed)) } + pub(crate) fn uring_available_or_init(&self) -> bool { + match self.get_uring_state() { + State::Uninitialized => { + if let Err(e) = self.initialize_uring() { + if e.raw_os_error() == Some(libc::ENOSYS) { + self.set_uring_state(State::Unsupported); + } + return false; + } + } + State::Unsupported => return false, + _ => {} + } + + true + } + /// # Safety /// /// Callers must ensure that parameters of the entry (such as buffer) are valid and will From 15542a722e9a602148d46ee4a1aeb745663e1fed Mon Sep 17 00:00:00 2001 From: mox692 Date: Thu, 22 May 2025 02:19:44 +0900 Subject: [PATCH 08/22] rt: add check for io_uring availability at runtime --- tokio/src/runtime/driver/op.rs | 2 +- tokio/src/runtime/io/driver.rs | 11 ++- tokio/src/runtime/io/driver/uring.rs | 132 ++++++++++++++++++++++++--- 3 files changed, 124 insertions(+), 21 deletions(-) diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index 0d7ca945554..40a135d744b 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -106,7 +106,7 @@ pub(crate) trait Completable { /// Extracts the `CancelData` needed to safely cancel an in-flight io_uring operation. pub(crate) trait Cancellable { - fn cancell(self) -> CancelData; + fn cancel(self) -> CancelData; } impl Unpin for Op {} diff --git a/tokio/src/runtime/io/driver.rs b/tokio/src/runtime/io/driver.rs index fb496f140ff..1a49b474291 100644 --- a/tokio/src/runtime/io/driver.rs +++ b/tokio/src/runtime/io/driver.rs @@ -5,6 +5,7 @@ cfg_signal_internal_and_unix! { cfg_tokio_uring! 
{ mod uring; use uring::UringContext; + use crate::loom::sync::atomic::AtomicUsize; } use crate::io::interest::Interest; @@ -52,6 +53,9 @@ pub(crate) struct Handle { #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] pub(crate) uring_context: Mutex, + + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] + pub(crate) uring_state: AtomicUsize, } #[derive(Debug)] @@ -121,13 +125,10 @@ impl Driver { metrics: IoDriverMetrics::default(), #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] uring_context: Mutex::new(UringContext::new()), + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] + uring_state: AtomicUsize::new(0), }; - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] - { - handle.add_uring_source(Interest::READABLE)?; - } - Ok((driver, handle)) } diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index 8a129bc494d..9d97386cdfe 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -2,18 +2,42 @@ use io_uring::{squeue::Entry, IoUring}; use mio::unix::SourceFd; use slab::Slab; +use crate::loom::sync::atomic::Ordering; use crate::runtime::driver::op::{Cancellable, Lifecycle}; use crate::{io::Interest, loom::sync::Mutex}; use super::{Handle, TOKEN_WAKEUP}; -use std::os::fd::AsRawFd; +use std::os::fd::{AsRawFd, RawFd}; use std::{io, mem, task::Waker}; const DEFAULT_RING_SIZE: u32 = 256; +#[repr(usize)] +#[derive(Debug, PartialEq, Eq)] +enum State { + Uninitialized = 0, + Initialized = 1, + Unsupported = 2, +} + +impl State { + fn as_usize(self) -> usize { + self as usize + } + + fn from_usize(value: usize) -> Self { + match value { + 0 => State::Uninitialized, + 1 => State::Initialized, + 2 => State::Unsupported, + _ => unreachable!("invalid Uring state: {}", value), + } + } +} + pub(crate) struct UringContext { - pub(crate) uring: io_uring::IoUring, 
+ pub(crate) uring: Option, pub(crate) ops: slab::Slab, } @@ -21,14 +45,41 @@ impl UringContext { pub(crate) fn new() -> Self { Self { ops: Slab::new(), - // TODO: make configurable - uring: IoUring::new(DEFAULT_RING_SIZE).unwrap(), + uring: None, + } + } + + pub(crate) fn ring(&self) -> &io_uring::IoUring { + self.uring.as_ref().expect("io_uring not initialized") + } + + pub(crate) fn ring_mut(&mut self) -> &mut io_uring::IoUring { + self.uring.as_mut().expect("io_uring not initialized") + } + + /// Perform `io_uring_setup` system call. + /// + /// If the machine doesn't support io_uring, then this will return an + /// `ENOSYS` error. + pub(crate) fn try_init(&mut self) -> io::Result<()> { + if self.uring.is_some() { + // Already initialized. + return Ok(()); } + + self.uring.replace(IoUring::new(DEFAULT_RING_SIZE)?); + + Ok(()) } pub(crate) fn dispatch_completions(&mut self) { let ops = &mut self.ops; - let cq = self.uring.completion(); + let Some(mut uring) = self.uring.take() else { + // Uring is not initialized yet. + return; + }; + + let cq = uring.completion(); for cqe in cq { let idx = cqe.user_data() as usize; @@ -52,13 +103,15 @@ impl UringContext { } } + self.uring.replace(uring); + // `cq`'s drop gets called here, updating the latest head pointer } pub(crate) fn submit(&mut self) -> io::Result<()> { loop { // Errors from io_uring_enter: https://man7.org/linux/man-pages/man2/io_uring_enter.2.html#ERRORS - match self.uring.submit() { + match self.ring().submit() { Ok(_) => { return Ok(()); } @@ -83,8 +136,13 @@ impl UringContext { /// Drop the driver, cancelling any in-progress ops and waiting for them to terminate. impl Drop for UringContext { fn drop(&mut self) { + if self.uring.is_none() { + // Uring is not initialized or not Initialized. + return; + } + // Make sure we flush the submission queue before dropping the driver. 
- while !self.uring.submission().is_empty() { + while !self.ring_mut().submission().is_empty() { self.submit().expect("Internal error when dropping driver"); } @@ -109,11 +167,11 @@ impl Drop for UringContext { while !cancel_ops.is_empty() { // Wait until at least one completion is available. - self.uring + self.ring_mut() .submit_and_wait(1) .expect("Internal error when dropping driver"); - for cqe in self.uring.completion() { + for cqe in self.ring_mut().completion() { let idx = cqe.user_data() as usize; cancel_ops.remove(idx); } @@ -123,23 +181,67 @@ impl Drop for UringContext { impl Handle { #[allow(dead_code)] - pub(crate) fn add_uring_source(&self, interest: Interest) -> io::Result<()> { - // setup for io_uring - let uringfd = self.get_uring().lock().uring.as_raw_fd(); + fn add_uring_source(&self, uringfd: RawFd) -> io::Result<()> { let mut source = SourceFd(&uringfd); self.registry - .register(&mut source, TOKEN_WAKEUP, interest.to_mio()) + .register(&mut source, TOKEN_WAKEUP, Interest::READABLE.to_mio()) } pub(crate) fn get_uring(&self) -> &Mutex { &self.uring_context } + fn set_uring_state(&self, state: State) { + self.uring_state.store(state.as_usize(), Ordering::Release); + } + + /// Check if the io_uring context is initialized. If not, it will try to initialize it. + pub(crate) fn check_and_init(&self) -> io::Result { + match State::from_usize(self.uring_state.load(Ordering::Acquire)) { + State::Uninitialized => match self.try_init() { + Ok(()) => { + self.set_uring_state(State::Initialized); + Ok(true) + } + // If the system doesn't support io_uring, we set the state to Unsupported. + Err(e) if e.raw_os_error() == Some(libc::ENOSYS) => { + self.set_uring_state(State::Unsupported); + Ok(false) + } + // For other system errors, we just return it. + Err(e) => Err(e), + }, + State::Unsupported => Ok(false), + State::Initialized => Ok(true), + } + } + + /// Initialize the io_uring context if it hasn't been initialized yet. 
+ fn try_init(&self) -> io::Result<()> { + let mut guard = self.get_uring().lock(); + guard.try_init()?; + self.add_uring_source(guard.ring().as_raw_fd()) + } + + /// Register an operation with the io_uring. + /// + /// If this is the first io_uring operation, it will also initialize the io_uring context. + /// If io_uring isn't supported, this function returns an ENOSYS error, so the caller can + /// perform custom handling, such as falling back to an alternative mechanism. + /// /// # Safety /// /// Callers must ensure that parameters of the entry (such as buffer) are valid and will /// be valid for the entire duration of the operation, otherwise it may cause memory problems. pub(crate) unsafe fn register_op(&self, entry: Entry, waker: Waker) -> io::Result { + // Fist, check if the uring is initialized. Callers can use + // Note: Maybe this check can be removed if upstream callers consistently use `check_and_init`. + if !self.check_and_init()? { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Uring is initialized. + let mut guard = self.get_uring().lock(); let ctx = &mut *guard; let index = ctx.ops.insert(Lifecycle::Waiting(waker)); @@ -155,7 +257,7 @@ impl Handle { }; // SAFETY: entry is valid for the entire duration of the operation - while unsafe { ctx.uring.submission().push(&entry).is_err() } { + while unsafe { ctx.ring_mut().submission().push(&entry).is_err() } { // If the submission queue is full, flush it to the kernel submit_or_remove(ctx)?; } @@ -180,7 +282,7 @@ impl Handle { // This Op will be cancelled. Here, we don't remove the lifecycle from the slab to keep // uring data alive until the operation completes. - let cancell_data = data.expect("Data should be present").cancell(); + let cancell_data = data.expect("Data should be present").cancel(); match mem::replace(lifecycle, Lifecycle::Cancelled(cancell_data)) { Lifecycle::Submitted | Lifecycle::Waiting(_) => (), // The driver saw the completion, but it was never polled. 
From 8da05c3655faff1004b3f413ffbed45b946755ff Mon Sep 17 00:00:00 2001 From: mox692 Date: Thu, 22 May 2025 02:19:44 +0900 Subject: [PATCH 09/22] rt: add check for io_uring availability at runtime --- spellcheck.dic | 3 +- tokio/src/runtime/driver/op.rs | 2 +- tokio/src/runtime/io/driver.rs | 11 ++- tokio/src/runtime/io/driver/uring.rs | 136 ++++++++++++++++++++++++--- 4 files changed, 130 insertions(+), 22 deletions(-) diff --git a/spellcheck.dic b/spellcheck.dic index a8140e3b7e2..fb0eba88542 100644 --- a/spellcheck.dic +++ b/spellcheck.dic @@ -1,4 +1,4 @@ -306 +307 & + < @@ -104,6 +104,7 @@ DNS DoS dwOpenMode endian +ENOSYS enqueue enqueued EntryInner diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index 0d7ca945554..40a135d744b 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -106,7 +106,7 @@ pub(crate) trait Completable { /// Extracts the `CancelData` needed to safely cancel an in-flight io_uring operation. pub(crate) trait Cancellable { - fn cancell(self) -> CancelData; + fn cancel(self) -> CancelData; } impl Unpin for Op {} diff --git a/tokio/src/runtime/io/driver.rs b/tokio/src/runtime/io/driver.rs index fb496f140ff..1a49b474291 100644 --- a/tokio/src/runtime/io/driver.rs +++ b/tokio/src/runtime/io/driver.rs @@ -5,6 +5,7 @@ cfg_signal_internal_and_unix! { cfg_tokio_uring! 
{ mod uring; use uring::UringContext; + use crate::loom::sync::atomic::AtomicUsize; } use crate::io::interest::Interest; @@ -52,6 +53,9 @@ pub(crate) struct Handle { #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] pub(crate) uring_context: Mutex, + + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] + pub(crate) uring_state: AtomicUsize, } #[derive(Debug)] @@ -121,13 +125,10 @@ impl Driver { metrics: IoDriverMetrics::default(), #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] uring_context: Mutex::new(UringContext::new()), + #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] + uring_state: AtomicUsize::new(0), }; - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] - { - handle.add_uring_source(Interest::READABLE)?; - } - Ok((driver, handle)) } diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index 8a129bc494d..3d15bf5ee17 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -2,18 +2,42 @@ use io_uring::{squeue::Entry, IoUring}; use mio::unix::SourceFd; use slab::Slab; +use crate::loom::sync::atomic::Ordering; use crate::runtime::driver::op::{Cancellable, Lifecycle}; use crate::{io::Interest, loom::sync::Mutex}; use super::{Handle, TOKEN_WAKEUP}; -use std::os::fd::AsRawFd; +use std::os::fd::{AsRawFd, RawFd}; use std::{io, mem, task::Waker}; const DEFAULT_RING_SIZE: u32 = 256; +#[repr(usize)] +#[derive(Debug, PartialEq, Eq)] +enum State { + Uninitialized = 0, + Initialized = 1, + Unsupported = 2, +} + +impl State { + fn as_usize(self) -> usize { + self as usize + } + + fn from_usize(value: usize) -> Self { + match value { + 0 => State::Uninitialized, + 1 => State::Initialized, + 2 => State::Unsupported, + _ => unreachable!("invalid Uring state: {}", value), + } + } +} + pub(crate) struct UringContext { - pub(crate) uring: io_uring::IoUring, 
+ pub(crate) uring: Option, pub(crate) ops: slab::Slab, } @@ -21,14 +45,42 @@ impl UringContext { pub(crate) fn new() -> Self { Self { ops: Slab::new(), - // TODO: make configurable - uring: IoUring::new(DEFAULT_RING_SIZE).unwrap(), + uring: None, } } + pub(crate) fn ring(&self) -> &io_uring::IoUring { + self.uring.as_ref().expect("io_uring not initialized") + } + + pub(crate) fn ring_mut(&mut self) -> &mut io_uring::IoUring { + self.uring.as_mut().expect("io_uring not initialized") + } + + /// Perform `io_uring_setup` system call, and Returns true if this + /// actually initialized the io_uring. + /// + /// If the machine doesn't support io_uring, then this will return an + /// `ENOSYS` error. + pub(crate) fn try_init(&mut self) -> io::Result { + if self.uring.is_some() { + // Already initialized. + return Ok(false); + } + + self.uring.replace(IoUring::new(DEFAULT_RING_SIZE)?); + + Ok(true) + } + pub(crate) fn dispatch_completions(&mut self) { let ops = &mut self.ops; - let cq = self.uring.completion(); + let Some(mut uring) = self.uring.take() else { + // Uring is not initialized yet. + return; + }; + + let cq = uring.completion(); for cqe in cq { let idx = cqe.user_data() as usize; @@ -52,13 +104,15 @@ impl UringContext { } } + self.uring.replace(uring); + // `cq`'s drop gets called here, updating the latest head pointer } pub(crate) fn submit(&mut self) -> io::Result<()> { loop { // Errors from io_uring_enter: https://man7.org/linux/man-pages/man2/io_uring_enter.2.html#ERRORS - match self.uring.submit() { + match self.ring().submit() { Ok(_) => { return Ok(()); } @@ -83,8 +137,13 @@ impl UringContext { /// Drop the driver, cancelling any in-progress ops and waiting for them to terminate. impl Drop for UringContext { fn drop(&mut self) { + if self.uring.is_none() { + // Uring is not initialized or not Initialized. + return; + } + // Make sure we flush the submission queue before dropping the driver. 
- while !self.uring.submission().is_empty() { + while !self.ring_mut().submission().is_empty() { self.submit().expect("Internal error when dropping driver"); } @@ -109,11 +168,11 @@ impl Drop for UringContext { while !cancel_ops.is_empty() { // Wait until at least one completion is available. - self.uring + self.ring_mut() .submit_and_wait(1) .expect("Internal error when dropping driver"); - for cqe in self.uring.completion() { + for cqe in self.ring_mut().completion() { let idx = cqe.user_data() as usize; cancel_ops.remove(idx); } @@ -123,23 +182,70 @@ impl Drop for UringContext { impl Handle { #[allow(dead_code)] - pub(crate) fn add_uring_source(&self, interest: Interest) -> io::Result<()> { - // setup for io_uring - let uringfd = self.get_uring().lock().uring.as_raw_fd(); + fn add_uring_source(&self, uringfd: RawFd) -> io::Result<()> { let mut source = SourceFd(&uringfd); self.registry - .register(&mut source, TOKEN_WAKEUP, interest.to_mio()) + .register(&mut source, TOKEN_WAKEUP, Interest::READABLE.to_mio()) } pub(crate) fn get_uring(&self) -> &Mutex { &self.uring_context } + fn set_uring_state(&self, state: State) { + self.uring_state.store(state.as_usize(), Ordering::Release); + } + + /// Check if the io_uring context is initialized. If not, it will try to initialize it. + pub(crate) fn check_and_init(&self) -> io::Result { + match State::from_usize(self.uring_state.load(Ordering::Acquire)) { + State::Uninitialized => match self.try_init() { + Ok(()) => { + self.set_uring_state(State::Initialized); + Ok(true) + } + // If the system doesn't support io_uring, we set the state to Unsupported. + Err(e) if e.raw_os_error() == Some(libc::ENOSYS) => { + self.set_uring_state(State::Unsupported); + Ok(false) + } + // For other system errors, we just return it. + Err(e) => Err(e), + }, + State::Unsupported => Ok(false), + State::Initialized => Ok(true), + } + } + + /// Initialize the io_uring context if it hasn't been initialized yet. 
+ fn try_init(&self) -> io::Result<()> { + let mut guard = self.get_uring().lock(); + if guard.try_init()? { + self.add_uring_source(guard.ring().as_raw_fd())?; + } + + Ok(()) + } + + /// Register an operation with the io_uring. + /// + /// If this is the first io_uring operation, it will also initialize the io_uring context. + /// If io_uring isn't supported, this function returns an ENOSYS error, so the caller can + /// perform custom handling, such as falling back to an alternative mechanism. + /// /// # Safety /// /// Callers must ensure that parameters of the entry (such as buffer) are valid and will /// be valid for the entire duration of the operation, otherwise it may cause memory problems. pub(crate) unsafe fn register_op(&self, entry: Entry, waker: Waker) -> io::Result { + // Fist, check if the uring is initialized. Callers can use + // Note: Maybe this check can be removed if upstream callers consistently use `check_and_init`. + if !self.check_and_init()? { + return Err(io::Error::from_raw_os_error(libc::ENOSYS)); + } + + // Uring is initialized. + let mut guard = self.get_uring().lock(); let ctx = &mut *guard; let index = ctx.ops.insert(Lifecycle::Waiting(waker)); @@ -155,7 +261,7 @@ impl Handle { }; // SAFETY: entry is valid for the entire duration of the operation - while unsafe { ctx.uring.submission().push(&entry).is_err() } { + while unsafe { ctx.ring_mut().submission().push(&entry).is_err() } { // If the submission queue is full, flush it to the kernel submit_or_remove(ctx)?; } @@ -180,7 +286,7 @@ impl Handle { // This Op will be cancelled. Here, we don't remove the lifecycle from the slab to keep // uring data alive until the operation completes. 
- let cancell_data = data.expect("Data should be present").cancell(); + let cancell_data = data.expect("Data should be present").cancel(); match mem::replace(lifecycle, Lifecycle::Cancelled(cancell_data)) { Lifecycle::Submitted | Lifecycle::Waiting(_) => (), // The driver saw the completion, but it was never polled. From c80c3ca920869230c3a73ccedd8dd508acac51c3 Mon Sep 17 00:00:00 2001 From: mox692 Date: Sat, 14 Jun 2025 01:12:01 +0900 Subject: [PATCH 10/22] revert temporary ci change --- .github/workflows/ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dc48016da98..8dc7bde0164 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,8 +2,7 @@ on: push: branches: ["master", "tokio-*.x"] pull_request: - # Temporarily commented out to run CI - #branches: ["master", "tokio-*.x"] + branches: ["master", "tokio-*.x"] name: CI From 74834016b2a647e40629ba72ca7683530feff413 Mon Sep 17 00:00:00 2001 From: mox692 Date: Sat, 14 Jun 2025 01:41:43 +0900 Subject: [PATCH 11/22] handle cancel for open --- tokio/src/io/uring/open.rs | 3 ++- tokio/src/runtime/driver/op.rs | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tokio/src/io/uring/open.rs b/tokio/src/io/uring/open.rs index abbe3913698..a50868605f8 100644 --- a/tokio/src/io/uring/open.rs +++ b/tokio/src/io/uring/open.rs @@ -6,6 +6,7 @@ use crate::{ use io_uring::{opcode, types}; use std::{ffi::CString, io, os::fd::FromRawFd, path::Path}; +#[derive(Debug)] pub(crate) struct Open { #[allow(dead_code)] path: CString, @@ -22,7 +23,7 @@ impl Completable for Open { impl Cancellable for Open { fn cancel(self) -> CancelData { - todo!() + CancelData::Open(self) } } diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index 40a135d744b..af1c7e139aa 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -1,3 +1,4 @@ +use crate::io::uring::open::Open; use 
crate::runtime::Handle; use io_uring::cqueue; use io_uring::squeue::Entry; @@ -8,8 +9,11 @@ use std::task::Poll; use std::task::Waker; use std::{io, mem}; +#[allow(dead_code)] #[derive(Debug)] -pub(crate) enum CancelData {} +pub(crate) enum CancelData { + Open(Open), +} #[derive(Debug)] pub(crate) enum Lifecycle { @@ -21,6 +25,7 @@ pub(crate) enum Lifecycle { /// The submitter no longer has interest in the operation result. The state /// must be passed to the driver and held until the operation completes. + #[allow(dead_code)] Cancelled(CancelData), /// The operation has completed with a single cqe result From 26485c836f275e280ee589b64779cb6d55c736c1 Mon Sep 17 00:00:00 2001 From: mox692 Date: Sat, 14 Jun 2025 15:52:01 +0900 Subject: [PATCH 12/22] updates --- benches/Cargo.toml | 1 - benches/fs.rs | 94 ++------- tokio/src/fs/open_options.rs | 10 + .../src/fs/open_options/uring_open_options.rs | 12 +- tokio/src/io/uring/open.rs | 2 + tokio/src/io/uring/utils.rs | 2 +- tokio/src/macros/cfg.rs | 14 -- tokio/src/runtime/builder.rs | 11 +- tokio/src/runtime/io/driver/uring.rs | 5 + tokio/tests/fs_uring.rs | 182 +++++++++++------- 10 files changed, 154 insertions(+), 179 deletions(-) diff --git a/benches/Cargo.toml b/benches/Cargo.toml index fd1f35bb287..50fa6a7b50f 100644 --- a/benches/Cargo.toml +++ b/benches/Cargo.toml @@ -13,7 +13,6 @@ tokio = { version = "1.5.0", path = "../tokio", features = ["full"] } criterion = "0.5.1" rand = "0.9" rand_chacha = "0.9" -tempfile = "3.1.0" [dev-dependencies] tokio-util = { version = "0.7.0", path = "../tokio-util", features = ["full"] } diff --git a/benches/fs.rs b/benches/fs.rs index 85a994b33a6..43c532e8499 100644 --- a/benches/fs.rs +++ b/benches/fs.rs @@ -1,40 +1,19 @@ -#![cfg(unix)] - -use tokio::runtime::{Builder, Runtime}; -use tokio::task::JoinSet; use tokio_stream::StreamExt; -use tokio::fs::{File, OpenOptions}; +use tokio::fs::File; use tokio::io::AsyncReadExt; use tokio_util::codec::{BytesCodec, FramedRead 
/*FramedWrite*/}; -use criterion::{criterion_group, BenchmarkId}; -use std::path::PathBuf; -use std::time::Instant; -use tempfile::NamedTempFile; - -use criterion::{criterion_main, Criterion}; +use criterion::{criterion_group, criterion_main, Criterion}; use std::fs::File as StdFile; use std::io::Read as StdRead; -fn rt(worker_threads: usize) -> Runtime { - if worker_threads == 1 { - let mut builder = Builder::new_current_thread(); - #[cfg(tokio_uring)] - { - builder.enable_uring(); - } - builder.build().unwrap() - } else { - let mut builder = Builder::new_multi_thread(); - builder.worker_threads(worker_threads); - #[cfg(tokio_uring)] - { - builder.enable_uring(); - } - builder.build().unwrap() - } +fn rt() -> tokio::runtime::Runtime { + tokio::runtime::Builder::new_multi_thread() + .worker_threads(2) + .build() + .unwrap() } const BLOCK_COUNT: usize = 1_000; @@ -43,7 +22,7 @@ const BUFFER_SIZE: usize = 4096; const DEV_ZERO: &str = "/dev/zero"; fn async_read_codec(c: &mut Criterion) { - let rt = rt(2); + let rt = rt(); c.bench_function("async_read_codec", |b| { b.iter(|| { @@ -63,7 +42,7 @@ fn async_read_codec(c: &mut Criterion) { } fn async_read_buf(c: &mut Criterion) { - let rt = rt(2); + let rt = rt(); c.bench_function("async_read_buf", |b| { b.iter(|| { @@ -85,7 +64,7 @@ fn async_read_buf(c: &mut Criterion) { } fn async_read_std_file(c: &mut Criterion) { - let rt = rt(2); + let rt = rt(); c.bench_function("async_read_std_file", |b| { b.iter(|| { @@ -121,62 +100,11 @@ fn sync_read(c: &mut Criterion) { }); } -fn create_tmp_files(num_files: usize) -> (Vec, Vec) { - let mut files = Vec::with_capacity(num_files); - for _ in 0..num_files { - let tmp = NamedTempFile::new().unwrap(); - let path = tmp.path().to_path_buf(); - files.push((tmp, path)); - } - - files.into_iter().unzip() -} - -fn open_many_files(c: &mut Criterion) { - const NUM_FILES: usize = 512; - - let (_tmp_files, paths): (Vec, Vec) = create_tmp_files(NUM_FILES); - - let mut group = 
c.benchmark_group("open_many_files"); - - for &threads in &[1, 2, 4, 8, 16, 32] { - let rt = rt(threads); - - let paths = paths.clone(); - group.bench_with_input( - BenchmarkId::from_parameter(threads), - &threads, - move |b, &_threads| { - b.iter_custom(|iter| { - rt.block_on(async { - let mut set = JoinSet::new(); - - let start = Instant::now(); - - for i in 0..(iter as usize) { - let path = paths.get(i % NUM_FILES).unwrap().clone(); - set.spawn(async move { - let _file = OpenOptions::new().read(true).open(path).await.unwrap(); - }); - } - while let Some(Ok(_)) = set.join_next().await {} - - start.elapsed() - }) - }) - }, - ); - } - - group.finish(); -} - criterion_group!( file, async_read_std_file, async_read_buf, async_read_codec, - sync_read, - open_many_files + sync_read ); criterion_main!(file); diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index ca05ec634f1..9b192dba427 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -434,6 +434,12 @@ impl OpenOptions { /// open files, too long filename, too many symbolic links in the /// specified path (Unix-like systems only), etc. /// + /// # io_uring support + /// + /// On Linux, you can also use `io_uring` for executing system calls. + /// To enable `io_uring`, you need to specify the `--cfg tokio_uring` flag + /// at compile time and set the `Builder::enable_io_uring` runtime option. + /// /// # Examples /// /// ```no_run @@ -758,6 +764,10 @@ impl From for OpenOptions { fn from(options: StdOpenOptions) -> OpenOptions { OpenOptions { inner: Kind::Std(options), + // TODO: Add support for converting `StdOpenOptions` to `UringOpenOptions` + // if user enables the `--cfg tokio_uring`. 
It is blocked by: + // * https://github.com/rust-lang/rust/issues/74943 + // * https://github.com/rust-lang/rust/issues/76801 } } } diff --git a/tokio/src/fs/open_options/uring_open_options.rs b/tokio/src/fs/open_options/uring_open_options.rs index 0140c7f4302..48297ca3b5b 100644 --- a/tokio/src/fs/open_options/uring_open_options.rs +++ b/tokio/src/fs/open_options/uring_open_options.rs @@ -1,4 +1,4 @@ -use std::io; +use std::{io, os::unix::fs::OpenOptionsExt}; #[cfg(test)] use super::mock_open_options::MockOpenOptions as StdOpenOptions; @@ -71,6 +71,7 @@ impl UringOpenOptions { self } + // Equivalent to https://github.com/rust-lang/rust/blob/64c81fd10509924ca4da5d93d6052a65b75418a5/library/std/src/sys/fs/unix.rs#L1118-L1127 pub(crate) fn access_mode(&self) -> io::Result { match (self.read, self.write, self.append) { (true, false, false) => Ok(libc::O_RDONLY), @@ -82,6 +83,7 @@ impl UringOpenOptions { } } + // Equivalent to https://github.com/rust-lang/rust/blob/64c81fd10509924ca4da5d93d6052a65b75418a5/library/std/src/sys/fs/unix.rs#L1129-L1151 pub(crate) fn creation_mode(&self) -> io::Result { match (self.write, self.append) { (true, false) => {} @@ -111,11 +113,15 @@ impl From for StdOpenOptions { fn from(value: UringOpenOptions) -> Self { let mut std = StdOpenOptions::new(); - std.read(value.read); - std.write(value.write); std.append(value.append); std.create(value.create); std.create_new(value.create_new); + std.read(value.read); + std.truncate(value.truncate); + std.write(value.write); + + std.mode(value.mode); + std.custom_flags(value.custom_flags); std } diff --git a/tokio/src/io/uring/open.rs b/tokio/src/io/uring/open.rs index a50868605f8..68f434ff174 100644 --- a/tokio/src/io/uring/open.rs +++ b/tokio/src/io/uring/open.rs @@ -8,6 +8,8 @@ use std::{ffi::CString, io, os::fd::FromRawFd, path::Path}; #[derive(Debug)] pub(crate) struct Open { + /// This field will be read by the kernel during the operation, so we + /// need to ensure it is valid for the entire 
duration of the operation. #[allow(dead_code)] path: CString, } diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs index 789b4e80032..e30e7a5ddc4 100644 --- a/tokio/src/io/uring/utils.rs +++ b/tokio/src/io/uring/utils.rs @@ -1,6 +1,6 @@ +use std::os::unix::ffi::OsStrExt; use std::{ffi::CString, io, path::Path}; pub(crate) fn cstr(p: &Path) -> io::Result { - use std::os::unix::ffi::OsStrExt; Ok(CString::new(p.as_os_str().as_bytes())?) } diff --git a/tokio/src/macros/cfg.rs b/tokio/src/macros/cfg.rs index f136b3f2a32..10da9570870 100644 --- a/tokio/src/macros/cfg.rs +++ b/tokio/src/macros/cfg.rs @@ -683,17 +683,3 @@ macro_rules! cfg_tokio_uring { )* }; } - -macro_rules! cfg_not_tokio_uring { - ($($item:item)*) => { - $( - #[cfg(not(all( - tokio_uring, - feature = "rt", - feature = "fs", - target_os = "linux", - )))] - $item - )* - }; -} diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs index c52ad7e61f5..76800296a27 100644 --- a/tokio/src/runtime/builder.rs +++ b/tokio/src/runtime/builder.rs @@ -340,7 +340,7 @@ impl Builder { self.enable_io(); #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] - self.enable_uring(); + self.enable_io_uring(); #[cfg(feature = "time")] self.enable_time(); @@ -1584,7 +1584,7 @@ cfg_time! { cfg_tokio_uring! { impl Builder { - /// Enables the `tokio-uring` driver. + /// Enables the tokio's io_uring driver. /// /// Doing this enables using io_uring operations on the runtime. /// @@ -1594,12 +1594,13 @@ cfg_tokio_uring! { /// use tokio::runtime; /// /// let rt = runtime::Builder::new_multi_thread() - /// .enable_uring() + /// .enable_io_uring() /// .build() /// .unwrap(); /// ``` - pub fn enable_uring(&mut self) -> &mut Self { - // Currently, the uring flag is represented as `enable_io`. + #[cfg_attr(docsrs, doc(cfg(tokio_uring)))] + pub fn enable_io_uring(&mut self) -> &mut Self { + // Currently, the uring flag is equivalent to `enable_io`. 
self.enable_io = true; self } diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index 7b0b798614b..bd3391eba4f 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -265,6 +265,11 @@ impl Handle { submit_or_remove(ctx)?; } + // Ensure that the completion queue is not full before submitting the entry. + while ctx.ring_mut().completion().is_full() { + ctx.dispatch_completions(); + } + // Note: For now, we submit the entry immediately without utilizing batching. submit_or_remove(ctx)?; diff --git a/tokio/tests/fs_uring.rs b/tokio/tests/fs_uring.rs index 11b2ab82a79..db0a3130217 100644 --- a/tokio/tests/fs_uring.rs +++ b/tokio/tests/fs_uring.rs @@ -1,11 +1,14 @@ -#![cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] +//! Uring file operations tests. -use std::sync::mpsc; +#![cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] +use futures::future::FutureExt; +use std::sync::mpsc; +use std::task::Poll; +use std::{future::poll_fn, path::PathBuf}; use tempfile::NamedTempFile; use tokio::{ fs::OpenOptions, - io::AsyncReadExt, runtime::{Builder, Runtime}, task::JoinSet, }; @@ -24,88 +27,123 @@ fn current_rt() -> Box Runtime> { Box::new(|| Builder::new_current_thread().enable_all().build().unwrap()) } +fn rt_combinations() -> Vec Runtime>> { + vec![ + current_rt(), + multi_rt(1), + multi_rt(2), + multi_rt(8), + multi_rt(64), + multi_rt(256), + ] +} + #[test] -fn all_tests() { - let rt_conbination = vec![current_rt(), multi_rt(1), multi_rt(8)]; +fn shutdown_runtime_while_performing_io_uring_ops() { + fn run(rt: Runtime) { + let (tx, rx) = mpsc::channel(); + let (done_tx, done_rx) = mpsc::channel(); + + rt.spawn(async { + let (_tmp, path) = create_tmp_files(1); + let path = path[0].clone(); + + // spawning a bunch of uring operations. 
+ loop { + let path = path.clone(); + tokio::spawn(async move { + let mut opt = OpenOptions::new(); + opt.read(true); + opt.open(&path).await.unwrap(); + }); + + // Avoid busy looping. + tokio::time::sleep(std::time::Duration::from_millis(10)).await; + } + }); + + std::thread::spawn(move || { + let rt: Runtime = rx.recv().unwrap(); + rt.shutdown_background(); + done_tx.send(()).unwrap(); + }); + + tx.send(rt).unwrap(); + done_rx.recv().unwrap(); + } - for rt in rt_conbination { - shutdown_runtime_while_performing_io_uring_ops(rt()); - process_many_files(rt()); + for rt in rt_combinations() { + run(rt()); } } -fn shutdown_runtime_while_performing_io_uring_ops(rt: Runtime) { - let (tx, rx) = mpsc::channel(); - let (done_tx, done_rx) = mpsc::channel(); +#[test] +fn open_many_files() { + fn run(rt: Runtime) { + const NUM_FILES: usize = 512; - rt.spawn(async { - let tmp = NamedTempFile::new().unwrap(); - let path = tmp.path().to_path_buf(); + let (_tmp_files, paths): (Vec, Vec) = create_tmp_files(NUM_FILES); + + rt.block_on(async move { + let mut set = JoinSet::new(); + + for i in 0..10_000 { + let path = paths.get(i % NUM_FILES).unwrap().clone(); + set.spawn(async move { + let _file = OpenOptions::new().read(true).open(path).await.unwrap(); + }); + } + while let Some(Ok(_)) = set.join_next().await {} + }); + } + + for rt in rt_combinations() { + run(rt()); + } +} - let mut set = JoinSet::new(); +#[tokio::test] +async fn cancel_op_future() { + let (_tmp_file, path): (Vec, Vec) = create_tmp_files(1); - // spawning a bunch of uring operations. 
- loop { - let path = path.clone(); - set.spawn(async move { - let mut opt = OpenOptions::new(); + let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel(); + let handle = tokio::spawn(async move { + poll_fn(|cx| { + let opt = { + let mut opt = tokio::fs::OpenOptions::new(); opt.read(true); - opt.open(&path).await.unwrap(); - }); - } - }); + opt + }; + + let fut = opt.open(&path[0]); + let res = Box::pin(fut).poll_unpin(cx); - std::thread::spawn(move || { - let rt: Runtime = rx.recv().unwrap(); - rt.shutdown_background(); - done_tx.send(()).unwrap(); + // First poll should be pending. + assert!(res.is_pending(), "Expected the open to be pending"); + + tx.send(()).unwrap(); + + Poll::<()>::Pending + }) + .await; }); - tx.send(rt).unwrap(); + // Wait for the first poll + rx.recv().await.unwrap(); + + handle.abort(); - done_rx.recv().unwrap(); + let res = handle.await.unwrap_err(); + assert!(res.is_cancelled()); } -fn process_many_files(rt: Runtime) { - rt.block_on(async { - const NUM_FILES: usize = 512; - const FILE_SIZE: usize = 64; - - use rand::Rng; - use std::io::Write; - use tempfile::NamedTempFile; - - let mut files = Vec::with_capacity(NUM_FILES); - for _ in 0..NUM_FILES { - let mut tmp = NamedTempFile::new().unwrap(); - let mut data = vec![0u8; FILE_SIZE]; - rand::rng().fill(&mut data[..]); - tmp.write_all(&data).unwrap(); - tmp.flush().unwrap(); - let path = tmp.path().to_path_buf(); - files.push((tmp, data, path)); - } - - let mut handles = Vec::with_capacity(NUM_FILES); - for (tmp, original, path) in files { - handles.push(tokio::spawn(async move { - let _keep_alive = tmp; - - let mut file = tokio::fs::OpenOptions::new() - .read(true) - .open(&path) - .await - .unwrap(); - let mut buf = vec![0u8; FILE_SIZE]; - - file.read_exact(&mut buf).await.unwrap(); - - assert_eq!(buf, original); - })); - } - - for h in handles { - h.await.unwrap(); - } - }); +fn create_tmp_files(num_files: usize) -> (Vec, Vec) { + let mut files = Vec::with_capacity(num_files); + for 
_ in 0..num_files { + let tmp = NamedTempFile::new().unwrap(); + let path = tmp.path().to_path_buf(); + files.push((tmp, path)); + } + + files.into_iter().unzip() } From 2ec848268f9d57e241729f67b320d2ac8b14f5a3 Mon Sep 17 00:00:00 2001 From: mox692 Date: Thu, 19 Jun 2025 21:33:12 +0900 Subject: [PATCH 13/22] apply review: add verification step in CI to check if uring is supported --- .github/workflows/ci.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8dc7bde0164..eef22fe9189 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -359,6 +359,17 @@ jobs: include: - os: ubuntu-latest steps: + - name: check if io-uring is supported in the CI environment + run: | + # Try to read the io-uring setting in the kernel config file. + # https://github.com/torvalds/linux/blob/75f5f23f8787c5e184fcb2fbcd02d8e9317dc5e7/init/Kconfig#L1782-L1789 + CONFIG_FILE="/boot/config-$(uname -r)" + echo "Checking $CONFIG_FILE for io-uring support" + if ! 
grep -q "CONFIG_IO_URING=y" "$CONFIG_FILE"; then + echo "Error: io_uring is not supported" + exit 1 + fi + - uses: actions/checkout@v4 - name: Install Rust ${{ env.rust_stable }} uses: dtolnay/rust-toolchain@stable From 32638f0e4cb528df2b91251253c0da792c818fd6 Mon Sep 17 00:00:00 2001 From: mox692 Date: Fri, 20 Jun 2025 21:25:50 +0900 Subject: [PATCH 14/22] apply review: remove unreachable code path --- tokio/src/fs/open_options.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index 9b192dba427..fe94a732169 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -489,8 +489,6 @@ impl OpenOptions { pub(super) fn as_inner_mut(&mut self) -> &mut StdOpenOptions { match &mut self.inner { Kind::Std(ref mut opts) => opts, - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(_) => panic!("Should not be called on UringOpenOptions"), } } } From e981341340c8703a6604f14c5b2b11eb0ecd36a7 Mon Sep 17 00:00:00 2001 From: mox692 Date: Mon, 23 Jun 2025 20:23:16 +0900 Subject: [PATCH 15/22] apply review: mention that io_uring is experimental --- tokio/src/fs/open_options.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index fe94a732169..c10f4ee48bc 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -440,6 +440,9 @@ impl OpenOptions { /// To enable `io_uring`, you need to specify the `--cfg tokio_uring` flag /// at compile time and set the `Builder::enable_io_uring` runtime option. /// + /// Support for `io_uring` is currently experimental, so its behavior may + /// change or it may be removed in future versions. 
+ /// /// # Examples /// /// ```no_run From fdba9e27a6f26614403de90b098da233956931ec Mon Sep 17 00:00:00 2001 From: Motoyuki Kimura Date: Wed, 25 Jun 2025 19:46:29 +0900 Subject: [PATCH 16/22] Update tokio/tests/fs_uring.rs Co-authored-by: Qi --- tokio/tests/fs_uring.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tokio/tests/fs_uring.rs b/tokio/tests/fs_uring.rs index db0a3130217..0cd2827d4b6 100644 --- a/tokio/tests/fs_uring.rs +++ b/tokio/tests/fs_uring.rs @@ -118,7 +118,8 @@ async fn cancel_op_future() { let fut = opt.open(&path[0]); let res = Box::pin(fut).poll_unpin(cx); - // First poll should be pending. + // The first poll only submit the 'open' operation + // to the kernel. assert!(res.is_pending(), "Expected the open to be pending"); tx.send(()).unwrap(); From a31cffdcb860b17535756252a13fb5d4227a95ff Mon Sep 17 00:00:00 2001 From: mox692 Date: Fri, 27 Jun 2025 02:57:03 +0900 Subject: [PATCH 17/22] apply review: use task tracker --- tokio/Cargo.toml | 1 + tokio/tests/fs_uring.rs | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index 8444de7a934..66fa4870b4c 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -136,6 +136,7 @@ features = [ [dev-dependencies] tokio-test = { version = "0.4.0", path = "../tokio-test" } tokio-stream = { version = "0.1", path = "../tokio-stream" } +tokio-util = { version = "0.7", path = "../tokio-util", features = ["rt"] } futures = { version = "0.3.0", features = ["async-await"] } mockall = "0.13.0" async-stream = "0.3" diff --git a/tokio/tests/fs_uring.rs b/tokio/tests/fs_uring.rs index 0cd2827d4b6..8880e4d6480 100644 --- a/tokio/tests/fs_uring.rs +++ b/tokio/tests/fs_uring.rs @@ -10,8 +10,8 @@ use tempfile::NamedTempFile; use tokio::{ fs::OpenOptions, runtime::{Builder, Runtime}, - task::JoinSet, }; +use tokio_util::task::TaskTracker; fn multi_rt(n: usize) -> Box Runtime> { Box::new(move || { @@ -85,15 +85,16 @@ fn open_many_files() { let 
(_tmp_files, paths): (Vec, Vec) = create_tmp_files(NUM_FILES); rt.block_on(async move { - let mut set = JoinSet::new(); + let tracker = TaskTracker::new(); for i in 0..10_000 { let path = paths.get(i % NUM_FILES).unwrap().clone(); - set.spawn(async move { + tracker.spawn(async move { let _file = OpenOptions::new().read(true).open(path).await.unwrap(); }); } - while let Some(Ok(_)) = set.join_next().await {} + tracker.close(); + tracker.wait().await; }); } From dff87112ac7475b55ac81171d29fcd9acb543128 Mon Sep 17 00:00:00 2001 From: mox692 Date: Mon, 30 Jun 2025 20:04:45 +0900 Subject: [PATCH 18/22] apply review: update test to use shutdown_timeout --- tokio/tests/fs_uring.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tokio/tests/fs_uring.rs b/tokio/tests/fs_uring.rs index 8880e4d6480..cd02909f4d6 100644 --- a/tokio/tests/fs_uring.rs +++ b/tokio/tests/fs_uring.rs @@ -5,6 +5,7 @@ use futures::future::FutureExt; use std::sync::mpsc; use std::task::Poll; +use std::time::Duration; use std::{future::poll_fn, path::PathBuf}; use tempfile::NamedTempFile; use tokio::{ @@ -44,8 +45,8 @@ fn shutdown_runtime_while_performing_io_uring_ops() { let (tx, rx) = mpsc::channel(); let (done_tx, done_rx) = mpsc::channel(); - rt.spawn(async { - let (_tmp, path) = create_tmp_files(1); + let (_tmp, path) = create_tmp_files(1); + rt.spawn(async move { let path = path[0].clone(); // spawning a bunch of uring operations. 
@@ -64,7 +65,7 @@ fn shutdown_runtime_while_performing_io_uring_ops() { std::thread::spawn(move || { let rt: Runtime = rx.recv().unwrap(); - rt.shutdown_background(); + rt.shutdown_timeout(Duration::from_millis(300)); done_tx.send(()).unwrap(); }); From 7237f56a706de998eb37a44e194e852294ed67cb Mon Sep 17 00:00:00 2001 From: mox692 Date: Tue, 1 Jul 2025 21:34:16 +0900 Subject: [PATCH 19/22] runtime: fix handling of cancelled Ops --- tokio/src/runtime/io/driver/uring.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index bd3391eba4f..877b63283fe 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -294,7 +294,10 @@ impl Handle { match mem::replace(lifecycle, Lifecycle::Cancelled(cancel_data)) { Lifecycle::Submitted | Lifecycle::Waiting(_) => (), // The driver saw the completion, but it was never polled. - Lifecycle::Completed(_) => (), + Lifecycle::Completed(_) => { + // We can safely remove the entry from the slab, as it has already been completed. 
+ ops.remove(index); + } prev => panic!("Unexpected state: {:?}", prev), }; } From de2da0598407a52e336894d695c9a6f7943868f7 Mon Sep 17 00:00:00 2001 From: mox692 Date: Tue, 1 Jul 2025 21:47:40 +0900 Subject: [PATCH 20/22] enable ci in sub-feature branch temporary --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eef22fe9189..288f2a4cc52 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ on: push: branches: ["master", "tokio-*.x"] pull_request: - branches: ["master", "tokio-*.x"] + # branches: ["master", "tokio-*.x"] name: CI From 7af790aa21dd55051768150aa53d33512657b37a Mon Sep 17 00:00:00 2001 From: mox692 Date: Tue, 1 Jul 2025 23:37:36 +0900 Subject: [PATCH 21/22] Revert "enable ci in sub-feature branch temporary" This reverts commit de2da0598407a52e336894d695c9a6f7943868f7. --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 288f2a4cc52..eef22fe9189 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -2,7 +2,7 @@ on: push: branches: ["master", "tokio-*.x"] pull_request: - # branches: ["master", "tokio-*.x"] + branches: ["master", "tokio-*.x"] name: CI From 739d533cc035453e2fdb92aa25baf8252f3918b5 Mon Sep 17 00:00:00 2001 From: mox692 Date: Wed, 2 Jul 2025 00:12:37 +0900 Subject: [PATCH 22/22] update base --- .github/workflows/ci.yml | 11 -- tokio/Cargo.toml | 1 - tokio/src/fs/mod.rs | 3 - tokio/src/fs/open_options.rs | 148 ++--------------- .../src/fs/open_options/uring_open_options.rs | 128 --------------- tokio/src/io/mod.rs | 4 - tokio/src/io/uring/mod.rs | 2 - tokio/src/io/uring/open.rs | 53 ------ tokio/src/io/uring/utils.rs | 6 - tokio/src/runtime/builder.rs | 29 ---- tokio/src/runtime/driver/op.rs | 7 +- tokio/src/runtime/io/driver/uring.rs | 7 +- tokio/src/runtime/mod.rs | 2 +- 
tokio/tests/fs_open_options.rs | 4 +- tokio/tests/fs_uring.rs | 152 ------------------ 15 files changed, 20 insertions(+), 537 deletions(-) delete mode 100644 tokio/src/fs/open_options/uring_open_options.rs delete mode 100644 tokio/src/io/uring/mod.rs delete mode 100644 tokio/src/io/uring/open.rs delete mode 100644 tokio/src/io/uring/utils.rs delete mode 100644 tokio/tests/fs_uring.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eef22fe9189..8dc7bde0164 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -359,17 +359,6 @@ jobs: include: - os: ubuntu-latest steps: - - name: check if io-uring is supported in the CI environment - run: | - # Try to read the io-uring setting in the kernel config file. - # https://github.com/torvalds/linux/blob/75f5f23f8787c5e184fcb2fbcd02d8e9317dc5e7/init/Kconfig#L1782-L1789 - CONFIG_FILE="/boot/config-$(uname -r)" - echo "Checking $CONFIG_FILE for io-uring support" - if ! grep -q "CONFIG_IO_URING=y" "$CONFIG_FILE"; then - echo "Error: io_uring is not supported" - exit 1 - fi - - uses: actions/checkout@v4 - name: Install Rust ${{ env.rust_stable }} uses: dtolnay/rust-toolchain@stable diff --git a/tokio/Cargo.toml b/tokio/Cargo.toml index 66fa4870b4c..8444de7a934 100644 --- a/tokio/Cargo.toml +++ b/tokio/Cargo.toml @@ -136,7 +136,6 @@ features = [ [dev-dependencies] tokio-test = { version = "0.4.0", path = "../tokio-test" } tokio-stream = { version = "0.1", path = "../tokio-stream" } -tokio-util = { version = "0.7", path = "../tokio-util", features = ["rt"] } futures = { version = "0.3.0", features = ["async-await"] } mockall = "0.13.0" async-stream = "0.3" diff --git a/tokio/src/fs/mod.rs b/tokio/src/fs/mod.rs index 7e0c35ba84a..c1855c42aeb 100644 --- a/tokio/src/fs/mod.rs +++ b/tokio/src/fs/mod.rs @@ -237,9 +237,6 @@ pub use self::metadata::metadata; mod open_options; pub use self::open_options::OpenOptions; -cfg_tokio_uring! 
{ - pub(crate) use self::open_options::UringOpenOptions; -} mod read; pub use self::read::read; diff --git a/tokio/src/fs/open_options.rs b/tokio/src/fs/open_options.rs index c10f4ee48bc..e70e6aa0b6f 100644 --- a/tokio/src/fs/open_options.rs +++ b/tokio/src/fs/open_options.rs @@ -3,12 +3,6 @@ use crate::fs::{asyncify, File}; use std::io; use std::path::Path; -cfg_tokio_uring! { - mod uring_open_options; - pub(crate) use uring_open_options::UringOpenOptions; - use crate::runtime::driver::op::Op; -} - #[cfg(test)] mod mock_open_options; #[cfg(test)] @@ -85,16 +79,7 @@ use std::os::windows::fs::OpenOptionsExt; /// } /// ``` #[derive(Clone, Debug)] -pub struct OpenOptions { - inner: Kind, -} - -#[derive(Debug, Clone)] -enum Kind { - Std(StdOpenOptions), - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Uring(UringOpenOptions), -} +pub struct OpenOptions(StdOpenOptions); impl OpenOptions { /// Creates a blank new set of options ready for configuration. @@ -114,12 +99,7 @@ impl OpenOptions { /// let future = options.read(true).open("foo.txt"); /// ``` pub fn new() -> OpenOptions { - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - let inner = Kind::Uring(UringOpenOptions::new()); - #[cfg(not(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux")))] - let inner = Kind::Std(StdOpenOptions::new()); - - OpenOptions { inner } + OpenOptions(StdOpenOptions::new()) } /// Sets the option for read access. 
@@ -148,15 +128,7 @@ impl OpenOptions { /// } /// ``` pub fn read(&mut self, read: bool) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.read(read); - } - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.read(read); - } - } + self.0.read(read); self } @@ -186,15 +158,7 @@ impl OpenOptions { /// } /// ``` pub fn write(&mut self, write: bool) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.write(write); - } - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.write(write); - } - } + self.0.write(write); self } @@ -253,15 +217,7 @@ impl OpenOptions { /// } /// ``` pub fn append(&mut self, append: bool) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.append(append); - } - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.append(append); - } - } + self.0.append(append); self } @@ -294,15 +250,7 @@ impl OpenOptions { /// } /// ``` pub fn truncate(&mut self, truncate: bool) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.truncate(truncate); - } - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.truncate(truncate); - } - } + self.0.truncate(truncate); self } @@ -338,15 +286,7 @@ impl OpenOptions { /// } /// ``` pub fn create(&mut self, create: bool) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.create(create); - } - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.create(create); - } - } + self.0.create(create); self } @@ -389,15 +329,7 @@ impl OpenOptions { /// } /// ``` pub fn create_new(&mut self, create_new: bool) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.create_new(create_new); - } - 
#[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.create_new(create_new); - } - } + self.0.create_new(create_new); self } @@ -434,15 +366,6 @@ impl OpenOptions { /// open files, too long filename, too many symbolic links in the /// specified path (Unix-like systems only), etc. /// - /// # io_uring support - /// - /// On Linux, you can also use `io_uring` for executing system calls. - /// To enable `io_uring`, you need to specify the `--cfg tokio_uring` flag - /// at compile time and set the `Builder::enable_io_uring` runtime option. - /// - /// Support for `io_uring` is currently experimental, so its behavior may - /// change or it may be removed in future versions. - /// /// # Examples /// /// ```no_run @@ -463,36 +386,17 @@ impl OpenOptions { /// [`Other`]: std::io::ErrorKind::Other /// [`PermissionDenied`]: std::io::ErrorKind::PermissionDenied pub async fn open(&self, path: impl AsRef) -> io::Result { - match &self.inner { - Kind::Std(opts) => Self::std_open(opts, path).await, - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - let handle = crate::runtime::Handle::current(); - let driver_handle = handle.inner.driver().io(); - - if driver_handle.check_and_init()? { - Op::open(path.as_ref(), opts)?.await - } else { - let opts = opts.clone().into(); - Self::std_open(&opts, path).await - } - } - } - } - - async fn std_open(opts: &StdOpenOptions, path: impl AsRef) -> io::Result { let path = path.as_ref().to_owned(); - let opts = opts.clone(); + let opts = self.0.clone(); let std = asyncify(move || opts.open(path)).await?; Ok(File::from_std(std)) } - #[cfg(windows)] + /// Returns a mutable reference to the underlying `std::fs::OpenOptions` + #[cfg(any(windows, unix))] pub(super) fn as_inner_mut(&mut self) -> &mut StdOpenOptions { - match &mut self.inner { - Kind::Std(ref mut opts) => opts, - } + &mut self.0 } } @@ -524,15 +428,7 @@ feature! 
{ /// } /// ``` pub fn mode(&mut self, mode: u32) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.mode(mode); - } - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.mode(mode); - } - } + self.as_inner_mut().mode(mode); self } @@ -563,15 +459,7 @@ feature! { /// } /// ``` pub fn custom_flags(&mut self, flags: i32) -> &mut OpenOptions { - match &mut self.inner { - Kind::Std(opts) => { - opts.custom_flags(flags); - } - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - Kind::Uring(opts) => { - opts.custom_flags(flags); - } - } + self.as_inner_mut().custom_flags(flags); self } } @@ -763,13 +651,7 @@ cfg_windows! { impl From for OpenOptions { fn from(options: StdOpenOptions) -> OpenOptions { - OpenOptions { - inner: Kind::Std(options), - // TODO: Add support for converting `StdOpenOptions` to `UringOpenOptions` - // if user enables the `--cfg tokio_uring`. It is blocked by: - // * https://github.com/rust-lang/rust/issues/74943 - // * https://github.com/rust-lang/rust/issues/76801 - } + OpenOptions(options) } } diff --git a/tokio/src/fs/open_options/uring_open_options.rs b/tokio/src/fs/open_options/uring_open_options.rs deleted file mode 100644 index 48297ca3b5b..00000000000 --- a/tokio/src/fs/open_options/uring_open_options.rs +++ /dev/null @@ -1,128 +0,0 @@ -use std::{io, os::unix::fs::OpenOptionsExt}; - -#[cfg(test)] -use super::mock_open_options::MockOpenOptions as StdOpenOptions; -#[cfg(not(test))] -use std::fs::OpenOptions as StdOpenOptions; - -#[derive(Debug, Clone)] -pub(crate) struct UringOpenOptions { - pub(crate) read: bool, - pub(crate) write: bool, - pub(crate) append: bool, - pub(crate) truncate: bool, - pub(crate) create: bool, - pub(crate) create_new: bool, - pub(crate) mode: libc::mode_t, - pub(crate) custom_flags: libc::c_int, -} - -impl UringOpenOptions { - pub(crate) fn new() -> Self { - Self { - read: false, - write: false, - 
append: false, - truncate: false, - create: false, - create_new: false, - mode: 0o666, - custom_flags: 0, - } - } - - pub(crate) fn append(&mut self, append: bool) -> &mut Self { - self.append = append; - self - } - - pub(crate) fn create(&mut self, create: bool) -> &mut Self { - self.create = create; - self - } - - pub(crate) fn create_new(&mut self, create_new: bool) -> &mut Self { - self.create_new = create_new; - self - } - - pub(crate) fn read(&mut self, read: bool) -> &mut Self { - self.read = read; - self - } - - pub(crate) fn write(&mut self, write: bool) -> &mut Self { - self.write = write; - self - } - - pub(crate) fn truncate(&mut self, truncate: bool) -> &mut Self { - self.truncate = truncate; - self - } - - pub(crate) fn mode(&mut self, mode: u32) -> &mut Self { - self.mode = mode as libc::mode_t; - self - } - - pub(crate) fn custom_flags(&mut self, flags: i32) -> &mut Self { - self.custom_flags = flags; - self - } - - // Equivalent to https://github.com/rust-lang/rust/blob/64c81fd10509924ca4da5d93d6052a65b75418a5/library/std/src/sys/fs/unix.rs#L1118-L1127 - pub(crate) fn access_mode(&self) -> io::Result { - match (self.read, self.write, self.append) { - (true, false, false) => Ok(libc::O_RDONLY), - (false, true, false) => Ok(libc::O_WRONLY), - (true, true, false) => Ok(libc::O_RDWR), - (false, _, true) => Ok(libc::O_WRONLY | libc::O_APPEND), - (true, _, true) => Ok(libc::O_RDWR | libc::O_APPEND), - (false, false, false) => Err(io::Error::from_raw_os_error(libc::EINVAL)), - } - } - - // Equivalent to https://github.com/rust-lang/rust/blob/64c81fd10509924ca4da5d93d6052a65b75418a5/library/std/src/sys/fs/unix.rs#L1129-L1151 - pub(crate) fn creation_mode(&self) -> io::Result { - match (self.write, self.append) { - (true, false) => {} - (false, false) => { - if self.truncate || self.create || self.create_new { - return Err(io::Error::from_raw_os_error(libc::EINVAL)); - } - } - (_, true) => { - if self.truncate && !self.create_new { - return 
Err(io::Error::from_raw_os_error(libc::EINVAL)); - } - } - } - - Ok(match (self.create, self.truncate, self.create_new) { - (false, false, false) => 0, - (true, false, false) => libc::O_CREAT, - (false, true, false) => libc::O_TRUNC, - (true, true, false) => libc::O_CREAT | libc::O_TRUNC, - (_, _, true) => libc::O_CREAT | libc::O_EXCL, - }) - } -} - -impl From for StdOpenOptions { - fn from(value: UringOpenOptions) -> Self { - let mut std = StdOpenOptions::new(); - - std.append(value.append); - std.create(value.create); - std.create_new(value.create_new); - std.read(value.read); - std.truncate(value.truncate); - std.write(value.write); - - std.mode(value.mode); - std.custom_flags(value.custom_flags); - - std - } -} diff --git a/tokio/src/io/mod.rs b/tokio/src/io/mod.rs index 763a3fabbf4..bfdd1ccfd1c 100644 --- a/tokio/src/io/mod.rs +++ b/tokio/src/io/mod.rs @@ -293,7 +293,3 @@ cfg_io_blocking! { pub(crate) use crate::blocking::JoinHandle as Blocking; } } - -cfg_tokio_uring! { - pub(crate) mod uring; -} diff --git a/tokio/src/io/uring/mod.rs b/tokio/src/io/uring/mod.rs deleted file mode 100644 index e5ac85af604..00000000000 --- a/tokio/src/io/uring/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod open; -pub(crate) mod utils; diff --git a/tokio/src/io/uring/open.rs b/tokio/src/io/uring/open.rs deleted file mode 100644 index 68f434ff174..00000000000 --- a/tokio/src/io/uring/open.rs +++ /dev/null @@ -1,53 +0,0 @@ -use super::utils::cstr; -use crate::{ - fs::UringOpenOptions, - runtime::driver::op::{CancelData, Cancellable, Completable, CqeResult, Op}, -}; -use io_uring::{opcode, types}; -use std::{ffi::CString, io, os::fd::FromRawFd, path::Path}; - -#[derive(Debug)] -pub(crate) struct Open { - /// This field will be read by the kernel during the operation, so we - /// need to ensure it is valid for the entire duration of the operation. 
- #[allow(dead_code)] - path: CString, -} - -impl Completable for Open { - type Output = crate::fs::File; - fn complete(self, cqe: CqeResult) -> io::Result { - let fd = cqe.result? as i32; - let file = unsafe { crate::fs::File::from_raw_fd(fd) }; - Ok(file) - } -} - -impl Cancellable for Open { - fn cancel(self) -> CancelData { - CancelData::Open(self) - } -} - -impl Op { - /// Submit a request to open a file. - pub(crate) fn open(path: &Path, options: &UringOpenOptions) -> io::Result> { - let inner_opt = options; - let path = cstr(path)?; - - let custom_flags = inner_opt.custom_flags; - let flags = libc::O_CLOEXEC - | options.access_mode()? - | options.creation_mode()? - | (custom_flags & !libc::O_ACCMODE); - - let open_op = opcode::OpenAt::new(types::Fd(libc::AT_FDCWD), path.as_ptr()) - .flags(flags) - .mode(inner_opt.mode) - .build(); - - // SAFETY: Parameters are valid for the entire duration of the operation - let op = unsafe { Op::new(open_op, Open { path }) }; - Ok(op) - } -} diff --git a/tokio/src/io/uring/utils.rs b/tokio/src/io/uring/utils.rs deleted file mode 100644 index e30e7a5ddc4..00000000000 --- a/tokio/src/io/uring/utils.rs +++ /dev/null @@ -1,6 +0,0 @@ -use std::os::unix::ffi::OsStrExt; -use std::{ffi::CString, io, path::Path}; - -pub(crate) fn cstr(p: &Path) -> io::Result { - Ok(CString::new(p.as_os_str().as_bytes())?) -} diff --git a/tokio/src/runtime/builder.rs b/tokio/src/runtime/builder.rs index 76800296a27..a0207b3a046 100644 --- a/tokio/src/runtime/builder.rs +++ b/tokio/src/runtime/builder.rs @@ -338,10 +338,6 @@ impl Builder { all(unix, feature = "signal") ))] self.enable_io(); - - #[cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux",))] - self.enable_io_uring(); - #[cfg(feature = "time")] self.enable_time(); @@ -1582,31 +1578,6 @@ cfg_time! { } } -cfg_tokio_uring! { - impl Builder { - /// Enables the tokio's io_uring driver. - /// - /// Doing this enables using io_uring operations on the runtime. 
- /// - /// # Examples - /// - /// ``` - /// use tokio::runtime; - /// - /// let rt = runtime::Builder::new_multi_thread() - /// .enable_io_uring() - /// .build() - /// .unwrap(); - /// ``` - #[cfg_attr(docsrs, doc(cfg(tokio_uring)))] - pub fn enable_io_uring(&mut self) -> &mut Self { - // Currently, the uring flag is equivalent to `enable_io`. - self.enable_io = true; - self - } - } -} - cfg_test_util! { impl Builder { /// Controls if the runtime's clock starts paused or advancing. diff --git a/tokio/src/runtime/driver/op.rs b/tokio/src/runtime/driver/op.rs index af1c7e139aa..40a135d744b 100644 --- a/tokio/src/runtime/driver/op.rs +++ b/tokio/src/runtime/driver/op.rs @@ -1,4 +1,3 @@ -use crate::io::uring::open::Open; use crate::runtime::Handle; use io_uring::cqueue; use io_uring::squeue::Entry; @@ -9,11 +8,8 @@ use std::task::Poll; use std::task::Waker; use std::{io, mem}; -#[allow(dead_code)] #[derive(Debug)] -pub(crate) enum CancelData { - Open(Open), -} +pub(crate) enum CancelData {} #[derive(Debug)] pub(crate) enum Lifecycle { @@ -25,7 +21,6 @@ pub(crate) enum Lifecycle { /// The submitter no longer has interest in the operation result. The state /// must be passed to the driver and held until the operation completes. - #[allow(dead_code)] Cancelled(CancelData), /// The operation has completed with a single cqe result diff --git a/tokio/src/runtime/io/driver/uring.rs b/tokio/src/runtime/io/driver/uring.rs index 877b63283fe..3370164d1fe 100644 --- a/tokio/src/runtime/io/driver/uring.rs +++ b/tokio/src/runtime/io/driver/uring.rs @@ -15,7 +15,7 @@ const DEFAULT_RING_SIZE: u32 = 256; #[repr(usize)] #[derive(Debug, PartialEq, Eq)] -pub(crate) enum State { +enum State { Uninitialized = 0, Initialized = 1, Unsupported = 2, @@ -265,11 +265,6 @@ impl Handle { submit_or_remove(ctx)?; } - // Ensure that the completion queue is not full before submitting the entry. 
- while ctx.ring_mut().completion().is_full() { - ctx.dispatch_completions(); - } - // Note: For now, we submit the entry immediately without utilizing batching. submit_or_remove(ctx)?; diff --git a/tokio/src/runtime/mod.rs b/tokio/src/runtime/mod.rs index 031fde5d0b0..78a0114f48e 100644 --- a/tokio/src/runtime/mod.rs +++ b/tokio/src/runtime/mod.rs @@ -323,7 +323,7 @@ pub(crate) mod context; pub(crate) mod park; -pub(crate) mod driver; +mod driver; pub(crate) mod scheduler; diff --git a/tokio/tests/fs_open_options.rs b/tokio/tests/fs_open_options.rs index 58982d679df..58d7de647e2 100644 --- a/tokio/tests/fs_open_options.rs +++ b/tokio/tests/fs_open_options.rs @@ -58,7 +58,7 @@ async fn open_options_mode() { let mode = format!("{:?}", OpenOptions::new().mode(0o644)); // TESTING HACK: use Debug output to check the stored data assert!( - mode.contains("mode: 420") || mode.contains("mode: 0o000644"), + mode.contains("mode: 420 ") || mode.contains("mode: 0o000644 "), "mode is: {mode}" ); } @@ -69,7 +69,7 @@ async fn open_options_custom_flags_linux() { // TESTING HACK: use Debug output to check the stored data assert!( format!("{:?}", OpenOptions::new().custom_flags(libc::O_TRUNC)) - .contains("custom_flags: 512") + .contains("custom_flags: 512,") ); } diff --git a/tokio/tests/fs_uring.rs b/tokio/tests/fs_uring.rs deleted file mode 100644 index cd02909f4d6..00000000000 --- a/tokio/tests/fs_uring.rs +++ /dev/null @@ -1,152 +0,0 @@ -//! Uring file operations tests. 
- -#![cfg(all(tokio_uring, feature = "rt", feature = "fs", target_os = "linux"))] - -use futures::future::FutureExt; -use std::sync::mpsc; -use std::task::Poll; -use std::time::Duration; -use std::{future::poll_fn, path::PathBuf}; -use tempfile::NamedTempFile; -use tokio::{ - fs::OpenOptions, - runtime::{Builder, Runtime}, -}; -use tokio_util::task::TaskTracker; - -fn multi_rt(n: usize) -> Box Runtime> { - Box::new(move || { - Builder::new_multi_thread() - .worker_threads(n) - .enable_all() - .build() - .unwrap() - }) -} - -fn current_rt() -> Box Runtime> { - Box::new(|| Builder::new_current_thread().enable_all().build().unwrap()) -} - -fn rt_combinations() -> Vec Runtime>> { - vec![ - current_rt(), - multi_rt(1), - multi_rt(2), - multi_rt(8), - multi_rt(64), - multi_rt(256), - ] -} - -#[test] -fn shutdown_runtime_while_performing_io_uring_ops() { - fn run(rt: Runtime) { - let (tx, rx) = mpsc::channel(); - let (done_tx, done_rx) = mpsc::channel(); - - let (_tmp, path) = create_tmp_files(1); - rt.spawn(async move { - let path = path[0].clone(); - - // spawning a bunch of uring operations. - loop { - let path = path.clone(); - tokio::spawn(async move { - let mut opt = OpenOptions::new(); - opt.read(true); - opt.open(&path).await.unwrap(); - }); - - // Avoid busy looping. 
- tokio::time::sleep(std::time::Duration::from_millis(10)).await; - } - }); - - std::thread::spawn(move || { - let rt: Runtime = rx.recv().unwrap(); - rt.shutdown_timeout(Duration::from_millis(300)); - done_tx.send(()).unwrap(); - }); - - tx.send(rt).unwrap(); - done_rx.recv().unwrap(); - } - - for rt in rt_combinations() { - run(rt()); - } -} - -#[test] -fn open_many_files() { - fn run(rt: Runtime) { - const NUM_FILES: usize = 512; - - let (_tmp_files, paths): (Vec, Vec) = create_tmp_files(NUM_FILES); - - rt.block_on(async move { - let tracker = TaskTracker::new(); - - for i in 0..10_000 { - let path = paths.get(i % NUM_FILES).unwrap().clone(); - tracker.spawn(async move { - let _file = OpenOptions::new().read(true).open(path).await.unwrap(); - }); - } - tracker.close(); - tracker.wait().await; - }); - } - - for rt in rt_combinations() { - run(rt()); - } -} - -#[tokio::test] -async fn cancel_op_future() { - let (_tmp_file, path): (Vec, Vec) = create_tmp_files(1); - - let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel(); - let handle = tokio::spawn(async move { - poll_fn(|cx| { - let opt = { - let mut opt = tokio::fs::OpenOptions::new(); - opt.read(true); - opt - }; - - let fut = opt.open(&path[0]); - let res = Box::pin(fut).poll_unpin(cx); - - // The first poll only submit the 'open' operation - // to the kernel. - assert!(res.is_pending(), "Expected the open to be pending"); - - tx.send(()).unwrap(); - - Poll::<()>::Pending - }) - .await; - }); - - // Wait for the first poll - rx.recv().await.unwrap(); - - handle.abort(); - - let res = handle.await.unwrap_err(); - assert!(res.is_cancelled()); -} - -fn create_tmp_files(num_files: usize) -> (Vec, Vec) { - let mut files = Vec::with_capacity(num_files); - for _ in 0..num_files { - let tmp = NamedTempFile::new().unwrap(); - let path = tmp.path().to_path_buf(); - files.push((tmp, path)); - } - - files.into_iter().unzip() -}