Skip to content

Commit 2fed57d

Browse files
committed
Use mio to replace Epoll
Epoll is Linux-specific, so we use mio, a cross-platform event notification library, to replace it. Signed-off-by: Wenyu Huang <[email protected]>
1 parent cfff91e commit 2fed57d

File tree

7 files changed

+137
-82
lines changed

7 files changed

+137
-82
lines changed

vhost-user-backend/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
### Added
66
### Changed
7+
- [[316]](https://github.com/rust-vmm/vhost/pull/316) Use mio to replace Epoll. Expose event_loop::EventSet.
8+
79
### Deprecated
810
### Fixed
911

vhost-user-backend/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ postcopy = ["vhost/postcopy", "userfaultfd"]
1515
[dependencies]
1616
libc = "0.2.39"
1717
log = "0.4.17"
18+
mio = { version = "1.0.4", features = ["os-poll", "os-ext"] }
1819
userfaultfd = { version = "0.8.1", optional = true }
1920
vhost = { path = "../vhost", version = "0.14.0", features = ["vhost-user-backend"] }
2021
virtio-bindings = { workspace = true }

vhost-user-backend/src/backend.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@ use vhost::vhost_user::message::{
2929
};
3030
use vhost::vhost_user::Backend;
3131
use vm_memory::bitmap::Bitmap;
32-
use vmm_sys_util::epoll::EventSet;
3332
use vmm_sys_util::eventfd::EventFd;
3433

3534
use vhost::vhost_user::GpuBackend;
3635

36+
use crate::EventSet;
37+
3738
use super::vring::VringT;
3839
use super::GM;
3940

@@ -793,7 +794,7 @@ pub mod tests {
793794

794795
let vring = VringRwLock::new(mem, 0x1000).unwrap();
795796
backend
796-
.handle_event(0x1, EventSet::IN, &[vring], 0)
797+
.handle_event(0x1, EventSet::Readable, &[vring], 0)
797798
.unwrap();
798799

799800
backend.reset_device();

vhost-user-backend/src/event_loop.rs

Lines changed: 124 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -3,52 +3,88 @@
33
//
44
// SPDX-License-Identifier: Apache-2.0
55

6+
use std::collections::HashSet;
67
use std::fmt::{Display, Formatter};
78
use std::io::{self, Result};
89
use std::marker::PhantomData;
910
use std::os::unix::io::{AsRawFd, RawFd};
11+
use std::sync::Mutex;
1012

11-
use vmm_sys_util::epoll::{ControlOperation, Epoll, EpollEvent, EventSet};
13+
use mio::event::Event;
14+
use mio::unix::SourceFd;
15+
use mio::{Events, Interest, Poll, Registry, Token};
1216
use vmm_sys_util::eventfd::EventFd;
1317

1418
use super::backend::VhostUserBackend;
1519
use super::vring::VringT;
1620

1721
/// Errors related to vring epoll event handling.
1822
#[derive(Debug)]
19-
pub enum VringEpollError {
23+
pub enum VringPollError {
2024
/// Failed to create epoll file descriptor.
21-
EpollCreateFd(io::Error),
25+
PollerCreate(io::Error),
2226
/// Failed while waiting for events.
23-
EpollWait(io::Error),
27+
PollerWait(io::Error),
2428
/// Could not register exit event
2529
RegisterExitEvent(io::Error),
2630
/// Failed to read the event from kick EventFd.
2731
HandleEventReadKick(io::Error),
2832
/// Failed to handle the event from the backend.
2933
HandleEventBackendHandling(io::Error),
34+
/// Failed to clone registry.
35+
RegistryClone(io::Error),
3036
}
3137

32-
impl Display for VringEpollError {
38+
impl Display for VringPollError {
3339
fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
3440
match self {
35-
VringEpollError::EpollCreateFd(e) => write!(f, "cannot create epoll fd: {e}"),
36-
VringEpollError::EpollWait(e) => write!(f, "failed to wait for epoll event: {e}"),
37-
VringEpollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"),
38-
VringEpollError::HandleEventReadKick(e) => {
41+
VringPollError::PollerCreate(e) => write!(f, "cannot create poller: {e}"),
42+
VringPollError::PollerWait(e) => write!(f, "failed to wait for poller event: {e}"),
43+
VringPollError::RegisterExitEvent(e) => write!(f, "cannot register exit event: {e}"),
44+
VringPollError::HandleEventReadKick(e) => {
3945
write!(f, "cannot read vring kick event: {e}")
4046
}
41-
VringEpollError::HandleEventBackendHandling(e) => {
42-
write!(f, "failed to handle epoll event: {e}")
47+
VringPollError::HandleEventBackendHandling(e) => {
48+
write!(f, "failed to handle poll event: {e}")
4349
}
50+
VringPollError::RegistryClone(e) => write!(f, "cannot clone poller's registry: {e}"),
4451
}
4552
}
4653
}
4754

48-
impl std::error::Error for VringEpollError {}
55+
impl std::error::Error for VringPollError {}
4956

5057
/// Result of vring epoll operations.
51-
pub type VringEpollResult<T> = std::result::Result<T, VringEpollError>;
58+
pub type VringEpollResult<T> = std::result::Result<T, VringPollError>;
59+
60+
pub enum EventSet {
61+
Readable,
62+
Writable,
63+
All,
64+
}
65+
66+
impl EventSet {
67+
fn to_interest(&self) -> Interest {
68+
match self {
69+
EventSet::Readable => Interest::READABLE,
70+
EventSet::Writable => Interest::WRITABLE,
71+
EventSet::All => Interest::READABLE | Interest::WRITABLE,
72+
}
73+
}
74+
}
75+
76+
fn event_to_event_set(evt: &Event) -> Option<EventSet> {
77+
if evt.is_readable() && evt.is_writable() {
78+
return Some(EventSet::All);
79+
}
80+
if evt.is_readable() {
81+
return Some(EventSet::Readable);
82+
}
83+
if evt.is_writable() {
84+
return Some(EventSet::Writable);
85+
}
86+
None
87+
}
5288

5389
/// Epoll event handler to manage and process epoll events for registered file descriptor.
5490
///
@@ -57,7 +93,11 @@ pub type VringEpollResult<T> = std::result::Result<T, VringEpollError>;
5793
/// - remove registered file descriptors from the epoll fd
5894
/// - run the event loop to handle pending events on the epoll fd
5995
pub struct VringEpollHandler<T: VhostUserBackend> {
60-
epoll: Epoll,
96+
poller: Mutex<Poll>,
97+
registry: Registry,
98+
// Record the registered fd.
99+
// This is needed because, in mio, consecutive calls to register the same source are unspecified behavior.
100+
fd_set: Mutex<HashSet<RawFd>>,
61101
backend: T,
62102
vrings: Vec<T::Vring>,
63103
thread_id: usize,
@@ -84,22 +124,32 @@ where
84124
vrings: Vec<T::Vring>,
85125
thread_id: usize,
86126
) -> VringEpollResult<Self> {
87-
let epoll = Epoll::new().map_err(VringEpollError::EpollCreateFd)?;
127+
let poller = Poll::new().map_err(VringPollError::PollerCreate)?;
88128
let exit_event_fd = backend.exit_event(thread_id);
129+
let fd_set = Mutex::new(HashSet::new());
89130

131+
let registry = poller
132+
.registry()
133+
.try_clone()
134+
.map_err(VringPollError::RegistryClone)?;
90135
if let Some(exit_event_fd) = &exit_event_fd {
91136
let id = backend.num_queues();
92-
epoll
93-
.ctl(
94-
ControlOperation::Add,
95-
exit_event_fd.as_raw_fd(),
96-
EpollEvent::new(EventSet::IN, id as u64),
137+
138+
registry
139+
.register(
140+
&mut SourceFd(&exit_event_fd.as_raw_fd()),
141+
Token(id),
142+
Interest::READABLE,
97143
)
98-
.map_err(VringEpollError::RegisterExitEvent)?;
144+
.map_err(VringPollError::RegisterExitEvent)?;
145+
146+
fd_set.lock().unwrap().insert(exit_event_fd.as_raw_fd());
99147
}
100148

101149
Ok(VringEpollHandler {
102-
epoll,
150+
poller: Mutex::new(poller),
151+
registry,
152+
fd_set,
103153
backend,
104154
vrings,
105155
thread_id,
@@ -135,13 +185,31 @@ where
135185
}
136186

137187
pub(crate) fn register_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> {
138-
self.epoll
139-
.ctl(ControlOperation::Add, fd, EpollEvent::new(ev_type, data))
188+
let mut fd_set = self.fd_set.lock().unwrap();
189+
if fd_set.contains(&fd) {
190+
return Err(io::Error::from_raw_os_error(libc::EEXIST));
191+
}
192+
self.registry
193+
.register(
194+
&mut SourceFd(&fd),
195+
Token(data as usize),
196+
ev_type.to_interest(),
197+
)
198+
.map_err(std::io::Error::other)?;
199+
fd_set.insert(fd);
200+
Ok(())
140201
}
141202

142-
pub(crate) fn unregister_event(&self, fd: RawFd, ev_type: EventSet, data: u64) -> Result<()> {
143-
self.epoll
144-
.ctl(ControlOperation::Delete, fd, EpollEvent::new(ev_type, data))
203+
pub(crate) fn unregister_event(&self, fd: RawFd, _ev_type: EventSet, _data: u64) -> Result<()> {
204+
let mut fd_set = self.fd_set.lock().unwrap();
205+
if !fd_set.contains(&fd) {
206+
return Err(io::Error::from_raw_os_error(libc::ENOENT));
207+
}
208+
self.registry
209+
.deregister(&mut SourceFd(&fd))
210+
.map_err(|e| std::io::Error::other(format!("Failed to deregister fd {}: {}", fd, e)))?;
211+
fd_set.remove(&fd);
212+
Ok(())
145213
}
146214

147215
/// Run the event poll loop to handle all pending events on registered fds.
@@ -150,49 +218,30 @@ where
150218
/// associated with the backend.
151219
pub(crate) fn run(&self) -> VringEpollResult<()> {
152220
const EPOLL_EVENTS_LEN: usize = 100;
153-
let mut events = vec![EpollEvent::new(EventSet::empty(), 0); EPOLL_EVENTS_LEN];
154-
155-
'epoll: loop {
156-
let num_events = match self.epoll.wait(-1, &mut events[..]) {
157-
Ok(res) => res,
158-
Err(e) => {
159-
if e.kind() == io::ErrorKind::Interrupted {
160-
// It's well defined from the epoll_wait() syscall
161-
// documentation that the epoll loop can be interrupted
162-
// before any of the requested events occurred or the
163-
// timeout expired. In both those cases, epoll_wait()
164-
// returns an error of type EINTR, but this should not
165-
// be considered as a regular error. Instead it is more
166-
// appropriate to retry, by calling into epoll_wait().
167-
continue;
168-
}
169-
return Err(VringEpollError::EpollWait(e));
170-
}
171-
};
172-
173-
for event in events.iter().take(num_events) {
174-
let evset = match EventSet::from_bits(event.events) {
175-
Some(evset) => evset,
176-
None => {
177-
let evbits = event.events;
178-
println!("epoll: ignoring unknown event set: 0x{evbits:x}");
179-
continue;
180-
}
181-
};
182221

183-
let ev_type = event.data() as u16;
222+
let mut events = Events::with_capacity(EPOLL_EVENTS_LEN);
223+
'poll: loop {
224+
self.poller
225+
.lock()
226+
.unwrap()
227+
.poll(&mut events, None)
228+
.map_err(VringPollError::PollerWait)?;
184229

185-
// handle_event() returns true if an event is received from the exit event fd.
186-
if self.handle_event(ev_type, evset)? {
187-
break 'epoll;
230+
for event in events.iter() {
231+
let token = event.token();
232+
233+
if let Some(evt_set) = event_to_event_set(event) {
234+
if self.handle_event(token.0 as u16, evt_set)? {
235+
break 'poll;
236+
}
188237
}
189238
}
190239
}
191240

192241
Ok(())
193242
}
194243

195-
fn handle_event(&self, device_event: u16, evset: EventSet) -> VringEpollResult<bool> {
244+
fn handle_event(&self, device_event: u16, event: EventSet) -> VringEpollResult<bool> {
196245
if self.exit_event_fd.is_some() && device_event as usize == self.backend.num_queues() {
197246
return Ok(true);
198247
}
@@ -201,7 +250,7 @@ where
201250
let vring = &self.vrings[device_event as usize];
202251
let enabled = vring
203252
.read_kick()
204-
.map_err(VringEpollError::HandleEventReadKick)?;
253+
.map_err(VringPollError::HandleEventReadKick)?;
205254

206255
// If the vring is not enabled, it should not be processed.
207256
if !enabled {
@@ -210,16 +259,16 @@ where
210259
}
211260

212261
self.backend
213-
.handle_event(device_event, evset, &self.vrings, self.thread_id)
214-
.map_err(VringEpollError::HandleEventBackendHandling)?;
262+
.handle_event(device_event, event, &self.vrings, self.thread_id)
263+
.map_err(VringPollError::HandleEventBackendHandling)?;
215264

216265
Ok(false)
217266
}
218267
}
219268

220269
impl<T: VhostUserBackend> AsRawFd for VringEpollHandler<T> {
221270
fn as_raw_fd(&self) -> RawFd {
222-
self.epoll.as_raw_fd()
271+
self.poller.lock().unwrap().as_raw_fd()
223272
}
224273
}
225274

@@ -244,29 +293,32 @@ mod tests {
244293

245294
let eventfd = EventFd::new(0).unwrap();
246295
handler
247-
.register_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
296+
.register_listener(eventfd.as_raw_fd(), EventSet::Readable, 3)
248297
.unwrap();
249298
// Register an already registered fd.
250299
handler
251-
.register_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
300+
.register_listener(eventfd.as_raw_fd(), EventSet::Readable, 3)
252301
.unwrap_err();
253302
// Register an invalid data.
254303
handler
255-
.register_listener(eventfd.as_raw_fd(), EventSet::IN, 1)
304+
.register_listener(eventfd.as_raw_fd(), EventSet::Readable, 1)
256305
.unwrap_err();
257306

258307
handler
259-
.unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
308+
.unregister_listener(eventfd.as_raw_fd(), EventSet::Readable, 3)
260309
.unwrap();
261310
// unregister an already unregistered fd.
262311
handler
263-
.unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 3)
312+
.unregister_listener(eventfd.as_raw_fd(), EventSet::Readable, 3)
264313
.unwrap_err();
265314
// unregister an invalid data.
266315
handler
267-
.unregister_listener(eventfd.as_raw_fd(), EventSet::IN, 1)
316+
.unregister_listener(eventfd.as_raw_fd(), EventSet::Readable, 1)
268317
.unwrap_err();
269318
// Check we retrieve the correct file descriptor
270-
assert_eq!(handler.as_raw_fd(), handler.epoll.as_raw_fd());
319+
assert_eq!(
320+
handler.as_raw_fd(),
321+
handler.poller.lock().unwrap().as_raw_fd()
322+
);
271323
}
272324
}

0 commit comments

Comments
 (0)