-
Notifications
You must be signed in to change notification settings - Fork 654
Support preadv2 and pwritev2 syscalls. #4066
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3797,7 +3797,8 @@ static Switchable did_emulate_read(int syscallno, RecordTask* t, | |
| { | ||
| syscall_state.emulate_result(result); | ||
| record_ranges(t, ranges, result); | ||
| if (syscallno == Arch::pread64 || syscallno == Arch::preadv || result <= 0) { | ||
| if (syscallno == Arch::pread64 || syscallno == Arch::preadv || | ||
| syscallno == Arch::preadv2 || result <= 0) { | ||
| // Don't perform this syscall. | ||
| Registers r = t->regs(); | ||
| r.set_arg1(-1); | ||
|
|
@@ -3823,6 +3824,42 @@ static ParamSize select_param_size(intptr_t nfds, SupportedArch arch) { | |
| return ParamSize(size); | ||
| } | ||
|
|
||
| // RWF_* flags we know rr records correctly. Reject unknown flags so | ||
| // that a future kernel addition whose semantics break rr (e.g. affecting | ||
| // the offset or bytes we record) cannot silently go wrong; the tracee | ||
| // simply sees EINVAL as if running on an older kernel. | ||
| // RWF_APPEND is excluded from the write mask: when set, the kernel | ||
| // ignores the user's offset and uses/updates the current file position, | ||
| // but FileMonitor::retrieve_offset would compute the explicit offset | ||
| // argument and get it wrong. | ||
| enum { | ||
| RR_RWF_HIPRI = 0x00000001, | ||
| RR_RWF_DSYNC = 0x00000002, | ||
| RR_RWF_SYNC = 0x00000004, | ||
| RR_RWF_NOWAIT = 0x00000008, | ||
| RR_RWF_APPEND = 0x00000010, | ||
| RR_RWF_NOAPPEND = 0x00000020, | ||
| RR_RWF_ATOMIC = 0x00000040, | ||
| RR_RWF_DONTCACHE = 0x00000080, | ||
| }; | ||
| static const uint32_t RR_KNOWN_PREADV2_FLAGS = | ||
| RR_RWF_HIPRI | RR_RWF_DSYNC | RR_RWF_SYNC | RR_RWF_NOWAIT | | ||
| RR_RWF_APPEND | RR_RWF_NOAPPEND | RR_RWF_ATOMIC | RR_RWF_DONTCACHE; | ||
| static const uint32_t RR_KNOWN_PWRITEV2_FLAGS = | ||
| RR_KNOWN_PREADV2_FLAGS & ~RR_RWF_APPEND; | ||
|
|
||
| template <typename Arch> | ||
| static Switchable reject_preadv2_pwritev2(RecordTask* t, | ||
| TaskSyscallState& syscall_state) { | ||
| syscall_state.emulate_result(-EINVAL); | ||
| // Point fd at -1 so the kernel short-circuits the syscall with -EBADF; | ||
| // emulate_result overrides the tracee-visible result to -EINVAL. | ||
| Registers r = t->regs(); | ||
| r.set_arg1(-1); | ||
| t->set_regs(r); | ||
| return PREVENT_SWITCH; | ||
| } | ||
|
|
||
| template <typename Arch> | ||
| static Switchable rec_prepare_syscall_arch(RecordTask* t, | ||
| TaskSyscallState& syscall_state, | ||
|
|
@@ -4556,7 +4593,14 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, | |
| case Arch::readv: | ||
| /* ssize_t preadv(int fd, const struct iovec *iov, int iovcnt, | ||
| off_t offset); */ | ||
| case Arch::preadv: { | ||
| case Arch::preadv: | ||
| /* ssize_t preadv2(int fd, const struct iovec *iov, int iovcnt, | ||
| off_t offset, int flags); */ | ||
| case Arch::preadv2: { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's possible that some future |
||
| if (syscallno == Arch::preadv2 && | ||
| ((uint32_t)regs.arg6() & ~RR_KNOWN_PREADV2_FLAGS)) { | ||
| return reject_preadv2_pwritev2<Arch>(t, syscall_state); | ||
| } | ||
| int fd = (int)regs.arg1_signed(); | ||
| int iovcnt = (int)regs.arg3_signed(); | ||
| remote_ptr<void> iovecsp_void = syscall_state.reg_parameter( | ||
|
|
@@ -4583,6 +4627,15 @@ static Switchable rec_prepare_syscall_arch(RecordTask* t, | |
| return ALLOW_SWITCH; | ||
| } | ||
|
|
||
| /* ssize_t pwritev2(int fd, const struct iovec *iov, int iovcnt, | ||
| off_t offset, int flags); */ | ||
| case Arch::pwritev2: { | ||
| if ((uint32_t)regs.arg6() & ~RR_KNOWN_PWRITEV2_FLAGS) { | ||
| return reject_preadv2_pwritev2<Arch>(t, syscall_state); | ||
| } | ||
| return ALLOW_SWITCH; | ||
| } | ||
|
|
||
| /* pid_t waitpid(pid_t pid, int *status, int options); */ | ||
| /* pid_t wait4(pid_t pid, int *status, int options, struct rusage | ||
| * *rusage); | ||
|
|
@@ -7282,6 +7335,8 @@ static void rec_process_syscall_arch(RecordTask* t, | |
| case Arch::pkey_mprotect: | ||
| case Arch::pread64: | ||
| case Arch::preadv: | ||
| case Arch::preadv2: | ||
| case Arch::pwritev2: | ||
| case Arch::ptrace: | ||
| case Arch::read: | ||
| case Arch::readv: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the `RWF_APPEND` flag is set then the kernel ignores the user's offset, but we'll compute an incorrect offset below and things will go wrong. (I think we already have an existing bug if the user passes `O_APPEND` to `open()`; in that case `pwritev` will ignore the offset and we compute the wrong offset.)

So let's do what I said above and specify a mask of flags we know we can handle correctly --- I think that's everything currently defined except for `RWF_APPEND` --- and return `EINVAL` if any other flags are set. That will require adding code in `record_syscall.cc` for `pwritev2`.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for taking a look! Fixed