Skip to content

Commit a53cecf

Browse files
AnilAltinaygvisor-bot
authored andcommitted
Fix leaking /proc/self/mem of a new process after execve.
memFD is linked with the Task instead of MemoryManager. At the time of read/write, it uses Task's MemoryManager. However, it should have used Task's MemoryManager from the time when we do open. To fix it, we store the MemoryManager at the time of opening /proc/self/mem. Added the reproducer as a test. PiperOrigin-RevId: 760855949
1 parent 094dd52 commit a53cecf

File tree

3 files changed

+134
-8
lines changed

3 files changed

+134
-8
lines changed

pkg/sentry/fsimpl/proc/task_files.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,9 @@ func (f *memInode) Open(ctx context.Context, rp *vfs.ResolvingPath, d *kernfs.De
453453
if err := checkTaskState(f.task); err != nil {
454454
return nil, err
455455
}
456-
fd := &memFD{}
456+
fd := &memFD{
457+
mm: getMM(f.task),
458+
}
457459
if err := fd.Init(rp.Mount(), d, f, opts.Flags); err != nil {
458460
return nil, err
459461
}
@@ -476,7 +478,7 @@ type memFD struct {
476478
vfs.LockFD
477479

478480
inode *memInode
479-
481+
mm *mm.MemoryManager
480482
// mu guards the fields below.
481483
mu sync.Mutex `state:"nosave"`
482484
offset int64
@@ -515,9 +517,9 @@ func (fd *memFD) PWrite(ctx context.Context, src usermem.IOSequence, offset int6
515517
if src.NumBytes() == 0 {
516518
return 0, nil
517519
}
518-
m, err := getMMIncRef(fd.inode.task)
519-
if err != nil {
520-
return 0, err
520+
m := fd.mm
521+
if m == nil || !m.IncUsers() {
522+
return 0, nil
521523
}
522524
defer m.DecUsers(ctx)
523525
// Buffer the write data because of MM locks
@@ -542,9 +544,9 @@ func (fd *memFD) PRead(ctx context.Context, dst usermem.IOSequence, offset int64
542544
if dst.NumBytes() == 0 {
543545
return 0, nil
544546
}
545-
m, err := getMMIncRef(fd.inode.task)
546-
if err != nil {
547-
return 0, err
547+
m := fd.mm
548+
if m == nil || !m.IncUsers() {
549+
return 0, nil
548550
}
549551
defer m.DecUsers(ctx)
550552
// Buffer the read data because of MM locks

test/syscalls/linux/BUILD

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,13 +767,18 @@ cc_binary(
767767
deps = select_gtest() + [
768768
"//test/util:file_descriptor",
769769
"//test/util:fs_util",
770+
"//test/util:logging",
771+
"//test/util:memory_util",
770772
"//test/util:multiprocess_util",
771773
"//test/util:posix_error",
774+
"//test/util:save_util",
772775
"//test/util:temp_path",
773776
"//test/util:test_util",
774777
"//test/util:thread_util",
775778
"@com_google_absl//absl/strings",
779+
"@com_google_absl//absl/strings:str_format",
776780
"@com_google_absl//absl/synchronization",
781+
"@com_google_absl//absl/time",
777782
"@com_google_absl//absl/types:optional",
778783
],
779784
)

test/syscalls/linux/exec.cc

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <errno.h>
1818
#include <fcntl.h>
1919
#include <sys/eventfd.h>
20+
#include <sys/mman.h>
2021
#include <sys/ptrace.h>
2122
#include <sys/resource.h>
2223
#include <sys/time.h>
@@ -33,15 +34,20 @@
3334
#include "gmock/gmock.h"
3435
#include "gtest/gtest.h"
3536
#include "absl/strings/match.h"
37+
#include "absl/strings/numbers.h"
3638
#include "absl/strings/str_cat.h"
39+
#include "absl/strings/str_format.h"
3740
#include "absl/strings/str_split.h"
3841
#include "absl/strings/string_view.h"
3942
#include "absl/synchronization/mutex.h"
4043
#include "absl/types/optional.h"
4144
#include "test/util/file_descriptor.h"
4245
#include "test/util/fs_util.h"
46+
#include "test/util/logging.h"
47+
#include "test/util/memory_util.h"
4348
#include "test/util/multiprocess_util.h"
4449
#include "test/util/posix_error.h"
50+
#include "test/util/save_util.h"
4551
#include "test/util/temp_path.h"
4652
#include "test/util/test_util.h"
4753
#include "test/util/thread_util.h"
@@ -63,6 +69,8 @@ constexpr char kPriorityWorkload[] = "test/syscalls/linux/priority_execve";
6369
constexpr char kExit42[] = "--exec_exit_42";
6470
constexpr char kExecWithThread[] = "--exec_exec_with_thread";
6571
constexpr char kExecFromThread[] = "--exec_exec_from_thread";
72+
constexpr char kExecInParent[] = "--exec_exec_in_parent";
73+
constexpr char kWriteAndWaitForPid[] = "--exec_write_and_wait_for_pid";
6674

6775
// Runs file specified by dirfd and pathname with argv and checks that the exit
6876
// status is expect_status and that stderr contains expect_stderr.
@@ -851,6 +859,101 @@ TEST(ExecTest, Setpgid) {
851859
EXPECT_THAT(setpgid(getpid(), pid), SyscallSucceeds());
852860
}
853861

862+
TEST(ExecTest, ReadProcMemAfterExecFromChild) {
863+
// Fork and exec in the parent process.
864+
CheckExec("/proc/self/exe", {"/proc/self/exe", kExecInParent}, {},
865+
W_EXITCODE(42, 0), "");
866+
}
867+
868+
/*
869+
This function, along with writeAndWaitForPid, sets up the test case to verify
870+
that a /proc/self/mem file descriptor is not leaked across an execve similar to
871+
b/382136040.
872+
873+
The setup is as follows:
874+
875+
This function opens /proc/self/mem in a process we'll call P1.
876+
* P1 forks, creating a child process P2.
877+
* P1 (the parent) then execves to become a new process, P3, which runs
878+
writeAndWaitForPid.
879+
* P2 (the child) holds the memfd from P1. It waits for a
880+
signal from P3.
881+
* P3 maps some memory with a secret value and then signals P2 via
882+
a pipe.
883+
* P2, upon receiving the signal, tries to read from the secret's memory
884+
location using the memfd it inherited. The test asserts that this pread in P2
885+
fails, because the memfd should be tied to the (now defunct) address space of
886+
P1, not the new address space of P3.
887+
*/
888+
void execInParent() {
889+
auto mem_fd = ASSERT_NO_ERRNO_AND_VALUE(Open("/proc/self/mem", O_RDONLY));
890+
int pipe_fd[2] = {};
891+
TEST_PCHECK(pipe(pipe_fd) == 0);
892+
893+
const auto readSecret = [&] {
894+
// Close writing end of the pipe.
895+
close(pipe_fd[1]);
896+
// Await parent OK to read the secret.
897+
uintptr_t addr;
898+
TEST_PCHECK_MSG(ReadFd(pipe_fd[0], &addr, sizeof(addr)) == sizeof(addr),
899+
"Failed to read mmap address from the pipe.");
900+
// Close the reading end of the pipe.
901+
TEST_PCHECK(close(pipe_fd[0]) == 0);
902+
// Parent sent the signal. Read the secret.
903+
// Use /proc/mem fd from parent to try to read the secret.
904+
char secret[] = "secret";
905+
char output[sizeof(secret)];
906+
TEST_PCHECK_MSG(
907+
pread(mem_fd.get(), output, sizeof(output), addr) != sizeof(secret),
908+
"pread succeeded. It should have failed.");
909+
};
910+
pid_t pid = fork();
911+
// In child process.
912+
if (pid == 0) {
913+
readSecret();
914+
TEST_CHECK_MSG(!::testing::Test::HasFailure(),
915+
"EXPECT*/ASSERT* failed. These are not async-signal-safe "
916+
"and must not be called from fn.");
917+
_exit(0);
918+
}
919+
// In parent process.
920+
MaybeSave();
921+
TEST_PCHECK(close(pipe_fd[0]) == 0);
922+
TEST_CHECK(pid != -1);
923+
924+
// Execve with args{function name, child_pid, pipe_fd[1]}
925+
const ExecveArray argv = {"/proc/self/exe", kWriteAndWaitForPid,
926+
absl::StrCat(pid) /*child_pid*/,
927+
absl::StrCat(pipe_fd[1])};
928+
const ExecveArray envv;
929+
execve("/proc/self/exe", argv.get(), envv.get());
930+
}
931+
/*
932+
This function is the new program image after execve in execInParent. It maps a
933+
"secret" value at a known address, signals the child of the original process to
934+
proceed, and then waits for it to exit.
935+
*/
936+
void writeAndWaitForPid(int child_pid, int pipe_fd) {
937+
// mmap the same address that the child process will try to read.
938+
const Mapping m = ASSERT_NO_ERRNO_AND_VALUE(
939+
MmapAnon(kPageSize, PROT_READ | PROT_WRITE, MAP_PRIVATE));
940+
const char secret[] = "secret";
941+
absl::SNPrintF((char*)m.addr(), sizeof(secret), "%s", secret);
942+
943+
// Tell the child process to read the secret location.
944+
uintptr_t addr = m.addr();
945+
TEST_PCHECK_MSG(WriteFd(pipe_fd, &addr, sizeof(addr)) == sizeof(addr),
946+
"Failed to write mmap address to the pipe.");
947+
TEST_PCHECK(close(pipe_fd) == 0);
948+
949+
// Wait for child process to read before the exit.
950+
int status;
951+
TEST_PCHECK_MSG(waitpid(child_pid, &status, 0) == child_pid,
952+
"waitpid failed.");
953+
TEST_CHECK(WIFEXITED(status) && WEXITSTATUS(status) == 0);
954+
exit(42);
955+
}
956+
854957
void ExecWithThread() {
855958
// Used to ensure that the thread has actually started.
856959
absl::Mutex mu;
@@ -948,6 +1051,22 @@ int main(int argc, char** argv) {
9481051
gvisor::testing::ExecFromThread();
9491052
return 1;
9501053
}
1054+
if (arg == gvisor::testing::kExecInParent) {
1055+
gvisor::testing::execInParent();
1056+
return 1;
1057+
}
1058+
if (arg == gvisor::testing::kWriteAndWaitForPid) {
1059+
int pid;
1060+
if (!absl::SimpleAtoi(argv[i + 1], &pid)) {
1061+
return 1;
1062+
}
1063+
int fd;
1064+
if (!absl::SimpleAtoi(argv[i + 2], &fd)) {
1065+
return 1;
1066+
}
1067+
gvisor::testing::writeAndWaitForPid(pid, fd);
1068+
return 1;
1069+
}
9511070
}
9521071

9531072
gvisor::testing::TestInit(&argc, &argv);

0 commit comments

Comments
 (0)