Skip to content

Commit b59071b

Browse files
committed
When working around kernel bugs that affect writes to unusual mappings via /proc/.../mem, only fix the part of each mapping that actually overlaps the area we're going to write.
Trying to fix the whole mapping at once triggers `ENOMEM` on most kernel versions, e.g. 6.15, if the mapping is `PROT_NONE` `MAP_PRIVATE`. Resolves #3976
1 parent f71c3b1 commit b59071b

3 files changed

Lines changed: 49 additions & 6 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1175,6 +1175,7 @@ set(BASIC_TESTS
11751175
mlock
11761176
mlock_madvise
11771177
mmap_adjacent_to_rr_usage
1178+
mmap_huge
11781179
mmap_private
11791180
mmap_private_grow_under_map
11801181
mmap_recycle

src/Task.cc

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3186,12 +3186,15 @@ void Task::read_bytes_helper(remote_ptr<void> addr, ssize_t buf_size, void* buf,
31863186
}
31873187

31883188
/**
3189-
* This function exists to work around
3189+
* This function was initially created to work around
31903190
* https://bugzilla.kernel.org/show_bug.cgi?id=99101.
31913191
* On some kernels pwrite() to /proc/.../mem fails when writing to a region
3192-
* that's PROT_NONE.
3193-
* Also, writing through MAP_SHARED readonly mappings fails (even if the
3194-
* file was opened read-write originally), so we handle that here too.
3192+
* that's PROT_NONE. Actually this was fixed in kernel 4.8 but we still
3193+
* support 4.7 for now and there's no point in bumping the version
3194+
* requirement just to avoid this, especially because...
3195+
* Writing through MAP_SHARED readonly mappings fails (even if the
3196+
* file was opened read-write originally), so we handle that here too. This
3197+
* seems to be an issue for all kernel versions up to the present day.
31953198
*/
31963199
static ssize_t safe_pwrite64(Task* t, const void* buf, ssize_t buf_size,
31973200
remote_ptr<void> addr) {
@@ -3205,9 +3208,14 @@ static ssize_t safe_pwrite64(Task* t, const void* buf, ssize_t buf_size,
32053208
continue;
32063209
}
32073210
if (!(m.map.prot() & PROT_READ) || (m.map.flags() & MAP_SHARED)) {
3208-
mappings_to_fix.push_back(m.map);
3211+
// Limit the mapping change to the region we're actually going to write to.
3212+
// Kernel 6.15 reports ENOMEM when we try to mprotect(PROT_WRITE) colossal
3213+
// MAP_PRIVATE mappings. For some reason MAP_SHARED is OK...
3214+
auto start = floor_page_size(addr);
3215+
auto end = ceil_page_size(addr + buf_size);
3216+
mappings_to_fix.push_back(m.map.subrange(start, end));
32093217
}
3210-
};
3218+
}
32113219

32123220
if (mappings_to_fix.empty()) {
32133221
return pwrite_all_fallible(t->vm()->mem_fd(), buf, buf_size, addr.as_int());

src/test/mmap_huge.c

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
2+
3+
#include "util.h"
4+
5+
int main(void) {
  /* Maps and unmaps a 2^40-byte memfd once for every combination of
   * {PROT_NONE, PROT_READ} x {MAP_PRIVATE, MAP_SHARED}, asserting that each
   * step succeeds. Prints "EXIT-SUCCESS" when everything passed. */

  /* Protection/flag pairs, listed in the order they are exercised. */
  static const struct {
    int prot;
    int map_flags;
  } combos[] = {
    { PROT_NONE, MAP_PRIVATE },
    { PROT_NONE, MAP_SHARED },
    { PROT_READ, MAP_PRIVATE },
    { PROT_READ, MAP_SHARED },
  };
  const size_t combo_count = sizeof(combos) / sizeof(combos[0]);

  /* A 2^40-byte mapping cannot be addressed with 4-byte pointers, so there
   * is nothing to test in that configuration. */
  if (sizeof(void*) == 4) {
    atomic_puts("Skipping test on 32-bit");
    atomic_puts("EXIT-SUCCESS");
    return 0;
  }

  int fd = memfd_create("rr_test", 0);
  test_assert(fd >= 0);

  /* Grow the file to the full length, then write one byte at its start. */
  uint64_t len = 1LL << 40;
  int ret = ftruncate(fd, len);
  test_assert(ret == 0);
  ret = write(fd, "x", 1);
  test_assert(ret == 1);

  /* Calls are kept outside test_assert() so they do not depend on how that
   * macro evaluates its argument. */
  for (size_t k = 0; k < combo_count; ++k) {
    void* p = mmap(NULL, len, combos[k].prot, combos[k].map_flags, fd, 0);
    test_assert(p != MAP_FAILED);
    ret = munmap(p, len);
    test_assert(ret == 0);
  }

  atomic_puts("EXIT-SUCCESS");

  return 0;
}

0 commit comments

Comments
 (0)