
Commit 1c9362c

Copy just used pages from huge ANONYMOUS|NORESERVE mappings.
ASan creates a "shadow" of the used memory. This is done in a mapping of
around 20 TB of address space, most of which is never touched. This patch
lets an ASan-enabled rr build pass the following tests:

  nested_detach
  nested_detach_kill
  nested_detach_kill_stuck
  nested_detach_wait
  nested_release

It avoids this error message:

  ERROR: AddressSanitizer: requested allocation size 0x20000000000
  (0x20000001000 after adjustments for alignment, red zones etc.)
  exceeds maximum supported size of 0x10000000000 (thread T0)

Changes in V2:
- Fall back in copy_mem_mapping_just_used if pagemap is not available.
- Allocate the buffer with std::vector instead of new.
- Attempt to improve readability.
- Better explain the intended behaviour of copy_mem_mapping_just_used.
- Also check bit 62 (swap), as done in process_execve.
- Check the return value of pread.
- Change the test to better exercise the handling of consecutive pages
  at the end of a mapping.
1 parent 0819303 commit 1c9362c
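Background, not part of the patch: MAP_ANONYMOUS|MAP_NORESERVE mappings cost essentially nothing until individual pages are touched, and /proc/PID/pagemap exposes one 64-bit entry per virtual page, with bit 63 set for pages present in RAM and bit 62 for pages swapped out (see https://www.kernel.org/doc/Documentation/vm/pagemap.txt). A minimal standalone sketch of both halves of that mechanism, which the patch relies on:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void) {
  size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
  size_t npages = 1024; /* small stand-in for the ~20 TB ASan shadow */
  char* mem = mmap(NULL, npages * pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (mem == MAP_FAILED)
    return 1;
  memset(mem + 7 * pagesize, 0xab, pagesize); /* touch exactly one page */

  int fd = open("/proc/self/pagemap", O_RDONLY);
  if (fd < 0)
    return 1;
  /* pagemap holds one 64-bit entry per page; the entry for a virtual
   * address lives at file offset (addr / pagesize) * 8 */
  off_t base = (off_t)((uintptr_t)mem / pagesize) * (off_t)sizeof(uint64_t);
  for (size_t i = 0; i < npages; i++) {
    uint64_t entry;
    if (pread(fd, &entry, sizeof(entry), base + (off_t)(i * sizeof(entry))) !=
        (ssize_t)sizeof(entry))
      return 1;
    if (entry & ((1ULL << 63) | (1ULL << 62))) /* present or swapped */
      printf("page %zu is used\n", i); /* expected: only page 7 */
  }
  close(fd);
  return 0;
}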

File tree: 3 files changed, 152 insertions(+), 0 deletions(-)

  CMakeLists.txt
  src/Task.cc
  src/test/detach_huge_mmap.c


CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -934,6 +934,7 @@ set(BASIC_TESTS
   daemon
   desched_blocking_poll
   desched_sigkill
+  detach_huge_mmap
   detach_state
   detach_threads
   detach_sigkill

src/Task.cc

Lines changed: 77 additions & 0 deletions
@@ -3696,6 +3696,73 @@ static void copy_mem_mapping(Task* from, Task* to, const KernelMapping& km) {
   }
 }
 
+static bool copy_mem_mapping_just_used(Task* from, Task* to, const KernelMapping& km)
+{
+  ScopedFd& fd = from->pagemap_fd();
+  if (!fd.is_open()) {
+    LOG(debug) << "Failed to open " << from->proc_pagemap_path();
+    return false;
+  }
+
+  size_t pagesize = page_size();
+
+  vector<uint64_t> buf;
+#if defined(__i386__)
+  const auto buf_page_count = 64*1024;
+#else
+  const auto buf_page_count = 1024*1024;
+#endif
+  buf.resize(buf_page_count);
+
+  uint64_t pages_present = 0; // Just for logging
+
+  for (uintptr_t offset = 0; offset < km.size(); offset += buf.size() * pagesize) {
+
+    auto read_offset = ((km.start().as_int() + offset) / pagesize) * sizeof(uint64_t);
+    auto read_count = buf.size() * sizeof(uint64_t);
+    ssize_t bytes_read = pread(fd, buf.data(), read_count, read_offset);
+    if (bytes_read <= 0) {
+      FATAL() << "pread of pagemap fd failed, errno=" << errno;
+    }
+
+    // A chunk was read from pagemap above, now iterate through it to detect
+    // if memory is physically present (bit 63) or in swap (bit 62) in Task "from".
+    // If yes, just transfer those pages to the new Task "to".
+    // Also try to find consecutive pages to copy them in one operation.
+    // The file /proc/PID/pagemap consists of 64-bit values, each describing
+    // the state of one page. See https://www.kernel.org/doc/Documentation/vm/pagemap.txt
+
+    for (size_t page = 0; page < buf.size() && page < bytes_read / sizeof(uint64_t); ++page) {
+      if (buf[page] & ((1ULL << 63) | (1ULL << 62))) {
+        auto start = km.start() + offset + page * pagesize;
+        if (start >= km.end()) {
+          break;
+        }
+        ++pages_present;
+
+        // Check for consecutive used pages
+        while (page < buf.size()-1 &&
+               buf[page + 1] & ((1ULL << 63) | (1ULL << 62)))
+        {
+          if (km.start() + offset + pagesize * (page + 1) >= km.end()) {
+            break;
+          }
+          ++page;
+          ++pages_present;
+        }
+
+        auto end = km.start() + offset + pagesize * (page + 1);
+        LOG(debug) << km << " copying start: 0x" << hex << start << " end: 0x" << end
+                   << dec << " pages: " << (end - start) / pagesize;
+        auto pages = km.subrange(start, end);
+        copy_mem_mapping(from, to, pages);
+      }
+    }
+  }
+  LOG(debug) << km << " pages_present: " << pages_present << " pages_total: " << km.size() / pagesize;
+  return true;
+}
+
 static void move_vdso_mapping(AutoRemoteSyscalls &remote, const KernelMapping &km) {
   for (const auto& m : remote.task()->vm()->maps()) {
     if (m.map.is_vdso() && m.map.start() != km.start()) {
@@ -3783,6 +3850,16 @@ void Task::dup_from(Task *other) {
     create_mapping(this, remote_this, km);
     LOG(debug) << "Copying mapping into " << tid;
     if (!(km.flags() & MAP_SHARED)) {
+      // Make the effort just for bigger mappings, copy smaller as a whole.
+      if (km.flags() & (MAP_ANONYMOUS | MAP_NORESERVE) &&
+          km.size() >= 0x10000000/*256MB*/)
+      {
+        LOG(debug) << "Using copy_mem_mapping_just_used";
+        if (copy_mem_mapping_just_used(other, this, km)) {
+          continue;
+        }
+        LOG(debug) << "Fallback to copy_mem_mapping";
+      }
       copy_mem_mapping(other, this, km);
     }
   }
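As a reading aid, here is the run-coalescing scan from copy_mem_mapping_just_used reduced to a standalone sketch (hypothetical names, plain arrays instead of rr's types): it emits one copy per maximal run of entries with bit 63 or bit 62 set, which is what keeps the number of copy_mem_mapping calls low for sparsely used mappings.

#include <stdint.h>
#include <stdio.h>

/* Print one "copy" per maximal run of used pagemap entries
 * (bit 63: present in RAM, bit 62: in swap). */
static void find_used_runs(const uint64_t* entries, size_t n) {
  const uint64_t used = (1ULL << 63) | (1ULL << 62);
  for (size_t page = 0; page < n; ++page) {
    if (entries[page] & used) {
      size_t start = page;
      /* extend the run over consecutive used pages */
      while (page + 1 < n && (entries[page + 1] & used))
        ++page;
      printf("copy pages [%zu, %zu]: %zu page(s) in one operation\n",
             start, page, page - start + 1);
    }
  }
}

int main(void) {
  /* pages 1, 4, 5 and 6 are used: expect runs of 1 and 3 pages */
  uint64_t entries[8] = { 0, 1ULL << 63, 0, 0,
                          1ULL << 63, 1ULL << 62, 1ULL << 63, 0 };
  find_used_runs(entries, 8);
  return 0;
}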

src/test/detach_huge_mmap.c

Lines changed: 74 additions & 0 deletions
@@ -0,0 +1,74 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#include "util_internal.h"
+
+static const int magic = 0xab;
+static uint64_t size = 0x10000000; /* 256 MB, at least the value in Task::dup_from */
+static size_t page_size;
+static void* pages[10];
+static unsigned int idx; /* next free index into pages */
+
+void test_alloc(char* mem, unsigned int count, off_t offset) {
+
+  test_assert(0 == munmap(mem + size, page_size));
+
+  /* one page near the start */
+  test_assert(idx < sizeof(pages)/sizeof(pages[0]));
+  pages[idx] = mem + page_size;
+  memset(pages[idx], magic, page_size);
+  idx++;
+
+  /* one or more pages near or at the end */
+  for (unsigned int i = 0; i < count; i++) {
+    test_assert(idx < sizeof(pages)/sizeof(pages[0]));
+    pages[idx] = mem + offset + i * page_size;
+    memset(pages[idx], magic, page_size);
+    idx++;
+  }
+}
+
+int main(void) {
+  page_size = sysconf(_SC_PAGESIZE);
+
+  /* Create one big mapping, then break it up by munmap
+   * into smaller ones, to better test the handling at
+   * the end of mappings. */
+
+  void* mem1 = mmap(NULL, 4 * (size + page_size), PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+  test_assert(mem1 != MAP_FAILED);
+
+  void* mem2 = mem1 + size + page_size;
+  void* mem3 = mem2 + size + page_size;
+  void* mem4 = mem3 + size + page_size;
+
+  test_alloc(mem1, 1, size - page_size);     /* one used page, at the last page */
+  test_alloc(mem2, 1, size - page_size * 2); /* one used page, just before the last page */
+  test_alloc(mem3, 2, size - page_size * 2); /* two consecutive pages, ending at the last page */
+  test_alloc(mem4, 2, size - page_size * 3); /* two consecutive pages, ending before the last page */
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    if (running_under_rr()) {
+      rr_detach_teleport();
+    }
+
+    /* a reference region filled with the magic byte, for easy comparison */
+    char* cmp = malloc(page_size * 3);
+    test_assert(cmp != NULL);
+    memset(cmp, magic, page_size * 3);
+
+    /* check that the saved pages still have the expected contents */
+    for (unsigned int i = 0; i < idx; i++) {
+      test_assert(memcmp(pages[i], cmp, page_size) == 0);
+    }
+
+    return 0;
+  }
+
+  int status;
+  wait(&status);
+  test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+  atomic_puts("EXIT-SUCCESS");
+  return 0;
+}
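A note on the test's design: each test_alloc call touches one page near the start of its chunk plus a run of count pages at the given offset, and the four chunks differ only in whether that run ends exactly at the mapping's last page. This targets the end-of-mapping checks in the consecutive-page scan of copy_mem_mapping_just_used, the behaviour the V2 revision set out to exercise.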
