Commit 5c1c898

Copy just used pages from huge ANONYMOUS mappings.
ASan creates a "shadow" of the used memory. This shadow lives in a mapping of around 20 TB of address space, but most of it is never touched. This patch helps an ASan-enabled rr build in the following tests:

  nested_detach
  nested_detach_kill
  nested_detach_kill_stuck
  nested_detach_wait
  nested_release

It avoids this error message:

  ERROR: AddressSanitizer: requested allocation size 0x20000000000 (0x20000001000 after adjustments for alignment, red zones etc.) exceeds maximum supported size of 0x10000000000 (thread T0)

Changes in V5:
- Add more of the suggested readability improvements.
- Avoid a possible underflow issue.
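For context (not part of the commit message): the patch detects used pages through /proc/PID/pagemap, which holds one 64-bit entry per virtual page; bit 63 (PM_PRESENT) marks pages backed by RAM and bit 62 (PM_SWAP) marks pages that were swapped out. Below is a minimal standalone C sketch of that check, with illustrative names only; on modern kernels unprivileged readers see these two bits even though page frame numbers are hidden.

/* Sketch only: report which pages of an anonymous mapping are backed by
 * RAM or swap, using /proc/self/pagemap (one uint64_t entry per page). */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define PM_PRESENT (1ULL << 63)
#define PM_SWAP    (1ULL << 62)

int main(void) {
  size_t pagesize = (size_t)sysconf(_SC_PAGESIZE);
  size_t npages = 1024; /* e.g. a 4 MB mapping with 4 KB pages */
  char* mem = mmap(NULL, npages * pagesize, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (mem == MAP_FAILED) return 1;
  memset(mem + 10 * pagesize, 0xab, pagesize); /* touch exactly one page */

  int fd = open("/proc/self/pagemap", O_RDONLY);
  if (fd < 0) return 1;

  size_t used = 0;
  for (size_t i = 0; i < npages; i++) {
    uint64_t entry;
    /* the entry for a page lives at (virtual page number) * sizeof(entry) */
    off_t offset = (off_t)(((uintptr_t)mem / pagesize + i) * sizeof(entry));
    if (pread(fd, &entry, sizeof(entry), offset) != (ssize_t)sizeof(entry)) return 1;
    if (entry & (PM_PRESENT | PM_SWAP)) {
      printf("page %zu of the mapping is used\n", i);
      used++;
    }
  }
  printf("%zu of %zu pages used\n", used, npages);
  close(fd);
  return 0;
}

The patch's copy_mem_mapping_just_used below does the same lookup, but reads the entries in chunks of 65536 and copies only the runs of pages it finds used.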
1 parent a28cc29 commit 5c1c898

3 files changed: 145 additions, 0 deletions

CMakeLists.txt (1 addition, 0 deletions)

@@ -934,6 +934,7 @@ set(BASIC_TESTS
   daemon
   desched_blocking_poll
   desched_sigkill
+  detach_huge_mmap
   detach_state
   detach_threads
   detach_sigkill

src/Task.cc (70 additions, 0 deletions)

@@ -3696,6 +3696,66 @@ static void copy_mem_mapping(Task* from, Task* to, const KernelMapping& km) {
   }
 }
 
+// https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/fs/proc/task_mmu.c?h=v6.3#n1352
+#define PM_PRESENT (1ULL << 63)
+#define PM_SWAP (1ULL << 62)
+
+static bool copy_mem_mapping_just_used(Task* from, Task* to, const KernelMapping& km)
+{
+  ScopedFd& fd = from->pagemap_fd();
+  if (!fd.is_open()) {
+    LOG(debug) << "Failed to open " << from->proc_pagemap_path();
+    return false;
+  }
+
+  size_t pagesize = page_size();
+  uint64_t pages_present = 0; // Just for logging
+
+  const int max_buf_size = 65536;
+  vector<uint64_t> buf;
+
+  for (uintptr_t page_offset = 0; page_offset < km.size() / pagesize; page_offset += max_buf_size) {
+    auto page_read_offset = (km.start().as_int() / pagesize + page_offset);
+    size_t page_read_count = min<size_t>(max_buf_size, km.size() / pagesize - page_offset);
+    buf.resize(page_read_count);
+    size_t bytes_read = pread(fd, buf.data(), page_read_count * sizeof(uint64_t), page_read_offset * sizeof(uint64_t));
+    ASSERT(from, bytes_read == page_read_count * sizeof(uint64_t));
+
+    // A chunk was read from pagemap above, now iterate through it to detect
+    // if memory is physically present (bit 63, PM_PRESENT) or in swap (bit 62, PM_SWAP) in Task "from".
+    // If yes, just transfer those pages to the new Task "to".
+    // Also try to find consecutive pages to copy them in one operation.
+    // The file /proc/PID/pagemap consists of 64-bit values, each describing
+    // the state of one page. See https://www.kernel.org/doc/Documentation/vm/pagemap.txt
+
+    for (size_t page = 0; page < page_read_count; ++page) {
+      if (buf[page] & (PM_PRESENT | PM_SWAP)) {
+        auto start = km.start() + (page_offset + page) * pagesize;
+        if (start >= km.end()) {
+          break;
+        }
+        ++pages_present;
+
+        // Check for consecutive used pages
+        while (page + 1 < page_read_count &&
+               buf[page + 1] & (PM_PRESENT | PM_SWAP))
+        {
+          ++page;
+          ++pages_present;
+        }
+
+        auto end = km.start() + (page_offset + page + 1) * pagesize;
+        LOG(debug) << km << " copying start: 0x" << hex << start << " end: 0x" << end
+                   << dec << " pages: " << (end - start) / pagesize;
+        auto pages = km.subrange(start, end);
+        copy_mem_mapping(from, to, pages);
+      }
+    }
+  }
+  LOG(debug) << km << " pages_present: " << pages_present << " pages_total: " << km.size() / pagesize;
+  return true;
+}
+
 static void move_vdso_mapping(AutoRemoteSyscalls &remote, const KernelMapping &km) {
   for (const auto& m : remote.task()->vm()->maps()) {
     if (m.map.is_vdso() && m.map.start() != km.start()) {

@@ -3783,6 +3843,16 @@ void Task::dup_from(Task *other) {
     create_mapping(this, remote_this, km);
     LOG(debug) << "Copying mapping into " << tid;
     if (!(km.flags() & MAP_SHARED)) {
+      // Make the effort just for bigger mappings, copy smaller as a whole.
+      if ((km.flags() & MAP_ANONYMOUS) &&
+          km.size() >= 0x400000/*4MB*/)
+      {
+        LOG(debug) << "Using copy_mem_mapping_just_used";
+        if (copy_mem_mapping_just_used(other, this, km)) {
+          continue;
+        }
+        LOG(debug) << "Fallback to copy_mem_mapping";
+      }
       copy_mem_mapping(other, this, km);
     }
   }
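As a reading aid (not from the commit): the inner loop in copy_mem_mapping_just_used coalesces runs of adjacent used pages so that each run is handed to copy_mem_mapping once instead of page by page. Here is a toy sketch of that run-finding step over an already-read buffer of pagemap entries, with a hypothetical copy_range() standing in for the real copy:

/* Toy illustration of the run coalescing: given per-page entries, call
 * copy_range() once per maximal run of consecutive used pages. */
#include <stdint.h>
#include <stdio.h>

#define PM_PRESENT (1ULL << 63)
#define PM_SWAP    (1ULL << 62)

static void copy_range(size_t first, size_t last) { /* hypothetical stand-in */
  printf("copy pages [%zu, %zu]\n", first, last);
}

static void copy_used_runs(const uint64_t* entries, size_t count) {
  for (size_t page = 0; page < count; ++page) {
    if (entries[page] & (PM_PRESENT | PM_SWAP)) {
      size_t start = page;
      /* extend the run while the next page is also used */
      while (page + 1 < count && (entries[page + 1] & (PM_PRESENT | PM_SWAP)))
        ++page;
      copy_range(start, page);
    }
  }
}

int main(void) {
  /* pages 1, 4, 5 and 6 are "used": expect two copy_range calls */
  uint64_t entries[8] = { 0, PM_PRESENT, 0, 0, PM_PRESENT, PM_SWAP, PM_PRESENT, 0 };
  copy_used_runs(entries, 8);
  return 0;
}

With these example entries the sketch reports two runs: page 1 alone, and pages 4 through 6.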

src/test/detach_huge_mmap.c (new file, 74 additions)

@@ -0,0 +1,74 @@
+/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */
+
+#include "util_internal.h"
+
+static const int magic = 0xab;
+static uint64_t size = 0x400000; /* 4 MB, at least the value in Task::dup_from */
+static size_t page_size;
+static void* pages[10];
+static unsigned int idx; /* next index of pages */
+
+void test_alloc(char* mem, unsigned int count, off_t offset) {
+
+  test_assert(0 == munmap(mem + size, page_size));
+
+  /* one page near the start */
+  test_assert(idx < sizeof(pages)/sizeof(pages[0]));
+  pages[idx] = mem + page_size;
+  memset(pages[idx], magic, page_size);
+  idx++;
+
+  /* one or more pages near or at the end */
+  for (unsigned int i = 0; i < count; i++) {
+    test_assert(idx < sizeof(pages)/sizeof(pages[0]));
+    pages[idx] = mem + offset + i * page_size;
+    memset(pages[idx], magic, page_size);
+    idx++;
+  }
+}
+
+int main(void) {
+  page_size = sysconf(_SC_PAGESIZE);
+
+  /* Create one big mapping, then break it up by munmap
+   * into smaller ones, to better test the handling of
+   * the end of mappings. */
+
+  void* mem1 = mmap(NULL, 4 * (size + page_size), PROT_READ | PROT_WRITE,
+                    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+  test_assert(mem1 != MAP_FAILED);
+
+  void* mem2 = mem1 + size + page_size;
+  void* mem3 = mem2 + size + page_size;
+  void* mem4 = mem3 + size + page_size;
+
+  test_alloc(mem1, 1, size - page_size);     /* one page used at the last page */
+  test_alloc(mem2, 1, size - page_size * 2); /* one page used before the last page */
+  test_alloc(mem3, 2, size - page_size * 2); /* two consecutive pages at the last two pages */
+  test_alloc(mem4, 2, size - page_size * 3); /* two consecutive pages before the last page */
+
+  pid_t pid = fork();
+  if (pid == 0) {
+    if (running_under_rr()) {
+      rr_detach_teleport();
+    }
+
+    /* create one page for easier comparison */
+    char* cmp = malloc(page_size * 3);
+    test_assert(cmp != NULL);
+    memset(cmp, magic, page_size * 3);
+
+    /* check if the saved pages have the expected value */
+    for (unsigned int i = 0; i < idx; i++) {
+      test_assert(memcmp(pages[i], cmp, page_size) == 0);
+    }
+
+    return 0;
+  }
+
+  int status;
+  wait(&status);
+  test_assert(WIFEXITED(status) && WEXITSTATUS(status) == 0);
+  atomic_puts("EXIT-SUCCESS");
+  return 0;
+}
