Skip to content

Commit 2a4b866

Browse files
Vasily Gorbikgregkh
authored andcommitted
mm/gup: fix gup_fast with dynamic page table folding
commit d3f7b1b upstream. Currently to make sure that every page table entry is read just once gup_fast walks perform READ_ONCE and pass pXd value down to the next gup_pXd_range function by value e.g.: static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) ... pudp = pud_offset(&p4d, addr); This function passes a reference on that local value copy to pXd_offset, and might get the very same pointer in return. This happens when the level is folded (on most arches), and that pointer should not be iterated. On s390 due to the fact that each task might have different 5,4 or 3-level address translation and hence different levels folded the logic is more complex and non-iteratable pointer to a local copy leads to severe problems. Here is an example of what happens with gup_fast on s390, for a task with 3-level paging, crossing a 2 GB pud boundary: // addr = 0x1007ffff000, end = 0x10080001000 static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end, unsigned int flags, struct page **pages, int *nr) { unsigned long next; pud_t *pudp; // pud_offset returns &p4d itself (a pointer to a value on stack) pudp = pud_offset(&p4d, addr); do { // on second iteratation reading "random" stack value pud_t pud = READ_ONCE(*pudp); // next = 0x10080000000, due to PUD_SIZE/MASK != PGDIR_SIZE/MASK on s390 next = pud_addr_end(addr, end); ... } while (pudp++, addr = next, addr != end); // pudp++ iterating over stack return 1; } This happens since s390 moved to common gup code with commit d1874a0 ("s390/mm: make the pxd_offset functions more robust") and commit 1a42010 ("s390/mm: convert to the generic get_user_pages_fast code"). s390 tried to mimic static level folding by changing pXd_offset primitives to always calculate top level page table offset in pgd_offset and just return the value passed when pXd_offset has to act as folded. What is crucial for gup_fast and what has been overlooked is that PxD_SIZE/MASK and thus pXd_addr_end should also change correspondingly. And the latter is not possible with dynamic folding. To fix the issue in addition to pXd values pass original pXdp pointers down to gup_pXd_range functions. And introduce pXd_offset_lockless helpers, which take an additional pXd entry value parameter. This has already been discussed in https://lkml.kernel.org/r/20190418100218.0a4afd51@mschwideX1 Fixes: 1a42010 ("s390/mm: convert to the generic get_user_pages_fast code") Signed-off-by: Vasily Gorbik <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Reviewed-by: Gerald Schaefer <[email protected]> Reviewed-by: Alexander Gordeev <[email protected]> Reviewed-by: Jason Gunthorpe <[email protected]> Reviewed-by: Mike Rapoport <[email protected]> Reviewed-by: John Hubbard <[email protected]> Cc: Linus Torvalds <[email protected]> Cc: Peter Zijlstra <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Russell King <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Will Deacon <[email protected]> Cc: Michael Ellerman <[email protected]> Cc: Benjamin Herrenschmidt <[email protected]> Cc: Paul Mackerras <[email protected]> Cc: Jeff Dike <[email protected]> Cc: Richard Weinberger <[email protected]> Cc: Dave Hansen <[email protected]> Cc: Andy Lutomirski <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Arnd Bergmann <[email protected]> Cc: Andrey Ryabinin <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: Christian Borntraeger <[email protected]> Cc: Claudio Imbrenda <[email protected]> Cc: <[email protected]> [5.2+] Link: https://lkml.kernel.org/r/patch.git-943f1e5dcff2.your-ad-here.call-01599856292-ext-8676@work.hours Signed-off-by: Linus Torvalds <[email protected]> Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 488b66f commit 2a4b866

File tree

3 files changed

+49
-21
lines changed

3 files changed

+49
-21
lines changed

arch/s390/include/asm/pgtable.h

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1260,26 +1260,44 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address)
12601260

12611261
#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
12621262

1263-
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
1263+
static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address)
12641264
{
1265-
if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
1266-
return (p4d_t *) pgd_deref(*pgd) + p4d_index(address);
1267-
return (p4d_t *) pgd;
1265+
if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
1266+
return (p4d_t *) pgd_deref(pgd) + p4d_index(address);
1267+
return (p4d_t *) pgdp;
12681268
}
1269+
#define p4d_offset_lockless p4d_offset_lockless
12691270

1270-
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
1271+
static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address)
12711272
{
1272-
if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
1273-
return (pud_t *) p4d_deref(*p4d) + pud_index(address);
1274-
return (pud_t *) p4d;
1273+
return p4d_offset_lockless(pgdp, *pgdp, address);
1274+
}
1275+
1276+
static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address)
1277+
{
1278+
if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
1279+
return (pud_t *) p4d_deref(p4d) + pud_index(address);
1280+
return (pud_t *) p4dp;
1281+
}
1282+
#define pud_offset_lockless pud_offset_lockless
1283+
1284+
static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address)
1285+
{
1286+
return pud_offset_lockless(p4dp, *p4dp, address);
12751287
}
12761288
#define pud_offset pud_offset
12771289

1278-
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
1290+
static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address)
1291+
{
1292+
if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
1293+
return (pmd_t *) pud_deref(pud) + pmd_index(address);
1294+
return (pmd_t *) pudp;
1295+
}
1296+
#define pmd_offset_lockless pmd_offset_lockless
1297+
1298+
static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address)
12791299
{
1280-
if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
1281-
return (pmd_t *) pud_deref(*pud) + pmd_index(address);
1282-
return (pmd_t *) pud;
1300+
return pmd_offset_lockless(pudp, *pudp, address);
12831301
}
12841302
#define pmd_offset pmd_offset
12851303

include/linux/pgtable.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1424,6 +1424,16 @@ typedef unsigned int pgtbl_mod_mask;
14241424
#define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED)
14251425
#endif
14261426

1427+
#ifndef p4d_offset_lockless
1428+
#define p4d_offset_lockless(pgdp, pgd, address) p4d_offset(&(pgd), address)
1429+
#endif
1430+
#ifndef pud_offset_lockless
1431+
#define pud_offset_lockless(p4dp, p4d, address) pud_offset(&(p4d), address)
1432+
#endif
1433+
#ifndef pmd_offset_lockless
1434+
#define pmd_offset_lockless(pudp, pud, address) pmd_offset(&(pud), address)
1435+
#endif
1436+
14271437
/*
14281438
* p?d_leaf() - true if this entry is a final mapping to a physical address.
14291439
* This differs from p?d_huge() by the fact that they are always available (if

mm/gup.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2574,13 +2574,13 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
25742574
return 1;
25752575
}
25762576

2577-
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
2577+
static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
25782578
unsigned int flags, struct page **pages, int *nr)
25792579
{
25802580
unsigned long next;
25812581
pmd_t *pmdp;
25822582

2583-
pmdp = pmd_offset(&pud, addr);
2583+
pmdp = pmd_offset_lockless(pudp, pud, addr);
25842584
do {
25852585
pmd_t pmd = READ_ONCE(*pmdp);
25862586

@@ -2617,13 +2617,13 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
26172617
return 1;
26182618
}
26192619

2620-
static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
2620+
static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
26212621
unsigned int flags, struct page **pages, int *nr)
26222622
{
26232623
unsigned long next;
26242624
pud_t *pudp;
26252625

2626-
pudp = pud_offset(&p4d, addr);
2626+
pudp = pud_offset_lockless(p4dp, p4d, addr);
26272627
do {
26282628
pud_t pud = READ_ONCE(*pudp);
26292629

@@ -2638,20 +2638,20 @@ static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
26382638
if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
26392639
PUD_SHIFT, next, flags, pages, nr))
26402640
return 0;
2641-
} else if (!gup_pmd_range(pud, addr, next, flags, pages, nr))
2641+
} else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
26422642
return 0;
26432643
} while (pudp++, addr = next, addr != end);
26442644

26452645
return 1;
26462646
}
26472647

2648-
static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
2648+
static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
26492649
unsigned int flags, struct page **pages, int *nr)
26502650
{
26512651
unsigned long next;
26522652
p4d_t *p4dp;
26532653

2654-
p4dp = p4d_offset(&pgd, addr);
2654+
p4dp = p4d_offset_lockless(pgdp, pgd, addr);
26552655
do {
26562656
p4d_t p4d = READ_ONCE(*p4dp);
26572657

@@ -2663,7 +2663,7 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
26632663
if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
26642664
P4D_SHIFT, next, flags, pages, nr))
26652665
return 0;
2666-
} else if (!gup_pud_range(p4d, addr, next, flags, pages, nr))
2666+
} else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
26672667
return 0;
26682668
} while (p4dp++, addr = next, addr != end);
26692669

@@ -2691,7 +2691,7 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
26912691
if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
26922692
PGDIR_SHIFT, next, flags, pages, nr))
26932693
return;
2694-
} else if (!gup_p4d_range(pgd, addr, next, flags, pages, nr))
2694+
} else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
26952695
return;
26962696
} while (pgdp++, addr = next, addr != end);
26972697
}

0 commit comments

Comments
 (0)