diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S index dc8bcff73cb9c0..3f013dc5bfaaac 100644 --- a/lib/libc/amd64/string/memcmp.S +++ b/lib/libc/amd64/string/memcmp.S @@ -267,7 +267,7 @@ ARCHENTRY(memcmp, baseline) lea -1(%rsi, %rdx, 1), %r9d xor %ecx, %eax xor %r9d, %r8d - test $PAGE_SIZE, %eax # are they on different pages? + test $PAGE_SIZE_4K, %eax # are they on different pages? jz 0f /* fix up rdi */ @@ -278,7 +278,7 @@ ARCHENTRY(memcmp, baseline) movdqa %xmm0, -40(%rsp) # copy to replacement buffer movdqa %xmm1, -24(%rsp) -0: test $PAGE_SIZE, %r8d +0: test $PAGE_SIZE_4K, %r8d jz 0f /* fix up rsi */ diff --git a/lib/libc/amd64/string/strcmp.S b/lib/libc/amd64/string/strcmp.S index eb354bd2af82f6..106aebdd27b70a 100644 --- a/lib/libc/amd64/string/strcmp.S +++ b/lib/libc/amd64/string/strcmp.S @@ -104,7 +104,7 @@ ARCHENTRY(strcmp, baseline) and $0xf, %eax # offset from alignment and $0xf, %edx pxor %xmm1, %xmm1 - test $PAGE_SIZE, %r9d # did the page change? + test $PAGE_SIZE_4K, %r9d # did the page change? jz 0f # if not, take fast path /* heads may cross page boundary, avoid unmapped loads */ diff --git a/lib/libc/amd64/string/strcspn.S b/lib/libc/amd64/string/strcspn.S index 7ebd7a847d6717..2c341504d81623 100644 --- a/lib/libc/amd64/string/strcspn.S +++ b/lib/libc/amd64/string/strcspn.S @@ -199,7 +199,7 @@ ARCHENTRY(__strcspn, x86_64_v2) mov %rdi, %rax # save original string pointer lea 15(%rdi), %esi # last byte of the head xor %edi, %esi - test $PAGE_SIZE, %esi # does the head cross a page? + test $PAGE_SIZE_4K, %esi # does the head cross a page? jz 0f /* head crosses page: copy to stack to fix up */ diff --git a/lib/libc/amd64/string/strncmp.S b/lib/libc/amd64/string/strncmp.S index 932cf078bdfc0f..7246813ce8a123 100644 --- a/lib/libc/amd64/string/strncmp.S +++ b/lib/libc/amd64/string/strncmp.S @@ -135,7 +135,7 @@ ARCHENTRY(strncmp, baseline) cmp $16, %rdx # end of buffer within the first 32 bytes? jb .Llt16 - test $PAGE_SIZE, %r9d # did the page change? + test $PAGE_SIZE_4K, %r9d # did the page change? jz 0f # if not, take fast path @@ -187,7 +187,7 @@ ARCHENTRY(strncmp, baseline) jmp .Lnormal /* buffer ends within the first 16 bytes */ -.Llt16: test $PAGE_SIZE, %r9d # did the page change? +.Llt16: test $PAGE_SIZE_4K, %r9d # did the page change? jz 0f # if not, take fast path /* heads may cross page boundary */ diff --git a/lib/libc/amd64/string/strspn.S b/lib/libc/amd64/string/strspn.S index 565330f0c385d4..9645d0017898c5 100644 --- a/lib/libc/amd64/string/strspn.S +++ b/lib/libc/amd64/string/strspn.S @@ -198,7 +198,7 @@ ARCHENTRY(strspn, x86_64_v2) mov %rdi, %rax # save original string pointer lea 15(%rdi), %esi # last byte of the head xor %edi, %esi - test $PAGE_SIZE, %esi # does the head cross a page? + test $PAGE_SIZE_4K, %esi # does the head cross a page? 
jz 0f /* head crosses page: copy to stack to fix up */ diff --git a/lib/libkvm/kvm_amd64.h b/lib/libkvm/kvm_amd64.h index bc8b08db5036bd..935033ecc6af44 100644 --- a/lib/libkvm/kvm_amd64.h +++ b/lib/libkvm/kvm_amd64.h @@ -62,9 +62,9 @@ typedef uint64_t amd64_pml4e_t; #ifdef __amd64__ _Static_assert(NPTEPG == AMD64_NPTEPG, "NPTEPG mismatch"); -_Static_assert(PAGE_SHIFT == AMD64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); -_Static_assert(PAGE_SIZE == AMD64_PAGE_SIZE, "PAGE_SIZE mismatch"); -_Static_assert(PAGE_MASK == AMD64_PAGE_MASK, "PAGE_MASK mismatch"); +_Static_assert(PAGE_SHIFT_4K == AMD64_PAGE_SHIFT, "PAGE_SHIFT mismatch"); +_Static_assert(PAGE_SIZE_4K == AMD64_PAGE_SIZE, "PAGE_SIZE mismatch"); +_Static_assert(PAGE_MASK_4K == AMD64_PAGE_MASK, "PAGE_MASK mismatch"); _Static_assert(NPDEPG == AMD64_NPDEPG, "NPDEPG mismatch"); _Static_assert(PDRSHIFT == AMD64_PDRSHIFT, "PDRSHIFT mismatch"); _Static_assert(NBPDR == AMD64_NBPDR, "NBPDR mismatch"); diff --git a/lib/libthr/arch/amd64/include/pthread_md.h b/lib/libthr/arch/amd64/include/pthread_md.h index 02b73d90f006dc..563124aa75abf3 100644 --- a/lib/libthr/arch/amd64/include/pthread_md.h +++ b/lib/libthr/arch/amd64/include/pthread_md.h @@ -40,7 +40,7 @@ #define CPU_SPINWAIT __asm __volatile("pause") /* For use in _Static_assert to check structs will fit in a page */ -#define THR_PAGE_SIZE_MIN PAGE_SIZE +#define THR_PAGE_SIZE_MIN PAGE_SIZE_4K static __inline struct pthread * _get_curthread(void) diff --git a/sbin/mdconfig/tests/mdconfig_test.sh b/sbin/mdconfig/tests/mdconfig_test.sh index ea87ff5d542dd3..cc29c188cbd8cc 100755 --- a/sbin/mdconfig/tests/mdconfig_test.sh +++ b/sbin/mdconfig/tests/mdconfig_test.sh @@ -274,22 +274,23 @@ attach_size_rounddown() attach_size_rounddown_body() { local md - local ss=8192 - local ms=$(($ss + 4096)) - local ms2=$((2 * $ss + 4096)) + local pgsz=$(pagesize) + local ss=$(($pgsz * 2)) + local ms=$(($ss + $pgsz)) + local ms2=$((2 * $ss + $pgsz)) - # Use a sector size that's a likely multiple of PAGE_SIZE, as md(4) + # Use a sector size that's a multiple of the kernel page size, as md(4) # expects that for swap MDs. atf_check -s exit:0 -o save:mdconfig.out -e empty \ -x "mdconfig -a -t swap -S $ss -s ${ms}b" md=$(cat mdconfig.out) - # 12288 bytes should be rounded down to one sector. - check_diskinfo "$md" 8192 1 $ss + # one sector plus one page should be rounded down to one sector. + check_diskinfo "$md" $ss 1 $ss # Resize and verify that the new size was also rounded down. atf_check -s exit:0 -o empty -e empty \ -x "mdconfig -r -u ${md#md} -s ${ms2}b" - check_diskinfo "$md" 16384 2 $ss + check_diskinfo "$md" $((2 * $ss)) 2 $ss } attach_size_rounddown_cleanup() { diff --git a/sbin/swapon/swapon.c b/sbin/swapon/swapon.c index 3dff4df5e63f91..9e49803b504abd 100644 --- a/sbin/swapon/swapon.c +++ b/sbin/swapon/swapon.c @@ -723,8 +723,8 @@ swapon_trim(const char *name) } else errx(1, "%s has an invalid file type", name); /* Trim the device. 
*/ - ioarg[0] = BBSIZE; - ioarg[1] = sz - BBSIZE; + ioarg[0] = roundup(BBSIZE, getpagesize()); + ioarg[1] = sz - ioarg[0]; if (ioctl(fd, DIOCGDELETE, ioarg) != 0) warn("ioctl(DIOCGDELETE)"); diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index 99565fbb69cae9..076f52becc2393 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -307,7 +307,7 @@ static void acpi_alloc_wakeup_handler(void **wakeaddr, void *wakept_pages[ACPI_WAKEPT_PAGES]) { - vm_page_t wakept_m[ACPI_WAKEPT_PAGES]; + ptpage_t wakept_m[ACPI_WAKEPT_PAGES]; int i; *wakeaddr = NULL; @@ -330,8 +330,7 @@ acpi_alloc_wakeup_handler(void **wakeaddr, for (i = 0; i < ACPI_WAKEPT_PAGES - (la57 ? 0 : 1); i++) { wakept_m[i] = pmap_page_alloc_below_4g(true); - wakept_pages[i] = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS( - wakept_m[i])); + wakept_pages[i] = pmap_ptpage_va(wakept_m[i]); } if (EVENTHANDLER_REGISTER(power_resume, acpi_stop_beep, NULL, EVENTHANDLER_PRI_LAST) == NULL) { @@ -349,7 +348,7 @@ acpi_alloc_wakeup_handler(void **wakeaddr, free(*wakeaddr, M_DEVBUF); for (i = 0; i < ACPI_WAKEPT_PAGES; i++) { if (wakept_m[i] != NULL) - vm_page_free(wakept_m[i]); + pmap_free_pt_page(NULL, wakept_m[i], false); } *wakeaddr = NULL; } diff --git a/sys/amd64/amd64/efirt_machdep.c b/sys/amd64/amd64/efirt_machdep.c index fe5d60c978dd1b..441cf0186da013 100644 --- a/sys/amd64/amd64/efirt_machdep.c +++ b/sys/amd64/amd64/efirt_machdep.c @@ -62,19 +62,27 @@ 1u << EFI_MD_TYPE_RT_CODE | 1u << EFI_MD_TYPE_RT_DATA | \ 1u << EFI_MD_TYPE_FIRMWARE \ ) +vm_paddr_t pmap_ptpage_pa(ptpage_t ptp); +void *pmap_ptpage_va(ptpage_t ptp); +void *pmap_ptpage_pa_to_va(vm_paddr_t); +ptpage_t pmap_pa_to_ptpage(vm_paddr_t pa); +ptpage_t pmap_va_to_ptpage(void *p); static pml5_entry_t *efi_pml5; static pml4_entry_t *efi_pml4; static vm_object_t obj_1t1_pt; -static vm_page_t efi_pmltop_page; +static ptpage_t efi_pmltop_page; static vm_pindex_t efi_1t1_idx; void efi_destroy_1t1_map(void) { + +#if 0 struct pctrie_iter pages; vm_page_t m; + /* CHUQ free these at some point. 
*/ if (obj_1t1_pt != NULL) { vm_page_iter_init(&pages, obj_1t1_pt); VM_OBJECT_RLOCK(obj_1t1_pt); @@ -84,6 +92,7 @@ efi_destroy_1t1_map(void) VM_OBJECT_RUNLOCK(obj_1t1_pt); vm_object_deallocate(obj_1t1_pt); } +#endif obj_1t1_pt = NULL; efi_pml4 = NULL; @@ -104,12 +113,14 @@ efi_phys_to_kva(vm_paddr_t paddr) return (PHYS_TO_DMAP(paddr)); } -static vm_page_t +static ptpage_t efi_1t1_page(void) { + vm_page_t m; - return (vm_page_grab(obj_1t1_pt, efi_1t1_idx++, VM_ALLOC_NOBUSY | - VM_ALLOC_WIRED | VM_ALLOC_ZERO)); + /* CHUQ don't worry about freeing this for now */ + m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_ZERO); + return (pmap_pa_to_ptpage(VM_PAGE_TO_PHYS(m))); } static pt_entry_t * @@ -120,7 +131,7 @@ efi_1t1_pte(vm_offset_t va) pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; - vm_page_t m; + ptpage_t m; vm_pindex_t pml5_idx, pml4_idx, pdp_idx, pd_idx; vm_paddr_t mphys; @@ -130,12 +141,12 @@ efi_1t1_pte(vm_offset_t va) pml5e = &efi_pml5[pml5_idx]; if (*pml5e == 0) { m = efi_1t1_page(); - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pml5e = mphys | X86_PG_RW | X86_PG_V; } else { mphys = *pml5e & PG_FRAME; } - pml4e = (pml4_entry_t *)PHYS_TO_DMAP(mphys); + pml4e = pmap_ptpage_pa_to_va(mphys); pml4e = &pml4e[pml4_idx]; } else { pml4e = &efi_pml4[pml4_idx]; @@ -143,35 +154,35 @@ efi_1t1_pte(vm_offset_t va) if (*pml4e == 0) { m = efi_1t1_page(); - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pml4e = mphys | X86_PG_RW | X86_PG_V; } else { mphys = *pml4e & PG_FRAME; } - pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys); + pdpe = pmap_ptpage_pa_to_va(mphys); pdp_idx = pmap_pdpe_index(va); pdpe += pdp_idx; if (*pdpe == 0) { m = efi_1t1_page(); - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pdpe = mphys | X86_PG_RW | X86_PG_V; } else { mphys = *pdpe & PG_FRAME; } - pde = (pd_entry_t *)PHYS_TO_DMAP(mphys); + pde = pmap_ptpage_pa_to_va(mphys); pd_idx = pmap_pde_index(va); pde += pd_idx; if (*pde == 0) { m = efi_1t1_page(); - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pde = mphys | X86_PG_RW | X86_PG_V; } else { mphys = *pde & PG_FRAME; } - pte = (pt_entry_t *)PHYS_TO_DMAP(mphys); + pte = pmap_ptpage_pa_to_va(mphys); pte += pmap_pte_index(va); KASSERT(*pte == 0, ("va %#jx *pt %#jx", va, *pte)); @@ -184,11 +195,17 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) struct efi_md *p; pt_entry_t *pte; void *pml; +#if 1 vm_page_t m; +#endif vm_offset_t va; uint64_t idx; int bits, i, mode; bool map_pz = true; +#if 0 && PAGE_SIZE != PAGE_SIZE_4K + vm_paddr_t lastpa = 0; + int lastmode = 0; +#endif obj_1t1_pt = vm_pager_allocate(OBJT_PHYS, NULL, ptoa(1 + NPML4EPG + NPML4EPG * NPDPEPG + NPML4EPG * NPDPEPG * NPDEPG), @@ -197,7 +214,7 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) VM_OBJECT_WLOCK(obj_1t1_pt); efi_pmltop_page = efi_1t1_page(); VM_OBJECT_WUNLOCK(obj_1t1_pt); - pml = (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(efi_pmltop_page)); + pml = pmap_ptpage_va(efi_pmltop_page); if (la57) { efi_pml5 = pml; pmap_pinit_pml5(efi_pmltop_page); @@ -206,12 +223,6 @@ efi_create_1t1_map(struct efi_md *map, int ndesc, int descsz) pmap_pinit_pml4(efi_pmltop_page); } - if ((efi_map_regs & ~EFI_ALLOWED_TYPES_MASK) != 0) { - printf("Ignoring the following runtime EFI regions: %#x\n", - efi_map_regs & ~EFI_ALLOWED_TYPES_MASK); - efi_map_regs &= EFI_ALLOWED_TYPES_MASK; - } - for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, descsz)) { if ((p->md_attr & EFI_MD_ATTR_RT) == 0 && @@ -257,17 +268,24 @@ efi_create_1t1_map(struct 
efi_md *map, int ndesc, int descsz) X86_PG_V; VM_OBJECT_WLOCK(obj_1t1_pt); for (va = p->md_phys, idx = 0; idx < p->md_pages; idx++, - va += PAGE_SIZE) { + va += EFI_PAGE_SIZE) { pte = efi_1t1_pte(va); pte_store(pte, va | bits); - m = PHYS_TO_VM_PAGE(va); - if (m != NULL && VM_PAGE_TO_PHYS(m) == 0) { + m = PHYS_TO_VM_PAGE(trunc_page(va)); + if (m == NULL) + continue; + if (VM_PAGE_TO_PHYS(m) == 0) { vm_page_init_page(m, va, -1, VM_FREEPOOL_DEFAULT); m->order = VM_NFREEORDER + 1; /* invalid */ m->pool = VM_NFREEPOOL + 1; /* invalid */ pmap_page_set_memattr_noflush(m, mode); + } else { + KASSERT(m->md.pat_mode == mode, + ("pa 0x%lx idx 0x%lx m %p " + "m->md.pat_mode 0x%x == mode 0x%x", + va, idx, m, m->md.pat_mode, mode)); } } VM_OBJECT_WUNLOCK(obj_1t1_pt); @@ -338,7 +356,7 @@ efi_arch_enter(void) if (pmap_pcid_enabled && !invpcid_works) PCPU_SET(curpmap, NULL); - cr3 = VM_PAGE_TO_PHYS(efi_pmltop_page); + cr3 = pmap_ptpage_pa(efi_pmltop_page); if (pmap_pcid_enabled) cr3 |= pmap_get_pcid(curpmap); load_cr3(cr3); diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index eb1b746f58938d..84d5540c9ca055 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -91,6 +91,7 @@ ASSYM(TDP_CALLCHAIN, TDP_CALLCHAIN); ASSYM(TDP_KTHREAD, TDP_KTHREAD); ASSYM(PAGE_SIZE, PAGE_SIZE); +ASSYM(PAGE_SIZE_PT, PAGE_SIZE_PT); ASSYM(NPTEPG, NPTEPG); ASSYM(NPDEPG, NPDEPG); ASSYM(addr_P4Tmap, addr_P4Tmap); diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 9ff60439d1ec9d..071ec1329ffb3f 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1303,6 +1303,8 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) TSRAW(&thread0, TS_ENTER, __func__, NULL); + physfree = round_page(physfree); + kernphys = amd64_loadaddr(); physfree += kernphys; diff --git a/sys/amd64/amd64/minidump_machdep.c b/sys/amd64/amd64/minidump_machdep.c index 43bf81a991bf8e..b394d712320611 100644 --- a/sys/amd64/amd64/minidump_machdep.c +++ b/sys/amd64/amd64/minidump_machdep.c @@ -53,6 +53,7 @@ #include CTASSERT(sizeof(struct kerneldumpheader) == 512); +CTASSERT(MINIDUMP_PAGE_SIZE == PAGE_SIZE_PT); static struct kerneldumpheader kdh; @@ -92,7 +93,8 @@ blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) if (maxdumpsz == 0) /* seatbelt */ maxdumpsz = PAGE_SIZE; error = 0; - if ((sz % PAGE_SIZE) != 0) { + if ((pa != 0 && (sz % PAGE_SIZE) != 0) || + (ptr != 0 && (sz % MINIDUMP_PAGE_SIZE) != 0)) { printf("size not page aligned\n"); return (EINVAL); } @@ -157,6 +159,7 @@ blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz) /* A fake page table page, to avoid having to handle both 4K and 2M pages */ static pd_entry_t fakepd[NPDEPG]; +CTASSERT(sizeof(fakepd) == MINIDUMP_PAGE_SIZE); int cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) @@ -191,7 +194,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) * We always write a page, even if it is zero. 
Each * page written corresponds to 1GB of space */ - pmapsize += PAGE_SIZE; + pmapsize += PAGE_SIZE_PT; ii = pmap_pml4e_index(va); pml4 = (uint64_t *)PHYS_TO_DMAP(KPML4phys) + ii; pdp = (uint64_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); @@ -207,7 +210,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) if ((pdpe & PG_PS) != 0) { va += NBPDP; pa = pdpe & PG_PS_FRAME; - for (n = 0; n < NPDEPG * NPTEPG; n++) { + for (n = 0; n < NPDEPG * NPTEPG; n += PAGE_SIZE_PTES) { if (vm_phys_is_dumpable(pa)) vm_page_dump_add(state->dump_bitset, pa); @@ -226,7 +229,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) if ((pde & PG_PS) != 0) { /* This is an entire 2M page. */ pa = pde & PG_PS_FRAME; - for (k = 0; k < NPTEPG; k++) { + for (k = 0; k < NPTEPG; k += PAGE_SIZE_PTES) { if (vm_phys_is_dumpable(pa)) vm_page_dump_add( state->dump_bitset, pa); @@ -241,7 +244,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) vm_page_dump_add(state->dump_bitset, pa); /* and for each valid page in this 2MB block */ pt = (uint64_t *)PHYS_TO_DMAP(pde & PG_FRAME); - for (k = 0; k < NPTEPG; k++) { + for (k = 0; k < NPTEPG; k += PAGE_SIZE_PTES) { pte = atomic_load_64(&pt[k]); if ((pte & PG_V) == 0) continue; @@ -256,18 +259,18 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) /* Calculate dump size. */ mbp = state->msgbufp; dumpsize = pmapsize; - dumpsize += round_page(mbp->msg_size); - dumpsize += round_page(sizeof(dump_avail)); - dumpsize += round_page(BITSET_SIZE(vm_page_dump_pages)); + dumpsize += roundup2(mbp->msg_size, MINIDUMP_PAGE_SIZE); + dumpsize += roundup2(sizeof(dump_avail), MINIDUMP_PAGE_SIZE); + dumpsize += roundup2(BITSET_SIZE(vm_page_dump_pages), MINIDUMP_PAGE_SIZE); VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { /* Clear out undumpable pages now if needed */ if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) { - dumpsize += PAGE_SIZE; + dumpsize += MINIDUMP_PAGE_SIZE; } else { vm_page_dump_drop(state->dump_bitset, pa); } } - dumpsize += PAGE_SIZE; + dumpsize += MINIDUMP_PAGE_SIZE; wdog_next = progress = dumpsize; dumpsys_pb_init(dumpsize); @@ -277,12 +280,13 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) strcpy(mdhdr.magic, MINIDUMP_MAGIC); mdhdr.version = MINIDUMP_VERSION; mdhdr.msgbufsize = mbp->msg_size; - mdhdr.bitmapsize = round_page(BITSET_SIZE(vm_page_dump_pages)); + mdhdr.bitmapsize = roundup2(BITSET_SIZE(vm_page_dump_pages), + MINIDUMP_PAGE_SIZE); mdhdr.pmapsize = pmapsize; mdhdr.kernbase = kva_layout.km_low; mdhdr.dmapbase = kva_layout.dmap_low; mdhdr.dmapend = kva_layout.dmap_high; - mdhdr.dumpavailsize = round_page(sizeof(dump_avail)); + mdhdr.dumpavailsize = roundup2(sizeof(dump_avail), MINIDUMP_PAGE_SIZE); dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_AMD64_VERSION, dumpsize); @@ -297,12 +301,13 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) /* Dump my header */ bzero(&fakepd, sizeof(fakepd)); bcopy(&mdhdr, &fakepd, sizeof(mdhdr)); - error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); + error = blk_write(di, (char *)&fakepd, 0, MINIDUMP_PAGE_SIZE); if (error) goto fail; /* Dump msgbuf up front */ - error = blk_write(di, mbp->msg_ptr, 0, round_page(mbp->msg_size)); + error = blk_write(di, mbp->msg_ptr, 0, + roundup2(mbp->msg_size, MINIDUMP_PAGE_SIZE)); if (error) goto fail; @@ -311,13 +316,13 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) "Large dump_avail not handled"); bzero(&fakepd, 
sizeof(fakepd)); memcpy(fakepd, dump_avail, sizeof(dump_avail)); - error = blk_write(di, (char *)fakepd, 0, PAGE_SIZE); + error = blk_write(di, (char *)fakepd, 0, MINIDUMP_PAGE_SIZE); if (error) goto fail; /* Dump bitmap */ error = blk_write(di, (char *)state->dump_bitset, 0, - round_page(BITSET_SIZE(vm_page_dump_pages))); + roundup2(BITSET_SIZE(vm_page_dump_pages), MINIDUMP_PAGE_SIZE)); if (error) goto fail; @@ -331,7 +336,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) /* We always write a page, even if it is zero */ if ((pdpe & PG_V) == 0) { - error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); + error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE_PT); if (error) goto fail; /* flush, in case we reuse fakepd in the same block */ @@ -347,7 +352,7 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) fakepd[0] = pdpe; for (j = 1; j < NPDEPG; j++) fakepd[j] = fakepd[j - 1] + NBPDR; - error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); + error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE_PT); if (error) goto fail; /* flush, in case we reuse fakepd in the same block */ @@ -361,10 +366,10 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) pa = pdpe & PG_FRAME; if (PHYS_IN_DMAP(pa) && vm_phys_is_dumpable(pa)) { pd = (uint64_t *)PHYS_TO_DMAP(pa); - error = blk_write(di, (char *)pd, 0, PAGE_SIZE); + error = blk_write(di, (char *)pd, 0, PAGE_SIZE_PT); } else { /* Malformed pa, write the zeroed fakepd. */ - error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE); + error = blk_write(di, (char *)&fakepd, 0, PAGE_SIZE_PT); } if (error) goto fail; @@ -375,7 +380,8 @@ cpu_minidumpsys(struct dumperinfo *di, const struct minidumpstate *state) /* Dump memory chunks */ VM_PAGE_DUMP_FOREACH(state->dump_bitset, pa) { - error = blk_write(di, 0, pa, PAGE_SIZE); + error = blk_write(di, (char *)PHYS_TO_DMAP(pa), 0, + MINIDUMP_PAGE_SIZE); if (error) goto fail; } diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 00e99f9df19219..28115294e49586 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -320,7 +320,8 @@ amd64_mp_alloc_pcpu(void) int start_all_aps(void) { - vm_page_t m_boottramp, m_pml4, m_pdp, m_pd[4]; + vm_page_t m_boottramp; + ptpage_t m_pml4, m_pdp, m_pd[4]; pml5_entry_t old_pml45; pml4_entry_t *v_pml4; pdp_entry_t *v_pdp; @@ -342,46 +343,46 @@ start_all_aps(void) /* Create a transient 1:1 mapping of low 4G */ if (la57) { m_pml4 = pmap_page_alloc_below_4g(true); - v_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pml4)); + v_pml4 = pmap_ptpage_va(m_pml4); } else { v_pml4 = &kernel_pmap->pm_pmltop[0]; } m_pdp = pmap_page_alloc_below_4g(true); - v_pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pdp)); + v_pdp = pmap_ptpage_va(m_pdp); m_pd[0] = pmap_page_alloc_below_4g(false); - v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[0])); + v_pd = pmap_ptpage_va(m_pd[0]); for (i = 0; i < NPDEPG; i++) v_pd[i] = (i << PDRSHIFT) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | PG_PS; m_pd[1] = pmap_page_alloc_below_4g(false); - v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[1])); + v_pd = pmap_ptpage_va(m_pd[1]); for (i = 0; i < NPDEPG; i++) v_pd[i] = (NBPDP + (i << PDRSHIFT)) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | PG_PS; m_pd[2] = pmap_page_alloc_below_4g(false); - v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[2])); + v_pd = pmap_ptpage_va(m_pd[2]); for (i = 0; i < NPDEPG; i++) v_pd[i] = (2UL * NBPDP + (i << PDRSHIFT)) | X86_PG_V | 
X86_PG_RW | X86_PG_A | X86_PG_M | PG_PS; m_pd[3] = pmap_page_alloc_below_4g(false); - v_pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m_pd[3])); + v_pd = pmap_ptpage_va(m_pd[3]); for (i = 0; i < NPDEPG; i++) v_pd[i] = (3UL * NBPDP + (i << PDRSHIFT)) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | PG_PS; - v_pdp[0] = VM_PAGE_TO_PHYS(m_pd[0]) | X86_PG_V | + v_pdp[0] = pmap_ptpage_pa(m_pd[0]) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; - v_pdp[1] = VM_PAGE_TO_PHYS(m_pd[1]) | X86_PG_V | + v_pdp[1] = pmap_ptpage_pa(m_pd[1]) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; - v_pdp[2] = VM_PAGE_TO_PHYS(m_pd[2]) | X86_PG_V | + v_pdp[2] = pmap_ptpage_pa(m_pd[2]) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; - v_pdp[3] = VM_PAGE_TO_PHYS(m_pd[3]) | X86_PG_V | + v_pdp[3] = pmap_ptpage_pa(m_pd[3]) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; old_pml45 = kernel_pmap->pm_pmltop[0]; if (la57) { - kernel_pmap->pm_pmltop[0] = VM_PAGE_TO_PHYS(m_pml4) | + kernel_pmap->pm_pmltop[0] = pmap_ptpage_pa(m_pml4) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; } - v_pml4[0] = VM_PAGE_TO_PHYS(m_pdp) | X86_PG_V | + v_pml4[0] = pmap_ptpage_pa(m_pdp) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; pmap_invalidate_all(kernel_pmap); @@ -453,12 +454,12 @@ start_all_aps(void) kernel_pmap->pm_pmltop[0] = old_pml45; invlpg(0); if (la57) - vm_page_free(m_pml4); - vm_page_free(m_pd[3]); - vm_page_free(m_pd[2]); - vm_page_free(m_pd[1]); - vm_page_free(m_pd[0]); - vm_page_free(m_pdp); + pmap_free_pt_page(NULL, m_pml4, false); + pmap_free_pt_page(NULL, m_pd[3], false); + pmap_free_pt_page(NULL, m_pd[2], false); + pmap_free_pt_page(NULL, m_pd[1], false); + pmap_free_pt_page(NULL, m_pd[0], false); + pmap_free_pt_page(NULL, m_pdp, false); vm_page_free(m_boottramp); /* number of APs actually started */ @@ -702,7 +703,8 @@ void smp_masked_invlpg(vm_offset_t addr, pmap_t pmap, smp_invl_cb_t curcpu_cb) { if (invlpgb_works && pmap == kernel_pmap) { - invlpgb(INVLPGB_GLOB | INVLPGB_VA | trunc_page(addr), 0, 0); + KASSERT((addr & PAGE_MASK_PT) == 0, ("unaligned va 0x%lx", addr)); + invlpgb(INVLPGB_GLOB | INVLPGB_VA | addr, 0, 0); tlbsync(); sched_unpin(); return; @@ -724,17 +726,17 @@ smp_masked_invlpg_range(vm_offset_t addr1, vm_offset_t addr2, pmap_t pmap, addr1 = trunc_page(addr1); addr2 = round_page(addr2); - total = atop(addr2 - addr1); + total = atop_pt(addr2 - addr1); for (va = addr1; total > 0;) { if ((va & PDRMASK) != 0 || total < NPDEPG) { - cnt = atop(NBPDR - (va & PDRMASK)); + cnt = atop_pt(NBPDR - (va & PDRMASK)); if (cnt > total) cnt = total; if (cnt > invlpgb_maxcnt + 1) cnt = invlpgb_maxcnt + 1; invlpgb(INVLPGB_GLOB | INVLPGB_VA | va, 0, cnt - 1); - va += ptoa(cnt); + va += ptoa_pt(cnt); total -= cnt; } else { cnt = total / NPTEPG; @@ -755,7 +757,7 @@ smp_masked_invlpg_range(vm_offset_t addr1, vm_offset_t addr2, pmap_t pmap, invl_op_pgrng); #ifdef COUNT_XINVLTLB_HITS ipi_range++; - ipi_range_size += (addr2 - addr1) / PAGE_SIZE; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE_PT; #endif } @@ -938,7 +940,7 @@ invlrng_handler(vm_offset_t smp_tlb_addr1, vm_offset_t smp_tlb_addr2) addr = smp_tlb_addr1; do { invlpg(addr); - addr += PAGE_SIZE; + addr += PAGE_SIZE_PT; } while (addr < smp_tlb_addr2); } @@ -964,7 +966,7 @@ invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1, } else { do { invlpg(addr); - addr += PAGE_SIZE; + addr += PAGE_SIZE_PT; } while (addr < smp_tlb_addr2); } if (smp_tlb_pmap == PCPU_GET(curpmap) && @@ -975,7 +977,7 @@ invlrng_invpcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1, d.addr = 
smp_tlb_addr1; do { invpcid(&d, INVPCID_ADDR); - d.addr += PAGE_SIZE; + d.addr += PAGE_SIZE_PT; } while (d.addr < smp_tlb_addr2); } } @@ -998,7 +1000,7 @@ invlrng_pcid_handler(pmap_t smp_tlb_pmap, vm_offset_t smp_tlb_addr1, addr = smp_tlb_addr1; do { invlpg(addr); - addr += PAGE_SIZE; + addr += PAGE_SIZE_PT; } while (addr < smp_tlb_addr2); if (smp_tlb_pmap == PCPU_GET(curpmap) && (ucr3 = smp_tlb_pmap->pm_ucr3) != PMAP_NO_CR3 && diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 243a6625becefd..5b599f224da707 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -152,6 +152,7 @@ #include #include #include +#include #include #include @@ -173,6 +174,307 @@ #define PMAP_MEMDOM 1 #endif +/* + * ptpage_t abstraction for page table pages of size PAGE_SIZE_PT, + * vs. vm_page_t which is for data pages of size PAGE_SIZE. + */ +vm_paddr_t pmap_ptpage_pa(ptpage_t ptp); +void *pmap_ptpage_va(ptpage_t ptp); +void *pmap_ptpage_pa_to_va(vm_paddr_t); +ptpage_t pmap_pa_to_ptpage(vm_paddr_t pa); +ptpage_t pmap_va_to_ptpage(void *p); + +vm_pindex_t pmap_ptpage_pindex(ptpage_t ptp); +void pmap_ptpage_pindex_set(ptpage_t ptp, vm_pindex_t pindex); + +u_int pmap_ptpage_refs(ptpage_t ptp); +u_int pmap_ptpage_ref_add(ptpage_t ptp, int delta); +void pmap_ptpage_ref_set(ptpage_t ptp, u_int cnt); + +void pmap_ptpage_valid_set(ptpage_t ptp, vm_page_bits_t val); +vm_page_bits_t pmap_ptpage_valid_get(ptpage_t ptp); + +void pmap_ptpage_slist_init(struct ptpglist *ptpgl); +void pmap_ptpage_slist_add(pmap_t pmap, struct ptpglist *ptpgl, ptpage_t ptp); +void pmap_ptpage_slist_free(struct ptpglist *ptgl, bool update_wire_count); + +ptpage_t pmap_pt_page_array; + +#define TID (curthread->td_tid) + +#if 0 +/* CHUQ */ +#undef PMAP_LOCK +#define PMAP_LOCK(pmap) pmap_lock(pmap) +void pmap_lock(pmap_t); + +#undef PMAP_UNLOCK +#define PMAP_UNLOCK(pmap) pmap_unlock(pmap) +void pmap_unlock(pmap_t); + +__noinline void +pmap_lock(pmap_t pmap) +{ + + mtx_lock(&(pmap)->pm_mtx); + chuq_do_pp(); +} + +__noinline void +pmap_unlock(pmap_t pmap) +{ + chuq_do_pp(); + mtx_unlock(&(pmap)->pm_mtx); +} +#endif + +#if PAGE_SIZE == PAGE_SIZE_4K + +static void ptpage_radix_init(struct ptpage_radix *rtree); +static int ptpage_radix_insert(struct ptpage_radix *rtree, ptpage_t ptp); +static ptpage_t ptpage_radix_lookup(struct ptpage_radix *rtree, vm_pindex_t index); +static ptpage_t ptpage_radix_remove(struct ptpage_radix *rtree, vm_pindex_t index); +static bool ptpage_radix_is_empty(struct ptpage_radix *rtree); + +static void +ptpage_radix_init(struct ptpage_radix *rtree) +{ + + vm_radix_init((struct vm_radix *)rtree); +} + +static int +ptpage_radix_insert(struct ptpage_radix *rtree, ptpage_t ptp) +{ + + return (vm_radix_insert((struct vm_radix *)rtree, (vm_page_t)ptp)); +} + +static ptpage_t +ptpage_radix_lookup(struct ptpage_radix *rtree, vm_pindex_t index) +{ + + return ((ptpage_t)vm_radix_lookup((struct vm_radix *)rtree, index)); +} + +static ptpage_t +ptpage_radix_remove(struct ptpage_radix *rtree, vm_pindex_t index) +{ + + return ((ptpage_t)vm_radix_remove((struct vm_radix *)rtree, index)); +} + +static bool +ptpage_radix_is_empty(struct ptpage_radix *rtree) +{ + + return (vm_radix_is_empty((struct vm_radix *)rtree)); +} + +static __inline pt_entry_t +pte_load_datapg(pt_entry_t *ptep) +{ + + return (*ptep); +} + +static __inline void +pte_store_datapg(pt_entry_t *ptep, pt_entry_t pte) +{ + + pte_store(ptep, pte); +} + +static __inline pt_entry_t +pte_load_clear_datapg(pt_entry_t *ptep) +{ + + return 
(pte_load_clear(ptep)); +} + +static __inline pt_entry_t +pte_load_store_datapg(pt_entry_t *ptep, pt_entry_t newpte) +{ + + return (pte_load_store(ptep, newpte)); +} + +static __inline void +atomic_clear_long_datapg(pt_entry_t *ptep, uint64_t val) +{ + + atomic_clear_long(ptep, val); +} + +void +pmap_pt_page_array_mark(void) +{ +} + +ptpage_t +pmap_page_alloc_below_4g(bool zeroed) +{ + vm_page_t m; + + m = vm_page_alloc_noobj_contig(VM_ALLOC_WIRED | (zeroed ? VM_ALLOC_ZERO : 0), + 1, 0, (1ULL << 32), PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); + return ((ptpage_t)m); +} + +#define invlpg_datapg(va) invlpg(va) +#define pmap_invlpg_datapg(pmap, va) pmap_invlpg(pmap, va) + +#else + +struct ptpage { + vm_pindex_t pindex; + uint32_t refs; + uint16_t free; + uint8_t valid; /* CHUQ only need 3 values */ + uint8_t spare; + SLIST_ENTRY(ptpage) ss; + pmap_t pmap; /* CHUQ due to pm_ptpfree */ + TAILQ_ENTRY(ptpage) pt_list; /* CHUQ hack for now */ +}; + +vm_offset_t pmap_pt_page_array_size; + +#include "pmap_radix.c" + +void +pmap_pt_page_array_mark(void) +{ + + (void)vm_map_insert(kernel_map, NULL, 0, (vm_offset_t)pmap_pt_page_array, + (vm_offset_t)pmap_pt_page_array + round_2mpage(pmap_pt_page_array_size * + sizeof(struct ptpage)), + VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); +} + +static __inline pt_entry_t +pte_load_datapg(pt_entry_t *ptep) +{ + pt_entry_t pte; + __unused pt_entry_t apte, cpte; + int i; + + pte = pte_load(ptep); + apte = pte & ~(X86_PG_M | X86_PG_A); + KASSERT((pte & (PAGE_MASK & ~PAGE_MASK_PT)) == 0, + ("ptep %p not aligned", ptep)); + for (i = 1; i < PAGE_SIZE_PTES; i++) { + cpte = pte_load(ptep + i); + pte |= cpte & (X86_PG_A | X86_PG_M); + cpte &= ~(X86_PG_A | X86_PG_M); + KASSERT(cpte == (apte | (i << PAGE_SHIFT_PT)) || + apte == 0 && cpte == 0, + ("pte_load_datapg: mismatch ptep %p i %d apte 0x%lx cpte 0x%lx", + ptep, i, apte, cpte)); + } + return (pte); +} + +/* + * Store all PTEs for a data page. + * Verify that the pte address and pte value are aligned correctly. + */ +static __inline void +pte_store_datapg(pt_entry_t *ptep, pt_entry_t pte) +{ + __unused pt_entry_t mask; + int i; + + mask = PAGE_MASK >> (PAGE_SHIFT_PT - 3); + KASSERT(((uint64_t)ptep & mask) == 0, + ("unaligned pte_store_datapg ptep %p mask 0x%lx", ptep, mask)); + mask = (PAGE_MASK & ~PAGE_MASK_PT); + KASSERT(((uint64_t)pte & mask) == 0, + ("unaligned pte_store_datapg pte %p mask 0x%lx", (void *)pte, mask)); + for (i = 0; i < PAGE_SIZE_PTES; i++) { + ptep[i] = pte ? 
(pte | (i << PAGE_SHIFT_PT)) : 0; + } +} + +static __inline pt_entry_t +pte_load_clear_datapg(pt_entry_t *ptep) +{ + pt_entry_t pte, cpte; + __unused pt_entry_t mask, apte; + int i; + + mask = PAGE_MASK >> (PAGE_SHIFT_PT - 3); + KASSERT(((uint64_t)ptep & mask) == 0, + ("unaligned pte_load_clear_datapg ptep %p mask 0x%lx", ptep, mask)); + pte = pte_load_clear(ptep); + apte = pte & ~(X86_PG_A | X86_PG_M); + mask = (PAGE_MASK & ~PAGE_MASK_PT); + KASSERT((apte & mask) == 0, + ("unaligned pte_load_clear_datapg pte 0x%lx mask 0x%lx", pte, mask)); + for (i = 1; i < PAGE_SIZE_PTES; i++) { + cpte = pte_load_clear(ptep + i); + pte |= cpte & (X86_PG_A | X86_PG_M); + cpte &= ~(X86_PG_A | X86_PG_M); + KASSERT(cpte == (apte | (i << PAGE_SHIFT_PT)), + ("pte_load_clear_datapg mismatch ptep %p i %d apte 0x%lx cpte 0x%lx", + ptep, i, apte, cpte)); + } + return (pte); +} + +static __inline pt_entry_t +pte_load_store_datapg(pt_entry_t *ptep, pt_entry_t newpte) +{ + pt_entry_t oldpte, cpte; + __unused pt_entry_t mask, apte; + int i; + + mask = PAGE_MASK >> (PAGE_SHIFT_PT - 3); + KASSERT(((uint64_t)ptep & mask) == 0, + ("unaligned pte_load_store_datapg ptep %p mask 0x%lx", ptep, mask)); + mask = (PAGE_MASK & ~PAGE_MASK_PT); + KASSERT((newpte & mask) == 0, + ("unaligned pte_load_store_datapg newpte 0x%lx", newpte)); + oldpte = pte_load_store(ptep, newpte); + apte = oldpte & ~(X86_PG_A | X86_PG_M); + KASSERT((apte & mask) == 0, + ("unaligned pte_load_store_datapg oldpte 0x%lx", oldpte)); + for (i = 1; i < PAGE_SIZE_PTES; i++) { + cpte = pte_load_store(ptep + i, newpte | (i << PAGE_SHIFT_PT)); + oldpte |= cpte & (X86_PG_A | X86_PG_M); + cpte &= ~(X86_PG_A | X86_PG_M); + KASSERT(cpte == (apte | (i << PAGE_SHIFT_PT)), + ("pte_load_store_datapg mismatch ptep %p i %d apte 0x%lx cpte 0x%lx", + ptep, i, apte, cpte)); + } + return (oldpte); +} + +static __inline void +atomic_clear_long_datapg(pt_entry_t *ptep, uint64_t val) +{ + + for (int i = 0; i < PAGE_SIZE_PTES; i++) + atomic_clear_long(ptep + i, val); +} + +static __inline void +invlpg_datapg(vm_offset_t va) +{ + + for (int i = 0; i < PAGE_SIZE_PTES; i++) + invlpg(va + (i << PAGE_SHIFT_PT)); +} + +static __inline void +pmap_invlpg_datapg(pmap_t pmap, vm_offset_t va) +{ + + for (int i = 0; i < PAGE_SIZE_PTES; i++) + pmap_invlpg(pmap, va + (i << PAGE_SHIFT_PT)); +} + +#endif + static __inline bool pmap_type_guest(pmap_t pmap) { @@ -420,7 +722,7 @@ pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "VM/pmap parameters"); -static int __read_frequently pg_ps_enabled = 1; +static int __read_frequently pg_ps_enabled = (PAGE_SIZE == PAGE_SIZE_4K); SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); @@ -619,7 +921,9 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, "Page Table Isolation enabled"); static vm_object_t pti_obj; static pml4_entry_t *pti_pml4; +#if PAGE_SIZE == PAGE_SIZE_4K static vm_pindex_t pti_pg_idx; +#endif static bool pti_finalized; static int pmap_growkernel_panic = 0; @@ -1339,24 +1643,24 @@ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); -static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte); +static void pmap_abort_ptp(pmap_t pmap, vm_offset_t va, ptpage_t mpte); static int pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, int mode, int flags); static bool 
pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static bool pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static bool pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, - vm_offset_t va, struct rwlock **lockp, vm_page_t mpte); + vm_offset_t va, struct rwlock **lockp, ptpage_t mpte); static bool pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, - vm_offset_t va, vm_page_t m); + vm_offset_t va, ptpage_t m); static int pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m, struct rwlock **lockp); -static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, - vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); +static ptpage_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, + vm_page_t m, vm_prot_t prot, ptpage_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); -static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, +static int pmap_insert_pt_page(pmap_t pmap, ptpage_t mpte, bool promoted, bool allpte_PG_A_set); static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva); @@ -1365,11 +1669,11 @@ static void pmap_invalidate_cache_range_all(vm_offset_t sva, static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); -static vm_page_t pmap_large_map_getptp_unlocked(void); +static ptpage_t pmap_large_map_getptp_unlocked(void); static vm_paddr_t pmap_large_map_kextract(vm_offset_t va); #if VM_NRESERVLEVEL > 0 static bool pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - vm_page_t mpte, struct rwlock **lockp); + ptpage_t mpte, struct rwlock **lockp); #endif static bool pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); @@ -1380,14 +1684,14 @@ static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, - bool demote_kpde, struct spglist *free, struct rwlock **lockp); + bool demote_kpde, struct ptpglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, - pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); -static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); + pd_entry_t ptepde, struct ptpglist *free, struct rwlock **lockp); +static ptpage_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, - struct spglist *free); + struct ptpglist *free); static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, - pd_entry_t *pde, struct spglist *free, + pd_entry_t *pde, struct ptpglist *free, struct rwlock **lockp); static bool pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); @@ -1395,21 +1699,21 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde); -static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp, +static pd_entry_t *pmap_alloc_pde(pmap_t pmap, vm_offset_t va, ptpage_t *pdpgp, struct rwlock **lockp); -static vm_page_t 
pmap_allocpte_alloc(pmap_t pmap, vm_pindex_t ptepindex, +static ptpage_t pmap_allocpte_alloc(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, vm_offset_t va); -static vm_page_t pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, +static ptpage_t pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, vm_offset_t va); -static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, +static ptpage_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp); -static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, - struct spglist *free); -static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); +static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, ptpage_t m, + struct ptpglist *free); +static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct ptpglist *); -static vm_page_t pmap_alloc_pt_page(pmap_t, vm_pindex_t, int); -static void pmap_free_pt_page(pmap_t, vm_page_t, bool); +static ptpage_t pmap_alloc_pt_page(pmap_t, vm_pindex_t, int); +void pmap_free_pt_page(pmap_t, ptpage_t, bool); /********************/ /* Inline functions */ @@ -1602,8 +1906,20 @@ pmap_pt_page_count_adj(pmap_t pmap, int count) if (pmap == kernel_pmap) counter_u64_add(kernel_pt_page_count, count); else { +#if 0 + /* + * XXX CHUQ do not include pt pages in resident count for now. + * I changed the code to always free pt pages via + * pmap_free_pt_page() and never by calling vm_page_free() + * directly, but that doesn't work due to locking + * which is why the previous code called this function + * separately and freeing pt pages via vm_page_free_pages_toq(). + * sort all this out later and just don't count pt pages + * toward resident_count for now. + */ if (pmap != NULL) pmap_resident_count_adj(pmap, count); +#endif counter_u64_add(user_pt_page_count, count); } } @@ -1617,7 +1933,7 @@ vtopte(vm_offset_t va) { KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va)); - return ((pt_entry_t *)(PTmap + ((va >> (PAGE_SHIFT - 3)) & vtoptem))); + return ((pt_entry_t *)(PTmap + ((va >> (PAGE_SHIFT_PT - 3)) & vtoptem))); } pd_entry_t vtopdem __read_mostly = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + @@ -1638,8 +1954,8 @@ allocpages(vm_paddr_t *firstaddr, int n) u_int64_t ret; ret = *firstaddr; - bzero((void *)ret, n * PAGE_SIZE); - *firstaddr += n * PAGE_SIZE; + bzero((void *)ret, n * PAGE_SIZE_PT); + *firstaddr += round_page(n * PAGE_SIZE_PT); return (ret); } @@ -1881,8 +2197,9 @@ create_pagetables(vm_paddr_t *firstaddr) * the PTmap. 
*/ pd_p = (pd_entry_t *)KPDphys; - for (i = 0; i < nkpt; i++) - pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; + for (i = 0; i < nkpt; i++) { + pd_p[i] = (KPTphys + ptoa_pt(i)) | X86_PG_RW | X86_PG_V; + } /* * Map from start of the kernel in physical memory (staging @@ -1907,9 +2224,9 @@ create_pagetables(vm_paddr_t *firstaddr) *firstaddr = round_2mpage(KERNend); /* And connect up the PD to the PDP (leaving room for L4 pages) */ - pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE)); + pdp_p = (pdp_entry_t *)(KPDPphys + ptoa_pt(KPML4I - KPML4BASE)); for (i = 0; i < nkpdpe; i++) - pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V; + pdp_p[i + KPDPI] = (KPDphys + ptoa_pt(i)) | X86_PG_RW | X86_PG_V; #ifdef KASAN kasankernbase = kasan_md_addr_to_shad(KERNBASE); @@ -1921,12 +2238,12 @@ create_pagetables(vm_paddr_t *firstaddr) pd_p = (pd_entry_t *)KASANPDphys; for (i = 0; i < nkasanpte; i++) - pd_p[i + kasankpdi] = (KASANPTphys + ptoa(i)) | X86_PG_RW | + pd_p[i + kasankpdi] = (KASANPTphys + ptoa_pt(i)) | X86_PG_RW | X86_PG_V | pg_nx; pt_p = (pt_entry_t *)KASANPTphys; for (i = 0; i < nkasanpte * NPTEPG; i++) - pt_p[i] = (KASANphys + ptoa(i)) | X86_PG_RW | X86_PG_V | + pt_p[i] = (KASANphys + ptoa_pt(i)) | X86_PG_RW | X86_PG_V | X86_PG_M | X86_PG_A | pg_nx; #endif @@ -1953,7 +2270,7 @@ create_pagetables(vm_paddr_t *firstaddr) X86_PG_M | X86_PG_A | pg_nx; } for (j = 0; i < ndmpdp; i++, j++) { - pdp_p[i] = DMPDphys + ptoa(j); + pdp_p[i] = DMPDphys + ptoa_pt(j); pdp_p[i] |= X86_PG_RW | X86_PG_V | pg_nx; } @@ -1963,7 +2280,7 @@ create_pagetables(vm_paddr_t *firstaddr) */ p4d_p = la57 ? (pml4_entry_t *)DMPML4phys : &p4_p[DMPML4I]; for (i = 0; i < ndmpdpphys; i++) { - p4d_p[i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V | + p4d_p[i] = (DMPDPphys + ptoa_pt(i)) | X86_PG_RW | X86_PG_V | pg_nx; } @@ -1981,7 +2298,7 @@ create_pagetables(vm_paddr_t *firstaddr) } j = rounddown2(kernphys, NBPDP) >> PDPSHIFT; for (i = 0; i < nkdmpde; i++) { - pdp_p[i + j] = (DMPDkernphys + ptoa(i)) | + pdp_p[i + j] = (DMPDkernphys + ptoa_pt(i)) | X86_PG_RW | X86_PG_V | pg_nx; } } @@ -1989,7 +2306,7 @@ create_pagetables(vm_paddr_t *firstaddr) #ifdef KASAN /* Connect the KASAN shadow map slots up to the PML4. */ for (i = 0; i < NKASANPML4E; i++) { - p4_p[KASANPML4I + i] = KASANPDPphys + ptoa(i); + p4_p[KASANPML4I + i] = KASANPDPphys + ptoa_pt(i); p4_p[KASANPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; } #endif @@ -1997,20 +2314,20 @@ create_pagetables(vm_paddr_t *firstaddr) #ifdef KMSAN /* Connect the KMSAN shadow map slots up to the PML4. */ for (i = 0; i < NKMSANSHADPML4E; i++) { - p4_p[KMSANSHADPML4I + i] = KMSANSHADPDPphys + ptoa(i); + p4_p[KMSANSHADPML4I + i] = KMSANSHADPDPphys + ptoa_pt(i); p4_p[KMSANSHADPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; } /* Connect the KMSAN origin map slots up to the PML4. 
*/ for (i = 0; i < NKMSANORIGPML4E; i++) { - p4_p[KMSANORIGPML4I + i] = KMSANORIGPDPphys + ptoa(i); + p4_p[KMSANORIGPML4I + i] = KMSANORIGPDPphys + ptoa_pt(i); p4_p[KMSANORIGPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx; } #endif /* Connect the KVA slots up to the PML4 */ for (i = 0; i < NKPML4E; i++) { - p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); + p4_p[KPML4BASE + i] = KPDPphys + ptoa_pt(i); p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V; } @@ -2065,7 +2382,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) TSENTER(); KERNend = *firstaddr; - res = atop(KERNend - (vm_paddr_t)kernphys); + res = atop_pt(KERNend - (vm_paddr_t)kernphys); if (!pti) pg_g = X86_PG_G; @@ -2075,7 +2392,6 @@ pmap_bootstrap(vm_paddr_t *firstaddr) */ pmap_bootstrap_la57(firstaddr); create_pagetables(firstaddr); - pcpu0_phys = allocpages(firstaddr, 1); /* @@ -2085,7 +2401,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. */ - vm_phys_early_add_seg(KPTphys, KPTphys + ptoa(nkpt)); + vm_phys_early_add_seg(trunc_page(KPTphys), round_page(KPTphys + ptoa_pt(nkpt))); /* * Account for the virtual addresses mapped by create_pagetables(). @@ -2138,7 +2454,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) kernel_pmap->pm_ucr3 = PMAP_NO_CR3; TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_stats.resident_count = res; - vm_radix_init(&kernel_pmap->pm_root); + ptpage_radix_init(&kernel_pmap->pm_root); kernel_pmap->pm_flags = pmap_flags; if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { rangeset_init(&kernel_pmap->pm_pkru, pkru_dup_range, @@ -2161,7 +2477,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) * mapping of pages. */ #define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); + v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n) * PAGE_SIZE_PTES; va = virtual_avail; pte = vtopte(va); @@ -2183,8 +2499,13 @@ pmap_bootstrap(vm_paddr_t *firstaddr) */ pcpu_pte[0] = pcpu0_phys | X86_PG_V | X86_PG_RW | pg_g | pg_nx | X86_PG_M | X86_PG_A; - for (i = 1; i < MAXCPU; i++) - pcpu_pte[i] = 0; + pte_store_datapg(pcpu_pte, pcpu0_phys | X86_PG_V | X86_PG_RW | pg_g | pg_nx | + X86_PG_M | X86_PG_A); + + size_t pteincr = PAGE_SIZE_PTES; + for (i = 1; i < MAXCPU; i++) { + pte_store_datapg(pcpu_pte + i * pteincr, 0); + } /* * Re-initialize PCPU area for BSP after switching. @@ -2313,12 +2634,14 @@ pmap_init_pat(void) load_cr4(cr4); } +#if 0 vm_page_t pmap_page_alloc_below_4g(bool zeroed) { return (vm_page_alloc_noobj_contig((zeroed ? VM_ALLOC_ZERO : 0), 1, 0, (1ULL << 32), PAGE_SIZE, 0, VM_MEMATTR_DEFAULT)); } +#endif /* * Initialize a vm_page's machine-dependent fields. 
@@ -2492,11 +2815,16 @@ void pmap_init(void) { struct pmap_preinit_mapping *ppim; - vm_page_t m, mpte; + ptpage_t m, mpte; pml4_entry_t *pml4e; unsigned long lm_max; int error, i, ret, skz63; +#if PAGE_SIZE == PAGE_SIZE_4K +#else + ptpage_radix_zinit(); +#endif + /* L1TF, reserve page @0 unconditionally */ vm_page_blacklist_add(0, bootverbose); @@ -2537,13 +2865,18 @@ pmap_init(void) */ PMAP_LOCK(kernel_pmap); for (i = 0; i < nkpt; i++) { - mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); - KASSERT(mpte >= vm_page_array && - mpte < &vm_page_array[vm_page_array_size], + mpte = pmap_pa_to_ptpage(KPTphys + (i << PAGE_SHIFT_PT)); +#if PAGE_SIZE == PAGE_SIZE_4K + KASSERT((vm_page_t)mpte >= vm_page_array && + (vm_page_t)mpte < &vm_page_array[vm_page_array_size], + ("pmap_init: page table page is out of range")); +#else + KASSERT(mpte >= pmap_pt_page_array && + mpte < &pmap_pt_page_array[pmap_pt_page_array_size], ("pmap_init: page table page is out of range")); - mpte->pindex = pmap_pde_pindex(KERNBASE) + i; - mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); - mpte->ref_count = 1; +#endif + pmap_ptpage_pindex_set(mpte, pmap_pde_pindex(KERNBASE) + i); + pmap_ptpage_ref_set(mpte, 1); /* * Collect the page table pages that were replaced by a 2MB @@ -2596,6 +2929,7 @@ pmap_init(void) } pmap_init_pv_table(); + pmap_initialized = 1; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; @@ -2651,7 +2985,7 @@ pmap_init(void) m = pmap_large_map_getptp_unlocked(); kernel_pmap->pm_pmltop[LMSPML5I + i] = X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | - pg_nx | VM_PAGE_TO_PHYS(m); + pg_nx | pmap_ptpage_pa(m); } } for (i = 0; i < lm_ents; i++) { @@ -2659,7 +2993,7 @@ pmap_init(void) pml4e = pmap_pml4e(kernel_pmap, kva_layout.lm_low + (u_long)i * NBPML4); *pml4e = X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | - pg_nx | VM_PAGE_TO_PHYS(m); + pg_nx | pmap_ptpage_pa(m); } } } @@ -3256,8 +3590,18 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) smp_masked_invlpg(va, pmap, pmap_invalidate_page_curcpu_cb); } +static void +pmap_invalidate_page_datapg(pmap_t pmap, vm_offset_t va) +{ + int i; + + for (i = 0; i < PAGE_SIZE_PTES; i++) { + pmap_invalidate_page(pmap, va + (i << PAGE_SHIFT_PT)); + } +} + /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ -#define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) +#define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE_PT) static void pmap_invalidate_range_pcid_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, @@ -3278,7 +3622,7 @@ pmap_invalidate_range_pcid_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, if (invpcid_works1) { d.pcid = pcid | PMAP_PCID_USER_PT; d.pad = 0; - for (d.addr = sva; d.addr < eva; d.addr += PAGE_SIZE) + for (d.addr = sva; d.addr < eva; d.addr += PAGE_SIZE_PT) invpcid(&d, INVPCID_ADDR); } else { kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE; @@ -3327,11 +3671,11 @@ pmap_invalidate_range_curcpu_cb(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) invpcid(&d, INVPCID_CTXGLOB); } else { - for (addr = sva; addr < eva; addr += PAGE_SIZE) + for (addr = sva; addr < eva; addr += PAGE_SIZE_PT) invlpg(addr); } } else if (pmap == PCPU_GET(curpmap)) { - for (addr = sva; addr < eva; addr += PAGE_SIZE) + for (addr = sva; addr < eva; addr += PAGE_SIZE_PT) invlpg(addr); pmap_invalidate_range_cb(pmap, sva, eva); } @@ -3646,7 +3990,7 @@ pmap_invalidate_cache_pages(vm_page_t *pages, int count) bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; - if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || 
+ if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE_PT || ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt)) pmap_invalidate_cache(); else { @@ -3721,8 +4065,8 @@ pmap_flush_cache_phys_range(vm_paddr_t spa, vm_paddr_t epa, vm_memattr_t mattr) pte = vtopte(vaddr); for (; spa < epa; spa += PAGE_SIZE) { sched_pin(); - pte_store(pte, spa | pte_bits); - pmap_invlpg(kernel_pmap, vaddr); + pte_store_datapg(pte, spa | pte_bits); + pmap_invlpg_datapg(kernel_pmap, vaddr); /* XXXKIB atomic inside flush_cache_range are excessive */ pmap_flush_cache_range(vaddr, vaddr + PAGE_SIZE); sched_unpin(); @@ -3873,7 +4217,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) pt_entry_t *pte; pte = vtopte(va); - pte_store(pte, pa | pg_g | pg_nx | X86_PG_A | X86_PG_M | + pte_store_datapg(pte, pa | pg_g | pg_nx | X86_PG_A | X86_PG_M | X86_PG_RW | X86_PG_V); } @@ -3885,7 +4229,7 @@ pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) pte = vtopte(va); cache_bits = pmap_cache_bits(kernel_pmap, mode, false); - pte_store(pte, pa | pg_g | pg_nx | X86_PG_A | X86_PG_M | + pte_store_datapg(pte, pa | pg_g | pg_nx | X86_PG_A | X86_PG_M | X86_PG_RW | X86_PG_V | cache_bits); } @@ -3899,7 +4243,7 @@ pmap_kremove(vm_offset_t va) pt_entry_t *pte; pte = vtopte(va); - pte_clear(pte); + pte_clear_datapg(pte); } /* @@ -3936,19 +4280,20 @@ pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) vm_page_t m; int cache_bits; + KASSERT((sva & PAGE_MASK) == 0, ("sva %p not page aligned", (void *)sva)); oldpte = 0; pte = vtopte(sva); - endpte = pte + count; + endpte = pte + (count * PAGE_SIZE_PTES); while (pte < endpte) { m = *ma++; cache_bits = pmap_cache_bits(kernel_pmap, m->md.pat_mode, false); pa = VM_PAGE_TO_PHYS(m) | cache_bits; if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) { oldpte |= *pte; - pte_store(pte, pa | pg_g | pg_nx | X86_PG_A | + pte_store_datapg(pte, pa | pg_g | pg_nx | X86_PG_A | X86_PG_M | X86_PG_RW | X86_PG_V); } - pte++; + pte += PAGE_SIZE_PTES; } if (__predict_false((oldpte & X86_PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * @@ -3995,14 +4340,20 @@ pmap_qremove(vm_offset_t sva, int count) * physical memory manager after the TLB has been updated. */ static __inline void -pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, bool set_PG_ZERO) +pmap_add_delayed_free_list(pmap_t pmap, ptpage_t m, struct ptpglist *free, bool set_PG_ZERO) { +#if 0 + /* CHUQ hack this for now. */ + vm_page_t mm = (vm_page_t)m; if (set_PG_ZERO) - m->flags |= PG_ZERO; + mm->flags |= PG_ZERO; else - m->flags &= ~PG_ZERO; - SLIST_INSERT_HEAD(free, m, plinks.s.ss); + mm->flags &= ~PG_ZERO; + + /* CHUQ add something for PG_ZERO tracking */ +#endif + pmap_ptpage_slist_add(pmap, free, m); } /* @@ -4022,16 +4373,16 @@ pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, bool set_PG_ZERO) * valid mappings with identical attributes including PG_A; "mpte"'s valid * field will be set to VM_PAGE_BITS_ALL. */ -static __inline int -pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, +static __noinline int +pmap_insert_pt_page(pmap_t pmap, ptpage_t mpte, bool promoted, bool allpte_PG_A_set) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(promoted || !allpte_PG_A_set, ("a zero-filled PTP can't have PG_A set in every PTE")); - mpte->valid = promoted ? (allpte_PG_A_set ? VM_PAGE_BITS_ALL : 1) : 0; - return (vm_radix_insert(&pmap->pm_root, mpte)); + pmap_ptpage_valid_set(mpte, promoted ? (allpte_PG_A_set ? 
VM_PAGE_BITS_ALL : 1) : 0); + return (ptpage_radix_insert(&pmap->pm_root, mpte)); } /* @@ -4040,12 +4391,12 @@ pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte, bool promoted, * Otherwise, returns NULL if there is no page table page corresponding to the * specified virtual address. */ -static __inline vm_page_t +static __inline ptpage_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); - return (vm_radix_remove(&pmap->pm_root, pmap_pde_pindex(va))); + return (ptpage_radix_remove(&pmap->pm_root, pmap_pde_pindex(va))); } /* @@ -4055,11 +4406,11 @@ pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) * page table page was unmapped and false otherwise. */ static inline bool -pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, ptpage_t m, struct ptpglist *free) { - --m->ref_count; - if (m->ref_count == 0) { + pmap_ptpage_ref_add(m, -1); + if (pmap_ptpage_refs(m) == 0) { _pmap_unwire_ptp(pmap, va, m, free); return (true); } else @@ -4067,20 +4418,22 @@ pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) } static void -_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) +_pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, ptpage_t ptp, struct ptpglist *free) { pml5_entry_t *pml5; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pd; - vm_page_t pdpg, pdppg, pml4pg; + ptpage_t pdpg, pdppg, pml4pg; + vm_pindex_t pindex; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ - if (m->pindex >= NUPDE + NUPDPE + NUPML4E) { + pindex = pmap_ptpage_pindex(ptp); + if (pindex >= NUPDE + NUPDPE + NUPML4E) { /* PML4 page */ MPASS(pmap_is_la57(pmap)); pml5 = pmap_pml5e(pmap, va); @@ -4089,7 +4442,7 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) pml5 = pmap_pml5e_u(pmap, va); *pml5 = 0; } - } else if (m->pindex >= NUPDE + NUPDPE) { + } else if (pindex >= NUPDE + NUPDPE) { /* PDP page */ pml4 = pmap_pml4e(pmap, va); *pml4 = 0; @@ -4098,7 +4451,7 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) pml4 = pmap_pml4e_u(pmap, va); *pml4 = 0; } - } else if (m->pindex >= NUPDE) { + } else if (pindex >= NUPDE) { /* PD page */ pdp = pmap_pdpe(pmap, va); *pdp = 0; @@ -4107,17 +4460,17 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) pd = pmap_pde(pmap, va); *pd = 0; } - if (m->pindex < NUPDE) { + if (pindex < NUPDE) { /* We just released a PT, unhold the matching PD */ - pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME); + pdpg = pmap_pa_to_ptpage(*pmap_pdpe(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdpg, free); - } else if (m->pindex < NUPDE + NUPDPE) { + } else if (pindex < NUPDE + NUPDPE) { /* We just released a PD, unhold the matching PDP */ - pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); + pdppg = pmap_pa_to_ptpage(*pmap_pml4e(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdppg, free); - } else if (m->pindex < NUPDE + NUPDPE + NUPML4E && pmap_is_la57(pmap)) { + } else if (pindex < NUPDE + NUPDPE + NUPML4E && pmap_is_la57(pmap)) { /* We just released a PDP, unhold the matching PML4 */ - pml4pg = PHYS_TO_VM_PAGE(*pmap_pml5e(pmap, va) & PG_FRAME); + pml4pg = pmap_pa_to_ptpage(*pmap_pml5e(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pml4pg, free); } @@ -4127,7 +4480,7 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) * Put page on a list so that it is released after * *ALL* TLB 
shootdown is done */ - pmap_add_delayed_free_list(m, free, true); + pmap_add_delayed_free_list(pmap, ptp, free, true); } /* @@ -4136,14 +4489,14 @@ _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, - struct spglist *free) + struct ptpglist *free) { - vm_page_t mpte; + ptpage_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); - mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); + mpte = pmap_pa_to_ptpage(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, va, mpte, free)); } @@ -4152,9 +4505,9 @@ pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, * mapping. */ static void -pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte) +pmap_abort_ptp(pmap_t pmap, vm_offset_t va, ptpage_t mpte) { - struct spglist free; + struct ptpglist free; SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpte, &free)) { @@ -4164,7 +4517,7 @@ pmap_abort_ptp(pmap_t pmap, vm_offset_t va, vm_page_t mpte) * page table pages. Invalidate those entries. */ pmap_invalidate_page(pmap, va); - vm_page_free_pages_toq(&free, true); + pmap_ptpage_slist_free(&free, true); } } @@ -4181,163 +4534,573 @@ pmap_pinit_pcids(pmap_t pmap, uint32_t pcid, int gen) } } -void -pmap_pinit0(pmap_t pmap) +void +pmap_pinit0(pmap_t pmap) +{ + struct proc *p; + struct thread *td; + + PMAP_LOCK_INIT(pmap); + pmap->pm_pmltop = kernel_pmap->pm_pmltop; + pmap->pm_pmltopu = NULL; + pmap->pm_cr3 = kernel_pmap->pm_cr3; + /* hack to keep pmap_pti_pcid_invalidate() alive */ + pmap->pm_ucr3 = PMAP_NO_CR3; + ptpage_radix_init(&pmap->pm_root); + CPU_ZERO(&pmap->pm_active); + TAILQ_INIT(&pmap->pm_pvchunk); +#if PAGE_SIZE == PAGE_SIZE_4K +#else + TAILQ_INIT(&pmap->pm_ptpfree); +#endif + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); + pmap->pm_flags = pmap_flags; + pmap->pm_pcidp = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK); + pmap_pinit_pcids(pmap, PMAP_PCID_KERN + 1, 1); + pmap_activate_boot(pmap); + td = curthread; + if (pti) { + p = td->td_proc; + PROC_LOCK(p); + p->p_md.md_flags |= P_MD_KPTI; + PROC_UNLOCK(p); + } + pmap_thread_init_invl_gen(td); + + if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { + pmap_pkru_ranges_zone = uma_zcreate("pkru ranges", + sizeof(struct pmap_pkru_range), NULL, NULL, NULL, NULL, + UMA_ALIGN_PTR, 0); + } +} + +void +pmap_pinit_pml4(ptpage_t pml4ptp) +{ + pml4_entry_t *pm_pml4; + int i; + + pm_pml4 = pmap_ptpage_va(pml4ptp); + + /* Wire in kernel global address entries. 
*/ + for (i = 0; i < NKPML4E; i++) { + pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa_pt(i)) | X86_PG_RW | + X86_PG_V; + } +#ifdef KASAN + for (i = 0; i < NKASANPML4E; i++) { + pm_pml4[KASANPML4I + i] = (KASANPDPphys + ptoa_pt(i)) | X86_PG_RW | + X86_PG_V | pg_nx; + } +#endif +#ifdef KMSAN + for (i = 0; i < NKMSANSHADPML4E; i++) { + pm_pml4[KMSANSHADPML4I + i] = (KMSANSHADPDPphys + ptoa_pt(i)) | + X86_PG_RW | X86_PG_V | pg_nx; + } + for (i = 0; i < NKMSANORIGPML4E; i++) { + pm_pml4[KMSANORIGPML4I + i] = (KMSANORIGPDPphys + ptoa_pt(i)) | + X86_PG_RW | X86_PG_V | pg_nx; + } +#endif + for (i = 0; i < ndmpdpphys; i++) { + pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa_pt(i)) | X86_PG_RW | + X86_PG_V; + } + + /* install self-referential address mapping entry(s) */ + pm_pml4[PML4PML4I] = pmap_ptpage_pa(pml4ptp) | X86_PG_V | X86_PG_RW | + X86_PG_A | X86_PG_M; + + /* install large map entries if configured */ + for (i = 0; i < lm_ents; i++) + pm_pml4[LMSPML4I + i] = kernel_pmap->pm_pmltop[LMSPML4I + i]; +} + +void +pmap_pinit_pml5(ptpage_t pml5ptp) +{ + pml5_entry_t *pm_pml5; + int i; + + pm_pml5 = pmap_ptpage_va(pml5ptp); + for (i = 0; i < NPML5EPG / 2; i++) + pm_pml5[i] = 0; + for (; i < NPML5EPG; i++) + pm_pml5[i] = kernel_pmap->pm_pmltop[i]; +} + +static void +pmap_pinit_pml4_pti(ptpage_t pml4pgu) +{ + pml4_entry_t *pm_pml4u; + int i; + + pm_pml4u = pmap_ptpage_va(pml4pgu); + for (i = 0; i < NPML4EPG; i++) + pm_pml4u[i] = pti_pml4[i]; +} + +static void +pmap_pinit_pml5_pti(ptpage_t pml5pgu) +{ + pml5_entry_t *pm_pml5u; + + pm_pml5u = pmap_ptpage_va(pml5pgu); + bzero(pm_pml5u, PAGE_SIZE_PT); + + /* + * Add pml5 entry at top of KVA pointing to existing pml4 pti + * table, entering all kernel mappings needed for usermode + * into level 5 table. + */ + pm_pml5u[pmap_pml5e_index(UPT_MAX_ADDRESS)] = + pmap_kextract((vm_offset_t)pti_pml4) | + X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; +} + +#if PAGE_SIZE == PAGE_SIZE_4K + +/* Allocate a page table page and do related bookkeeping */ +static ptpage_t +pmap_alloc_pt_page(pmap_t pmap, vm_pindex_t pindex, int flags) +{ + vm_page_t m; + + m = vm_page_alloc_noobj(flags); + if (__predict_false(m == NULL)) + return (NULL); + m->pindex = pindex; + pmap_pt_page_count_adj(pmap, 1); + return ((ptpage_t)m); +} + +void +pmap_free_pt_page(pmap_t pmap, ptpage_t ptp, bool zerofilled) +{ + vm_page_t m; + + /* + * This function assumes the page will need to be unwired, + * even though the counterpart allocation in pmap_alloc_pt_page() + * doesn't enforce VM_ALLOC_WIRED. However, all current uses + * of pmap_free_pt_page() require unwiring. The case in which + * a PT page doesn't require unwiring because its ref_count has + * naturally reached 0 is handled through _pmap_unwire_ptp(). 
+ */ + m = (vm_page_t)ptp; + vm_page_unwire_noq(m); + if (zerofilled) + vm_page_free_zero(m); + else + vm_page_free(m); + + pmap_pt_page_count_adj(pmap, -1); +} + +vm_paddr_t +pmap_ptpage_pa(ptpage_t ptp) +{ + vm_page_t m = (vm_page_t)ptp; + + return (VM_PAGE_TO_PHYS(m)); +} + +void * +pmap_ptpage_pa_to_va(vm_paddr_t pa) +{ + + return ((void *)PHYS_TO_DMAP(pa)); +} + +void * +pmap_ptpage_va(ptpage_t ptp) +{ + + return (pmap_ptpage_pa_to_va(pmap_ptpage_pa(ptp))); +} + +ptpage_t +pmap_pa_to_ptpage(vm_paddr_t pa) +{ + + return ((ptpage_t)PHYS_TO_VM_PAGE(pa)); +} + +ptpage_t +pmap_va_to_ptpage(void *p) +{ + + return (pmap_pa_to_ptpage(DMAP_TO_PHYS((uintptr_t)p))); +} + +vm_pindex_t +pmap_ptpage_pindex(ptpage_t ptp) +{ + + return (((vm_page_t)ptp)->pindex); +} + +void +pmap_ptpage_pindex_set(ptpage_t ptp, vm_pindex_t pindex) +{ + + ((vm_page_t)ptp)->pindex = pindex; +} + +u_int +pmap_ptpage_refs(ptpage_t ptp) +{ + + return (((vm_page_t)ptp)->ref_count); +} + +u_int +pmap_ptpage_ref_add(ptpage_t ptp, int delta) +{ + vm_page_t m; + u_int old; + + m = (vm_page_t)ptp; + old = m->ref_count; + m->ref_count += delta; + return (old); +} + +void +pmap_ptpage_ref_set(ptpage_t ptp, u_int cnt) +{ + + ((vm_page_t)ptp)->ref_count = cnt; +} + +void +pmap_ptpage_valid_set(ptpage_t ptp, vm_page_bits_t val) +{ + + ((vm_page_t)ptp)->valid = val; +} + +vm_page_bits_t +pmap_ptpage_valid_get(ptpage_t ptp) +{ + + return (((vm_page_t)ptp)->valid); +} + +void +pmap_ptpage_slist_init(struct ptpglist *ptpgl) +{ + struct spglist *spgl = (void *)ptpgl; + + SLIST_INIT(spgl); +} + +void +pmap_ptpage_slist_add(pmap_t pmap, struct ptpglist *ptpgl, ptpage_t ptp) +{ + struct spglist *spgl = (void *)ptpgl; + vm_page_t m = (void *)ptp; + + SLIST_INSERT_HEAD(spgl, m, plinks.s.ss); +} + +void +pmap_ptpage_slist_free(struct ptpglist *ptpgl, bool update_wire_count) +{ + struct spglist *spgl = (void *)ptpgl; + + vm_page_free_pages_toq(spgl, update_wire_count); +} + +#else + +static vm_page_t +pmap_ptpage_vmpage(ptpage_t ptp) +{ + vm_paddr_t pa = pmap_ptpage_pa(ptp); + vm_page_t m; + + pa &= ~PAGE_MASK; + m = PHYS_TO_VM_PAGE(pa); + return m; +} + +static ptpage_t +pmap_alloc_pt_page(pmap_t pmap, vm_pindex_t pindex, int flags) +{ + ptpage_t ptp; + vm_page_t m; + int i; + +/* CHUQ for now just use a whole vm_page for a ptpage */ +#define PTPAGE_WHOLE_PAGE 1 + +#ifdef PTPAGE_WHOLE_PAGE + pmap = NULL; +#endif + + /* + * CHUQ how to handle VM_ALLOC_INTERRUPT or VM_ALLOC_NOFREE ? + */ + /* + * CHUQ for now just keep a list of free ptps in the pmap. + * we'll make this more efficient later. + * m->pindex is the count of free ptpages in this vm_page. + * if pmap is NULL then just use the first ptpage and waste the rest. + */ + if (pmap != NULL) { + if (!cold) { + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + } + ptp = TAILQ_FIRST(&pmap->pm_ptpfree); + if (ptp != NULL) { + TAILQ_REMOVE(&pmap->pm_ptpfree, ptp, pt_list); + m = pmap_ptpage_vmpage(ptp); + if (--m->pindex == 0) { + /* CHUQ do something? 
*/ + } + if ((flags & VM_ALLOC_ZERO) != 0) + bzero(pmap_ptpage_va(ptp), PAGE_SIZE_PT); + goto out; + } + } + m = vm_page_alloc_noobj(flags); + if (__predict_false(m == NULL)) + return (NULL); + m->pindex = PAGE_SIZE_PTES - 1; + ptp = pmap_pa_to_ptpage(VM_PAGE_TO_PHYS(m)); + KASSERT(pmap_ptpage_va(ptp) == (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), + ("ptp va mismatch ptp va %p vm_page %p", pmap_ptpage_va(ptp), + (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)))); + if (pmap != NULL) { + for (i = 1; i < PAGE_SIZE_PTES; i++, ptp++) { + TAILQ_INSERT_HEAD(&pmap->pm_ptpfree, ptp, pt_list); + } + } + +out: + ptp->pindex = pindex; + ptp->refs = 1; + ptp->free = 0; + ptp->valid = 0; + ptp->pmap = pmap; + pmap_pt_page_count_adj(pmap, 1); + return ptp; +} + +void +pmap_free_pt_page(pmap_t pmap, ptpage_t ptp, bool zerofilled) +{ + vm_paddr_t pa; + vm_page_t m; + ptpage_t tptp; + int i; + +#ifdef PTPAGE_WHOLE_PAGE + pmap = NULL; +#endif + + /* CHUQ do something with zerofilled. */ + + pmap_pt_page_count_adj(pmap, -1); + pa = pmap_ptpage_pa(ptp); + pa &= ~PAGE_MASK; + m = PHYS_TO_VM_PAGE(pa); + + /* + * If there's no pmap then this is a whole vm_page + * and we can just free it. + */ + + if (pmap == NULL) { + vm_page_unwire_noq(m); + vm_page_free(m); + return; + } + + /* + * If this is not freeing the last ptpage of the vm_page + * then just put the ptpage on the pmap's list of free ptpages. + */ + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + if (++ m->pindex < PAGE_SIZE_PTES) { + TAILQ_INSERT_HEAD(&pmap->pm_ptpfree, ptp, pt_list); + return; + } + + /* + * The vm_page containing this ptpage is now entirely free + * so take all the other ptpages in this vm_page off the pmap's list and + * free the underlying vm_page. + */ + + for (i = 0; i < PAGE_SIZE; i += PAGE_SIZE_PT) { + tptp = pmap_pa_to_ptpage(pa + i); + if (tptp != ptp) { + TAILQ_REMOVE(&pmap->pm_ptpfree, tptp, pt_list); + } + + } + vm_page_unwire_noq(m); + vm_page_free(m); +} + +vm_paddr_t +pmap_ptpage_pa(ptpage_t ptp) +{ + + return ((ptp - pmap_pt_page_array) << PAGE_SHIFT_PT) + (first_page << PAGE_SHIFT); +} + +void * +pmap_ptpage_pa_to_va(vm_paddr_t pa) +{ + + return ((void *)PHYS_TO_DMAP(pa)); +} + +void * +pmap_ptpage_va(ptpage_t ptp) +{ + + return (pmap_ptpage_pa_to_va(pmap_ptpage_pa(ptp))); +} + + +ptpage_t +pmap_pa_to_ptpage(vm_paddr_t pa) +{ + + return (&pmap_pt_page_array[(pa >> PAGE_SHIFT_PT) - first_page * PAGE_SIZE_PTES]); +} + +ptpage_t +pmap_va_to_ptpage(void *p) +{ + + return (pmap_pa_to_ptpage(DMAP_TO_PHYS((uintptr_t)p))); +} + +vm_pindex_t +pmap_ptpage_pindex(ptpage_t ptp) +{ + + return (ptp->pindex); +} + +void +pmap_ptpage_pindex_set(ptpage_t ptp, vm_pindex_t pindex) +{ + + ptp->pindex = pindex; +} + +u_int +pmap_ptpage_refs(ptpage_t ptp) { - struct proc *p; - struct thread *td; - PMAP_LOCK_INIT(pmap); - pmap->pm_pmltop = kernel_pmap->pm_pmltop; - pmap->pm_pmltopu = NULL; - pmap->pm_cr3 = kernel_pmap->pm_cr3; - /* hack to keep pmap_pti_pcid_invalidate() alive */ - pmap->pm_ucr3 = PMAP_NO_CR3; - vm_radix_init(&pmap->pm_root); - CPU_ZERO(&pmap->pm_active); - TAILQ_INIT(&pmap->pm_pvchunk); - bzero(&pmap->pm_stats, sizeof pmap->pm_stats); - pmap->pm_flags = pmap_flags; - pmap->pm_pcidp = uma_zalloc_pcpu(pcpu_zone_8, M_WAITOK); - pmap_pinit_pcids(pmap, PMAP_PCID_KERN + 1, 1); - pmap_activate_boot(pmap); - td = curthread; - if (pti) { - p = td->td_proc; - PROC_LOCK(p); - p->p_md.md_flags |= P_MD_KPTI; - PROC_UNLOCK(p); - } - pmap_thread_init_invl_gen(td); + return (ptp->refs); +} - if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { - 
pmap_pkru_ranges_zone = uma_zcreate("pkru ranges", - sizeof(struct pmap_pkru_range), NULL, NULL, NULL, NULL, - UMA_ALIGN_PTR, 0); - } +u_int +pmap_ptpage_ref_add(ptpage_t ptp, int delta) +{ + u_int old; + + old = ptp->refs; + ptp->refs += delta; + return (old); } void -pmap_pinit_pml4(vm_page_t pml4pg) +pmap_ptpage_ref_set(ptpage_t ptp, u_int cnt) { - pml4_entry_t *pm_pml4; - int i; - pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); + ptp->refs = cnt; +} - /* Wire in kernel global address entries. */ - for (i = 0; i < NKPML4E; i++) { - pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) | X86_PG_RW | - X86_PG_V; - } -#ifdef KASAN - for (i = 0; i < NKASANPML4E; i++) { - pm_pml4[KASANPML4I + i] = (KASANPDPphys + ptoa(i)) | X86_PG_RW | - X86_PG_V | pg_nx; - } -#endif -#ifdef KMSAN - for (i = 0; i < NKMSANSHADPML4E; i++) { - pm_pml4[KMSANSHADPML4I + i] = (KMSANSHADPDPphys + ptoa(i)) | - X86_PG_RW | X86_PG_V | pg_nx; - } - for (i = 0; i < NKMSANORIGPML4E; i++) { - pm_pml4[KMSANORIGPML4I + i] = (KMSANORIGPDPphys + ptoa(i)) | - X86_PG_RW | X86_PG_V | pg_nx; - } -#endif - for (i = 0; i < ndmpdpphys; i++) { - pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | - X86_PG_V; - } +void +pmap_ptpage_valid_set(ptpage_t ptp, vm_page_bits_t val) +{ - /* install self-referential address mapping entry(s) */ - pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW | - X86_PG_A | X86_PG_M; + ptp->valid = val; +} - /* install large map entries if configured */ - for (i = 0; i < lm_ents; i++) - pm_pml4[LMSPML4I + i] = kernel_pmap->pm_pmltop[LMSPML4I + i]; +vm_page_bits_t +pmap_ptpage_valid_get(ptpage_t ptp) +{ + + return (ptp->valid); } void -pmap_pinit_pml5(vm_page_t pml5pg) +pmap_ptpage_slist_init(struct ptpglist *ptpgl) { - pml5_entry_t *pm_pml5; - int i; - pm_pml5 = (pml5_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml5pg)); - for (i = 0; i < NPML5EPG / 2; i++) - pm_pml5[i] = 0; - for (; i < NPML5EPG; i++) - pm_pml5[i] = kernel_pmap->pm_pmltop[i]; + SLIST_INIT(ptpgl); } -static void -pmap_pinit_pml4_pti(vm_page_t pml4pgu) +void +pmap_ptpage_slist_add(pmap_t pmap, struct ptpglist *ptpgl, ptpage_t ptp) { - pml4_entry_t *pm_pml4u; - int i; - pm_pml4u = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pgu)); - for (i = 0; i < NPML4EPG; i++) - pm_pml4u[i] = pti_pml4[i]; + ptp->pmap = pmap; + SLIST_INSERT_HEAD(ptpgl, ptp, ss); } -static void -pmap_pinit_pml5_pti(vm_page_t pml5pgu) +void +pmap_ptpage_slist_free(struct ptpglist *ptpgl, bool update_wire_count) { - pml5_entry_t *pm_pml5u; - - pm_pml5u = (pml5_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml5pgu)); - pagezero(pm_pml5u); + ptpage_t ptp; + pmap_t pmap; /* - * Add pml5 entry at top of KVA pointing to existing pml4 pti - * table, entering all kernel mappings needed for usermode - * into level 5 table. + * XXX CHUQ we can't take PMAP_LOCK here, it's already held in + * pmap_enter_pde() and pmap_enter_2mpage(). 
*/ - pm_pml5u[pmap_pml5e_index(UPT_MAX_ADDRESS)] = - pmap_kextract((vm_offset_t)pti_pml4) | - X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; + while ((ptp = SLIST_FIRST(ptpgl)) != NULL) { + SLIST_REMOVE_HEAD(ptpgl, ss); + pmap = ptp->pmap; +#ifdef PTPAGE_WHOLE_PAGE +#else + PMAP_LOCK(pmap); +#endif + pmap_free_pt_page(pmap, ptp, false); +#ifdef PTPAGE_WHOLE_PAGE +#else + PMAP_UNLOCK(pmap); +#endif + } } -/* Allocate a page table page and do related bookkeeping */ -static vm_page_t -pmap_alloc_pt_page(pmap_t pmap, vm_pindex_t pindex, int flags) +ptpage_t +pmap_page_alloc_below_4g(bool zeroed) { + ptpage_t ptp; vm_page_t m; - m = vm_page_alloc_noobj(flags); + /* + * These ptpages are all used only temporarily, so just allocate a whole + * vm_page for each ptpage. + */ + + m = vm_page_alloc_noobj_contig(VM_ALLOC_WIRED | (zeroed ? VM_ALLOC_ZERO : 0), + 1, 0, (1ULL << 32), PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if (__predict_false(m == NULL)) return (NULL); - m->pindex = pindex; - pmap_pt_page_count_adj(pmap, 1); - return (m); + ptp = pmap_pa_to_ptpage(VM_PAGE_TO_PHYS(m)); + return (ptp); } -static void -pmap_free_pt_page(pmap_t pmap, vm_page_t m, bool zerofilled) -{ - /* - * This function assumes the page will need to be unwired, - * even though the counterpart allocation in pmap_alloc_pt_page() - * doesn't enforce VM_ALLOC_WIRED. However, all current uses - * of pmap_free_pt_page() require unwiring. The case in which - * a PT page doesn't require unwiring because its ref_count has - * naturally reached 0 is handled through _pmap_unwire_ptp(). - */ - vm_page_unwire_noq(m); - if (zerofilled) - vm_page_free_zero(m); - else - vm_page_free(m); +#endif - pmap_pt_page_count_adj(pmap, -1); -} _Static_assert(sizeof(struct pmap_pcid) == 8, "Fix pcpu zone for pm_pcidp"); +static void pmap_ptpages(db_expr_t, bool, db_expr_t, char *); + /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. @@ -4345,7 +5108,7 @@ _Static_assert(sizeof(struct pmap_pcid) == 8, "Fix pcpu zone for pm_pcidp"); int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) { - vm_page_t pmltop_pg, pmltop_pgu; + ptpage_t pmltop_pg, pmltop_pgu; vm_paddr_t pmltop_phys; bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -4363,12 +5126,16 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) * not effective since we started using non-managed mapping of * the shared page. 
*/ +#if PAGE_SIZE == PAGE_SIZE_4K +#else + TAILQ_INIT(&pmap->pm_ptpfree); +#endif pmltop_pg = pmap_alloc_pt_page(NULL, 0, VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); pmap_pt_page_count_pinit(pmap, 1); - pmltop_phys = VM_PAGE_TO_PHYS(pmltop_pg); - pmap->pm_pmltop = (pml5_entry_t *)PHYS_TO_DMAP(pmltop_phys); + pmltop_phys = pmap_ptpage_pa(pmltop_pg); + pmap->pm_pmltop = pmap_ptpage_va(pmltop_pg); if (pmap_pcid_enabled) { if (pmap->pm_pcidp == NULL) @@ -4404,13 +5171,12 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) pmltop_pgu = pmap_alloc_pt_page(NULL, 0, VM_ALLOC_WIRED | VM_ALLOC_WAITOK); pmap_pt_page_count_pinit(pmap, 1); - pmap->pm_pmltopu = (pml4_entry_t *)PHYS_TO_DMAP( - VM_PAGE_TO_PHYS(pmltop_pgu)); + pmap->pm_pmltopu = pmap_ptpage_va(pmltop_pgu); if (pmap_is_la57(pmap)) pmap_pinit_pml5_pti(pmltop_pgu); else pmap_pinit_pml4_pti(pmltop_pgu); - pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pmltop_pgu); + pmap->pm_ucr3 = pmap_ptpage_pa(pmltop_pgu); } if ((cpu_stdext_feature2 & CPUID_STDEXT2_PKU) != 0) { rangeset_init(&pmap->pm_pkru, pkru_dup_range, @@ -4423,7 +5189,7 @@ pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) break; } - vm_radix_init(&pmap->pm_root); + ptpage_radix_init(&pmap->pm_root); CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); pmap->pm_flags = flags; @@ -4442,16 +5208,16 @@ pmap_pinit(pmap_t pmap) static void pmap_allocpte_free_unref(pmap_t pmap, vm_offset_t va, pt_entry_t *pte) { - vm_page_t mpg; - struct spglist free; + ptpage_t ptp; + struct ptpglist free; - mpg = PHYS_TO_VM_PAGE(*pte & PG_FRAME); - if (mpg->ref_count != 0) + ptp = pmap_pa_to_ptpage(*pte & PG_FRAME); + if (pmap_ptpage_refs(ptp) != 0) return; SLIST_INIT(&free); - _pmap_unwire_ptp(pmap, va, mpg, &free); + _pmap_unwire_ptp(pmap, va, ptp, &free); pmap_invalidate_page(pmap, va); - vm_page_free_pages_toq(&free, true); + pmap_ptpage_slist_free(&free, true); } static pml4_entry_t * @@ -4461,7 +5227,7 @@ pmap_allocpte_getpml4(pmap_t pmap, struct rwlock **lockp, vm_offset_t va, vm_pindex_t pml5index; pml5_entry_t *pml5; pml4_entry_t *pml4; - vm_page_t pml4pg; + ptpage_t pml4pg; pt_entry_t PG_V; bool allocated; @@ -4482,11 +5248,11 @@ pmap_allocpte_getpml4(pmap_t pmap, struct rwlock **lockp, vm_offset_t va, pml4 = (pml4_entry_t *)PHYS_TO_DMAP(*pml5 & PG_FRAME); pml4 = &pml4[pmap_pml4e_index(va)]; if ((*pml4 & PG_V) == 0) { - pml4pg = PHYS_TO_VM_PAGE(*pml5 & PG_FRAME); + pml4pg = pmap_pa_to_ptpage(*pml5 & PG_FRAME); if (allocated && !addref) - pml4pg->ref_count--; + pmap_ptpage_ref_add(pml4pg, -1); else if (!allocated && addref) - pml4pg->ref_count++; + pmap_ptpage_ref_add(pml4pg, 1); } return (pml4); } @@ -4495,7 +5261,7 @@ static pdp_entry_t * pmap_allocpte_getpdp(pmap_t pmap, struct rwlock **lockp, vm_offset_t va, bool addref) { - vm_page_t pdppg; + ptpage_t pdppg; pml4_entry_t *pml4; pdp_entry_t *pdp; pt_entry_t PG_V; @@ -4523,11 +5289,11 @@ pmap_allocpte_getpdp(pmap_t pmap, struct rwlock **lockp, vm_offset_t va, pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pmap_pdpe_index(va)]; if ((*pdp & PG_V) == 0) { - pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME); + pdppg = pmap_pa_to_ptpage(*pml4 & PG_FRAME); if (allocated && !addref) - pdppg->ref_count--; + pmap_ptpage_ref_add(pdppg, -1); else if (!allocated && addref) - pdppg->ref_count++; + pmap_ptpage_ref_add(pdppg, 1); } return (pdp); } @@ -4565,7 +5331,7 @@ pmap_allocpte_getpdp(pmap_t pmap, struct rwlock **lockp, vm_offset_t va, * The root page at PML4/PML5 does not participate in this indexing scheme, * since it 
is statically allocated by pmap_pinit() and not by pmap_allocpte(). */ -static vm_page_t +static ptpage_t pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, vm_offset_t va) { @@ -4574,7 +5340,7 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, pml4_entry_t *pml4, *pml4u; pdp_entry_t *pdp; pd_entry_t *pd; - vm_page_t m, pdpg; + ptpage_t m, pdpg; pt_entry_t PG_A, PG_M, PG_RW, PG_V; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -4603,14 +5369,14 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, pml5 = &pmap->pm_pmltop[pml5index]; KASSERT((*pml5 & PG_V) == 0, ("pmap %p va %#lx pml5 %#lx", pmap, va, *pml5)); - *pml5 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; + *pml5 = pmap_ptpage_pa(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; if (pmap->pm_pmltopu != NULL && pml5index < NUPML5E) { MPASS(pmap->pm_ucr3 != PMAP_NO_CR3); *pml5 |= pg_nx; pml5u = &pmap->pm_pmltopu[pml5index]; - *pml5u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | + *pml5u = pmap_ptpage_pa(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } } else if (ptepindex >= NUPDE + NUPDPE) { @@ -4623,7 +5389,7 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, } KASSERT((*pml4 & PG_V) == 0, ("pmap %p va %#lx pml4 %#lx", pmap, va, *pml4)); - *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; + *pml4 = pmap_ptpage_pa(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; if (!pmap_is_la57(pmap) && pmap->pm_pmltopu != NULL && pml4index < NUPML4E) { @@ -4639,7 +5405,7 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, *pml4 |= pg_nx; pml4u = &pmap->pm_pmltopu[pml4index]; - *pml4u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | + *pml4u = pmap_ptpage_pa(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } } else if (ptepindex >= NUPDE) { @@ -4651,7 +5417,7 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, } KASSERT((*pdp & PG_V) == 0, ("pmap %p va %#lx pdp %#lx", pmap, va, *pdp)); - *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; + *pdp = pmap_ptpage_pa(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else { /* Wire up a new PTE page */ pdp = pmap_allocpte_getpdp(pmap, lockp, va, false); @@ -4670,8 +5436,8 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, } } else { /* Add reference to the pd page */ - pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); - pdpg->ref_count++; + pdpg = pmap_pa_to_ptpage(*pdp & PG_FRAME); + pmap_ptpage_ref_add(pdpg, 1); } pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); @@ -4679,7 +5445,7 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, pd = &pd[pmap_pde_index(va)]; KASSERT((*pd & PG_V) == 0, ("pmap %p va %#lx pd %#lx", pmap, va, *pd)); - *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; + *pd = pmap_ptpage_pa(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } return (m); @@ -4694,30 +5460,30 @@ pmap_allocpte_nosleep(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, * drop pmap lock to sleep while a page table page has ref_count == 0, * which prevents the page from being freed under us. 
*/ -static vm_page_t +static ptpage_t pmap_allocpte_alloc(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp, vm_offset_t va) { - vm_page_t m; + ptpage_t ptp; - m = pmap_allocpte_nosleep(pmap, ptepindex, lockp, va); - if (m == NULL && lockp != NULL) { + ptp = pmap_allocpte_nosleep(pmap, ptepindex, lockp, va); + if (ptp == NULL && lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); PMAP_ASSERT_NOT_IN_DI(); vm_wait(NULL); PMAP_LOCK(pmap); } - return (m); + return (ptp); } static pd_entry_t * -pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp, +pmap_alloc_pde(pmap_t pmap, vm_offset_t va, ptpage_t *pdpgp, struct rwlock **lockp) { pdp_entry_t *pdpe, PG_V; pd_entry_t *pde; - vm_page_t pdpg; + ptpage_t pdpg; vm_pindex_t pdpindex; PG_V = pmap_valid_bit(pmap); @@ -4728,8 +5494,8 @@ pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp, pde = pmap_pdpe_to_pde(pdpe, va); if (va < VM_MAXUSER_ADDRESS) { /* Add a reference to the pd page. */ - pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME); - pdpg->ref_count++; + pdpg = pmap_pa_to_ptpage(*pdpe & PG_FRAME); + pmap_ptpage_ref_add(pdpg, 1); } else pdpg = NULL; } else if (va < VM_MAXUSER_ADDRESS) { @@ -4742,7 +5508,7 @@ pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp, else return (NULL); } - pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); + pde = pmap_ptpage_va(pdpg); pde = &pde[pmap_pde_index(va)]; } else panic("pmap_alloc_pde: missing page table page for va %#lx", @@ -4751,12 +5517,12 @@ pmap_alloc_pde(pmap_t pmap, vm_offset_t va, vm_page_t *pdpgp, return (pde); } -static vm_page_t +static ptpage_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pd, PG_V; - vm_page_t m; + ptpage_t ptp; PG_V = pmap_valid_bit(pmap); @@ -4789,18 +5555,18 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) * hold count, and activate it. */ if (pd != NULL && (*pd & PG_V) != 0) { - m = PHYS_TO_VM_PAGE(*pd & PG_FRAME); - m->ref_count++; + ptp = pmap_pa_to_ptpage(*pd & PG_FRAME); + pmap_ptpage_ref_add(ptp, 1); } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ - m = pmap_allocpte_alloc(pmap, ptepindex, lockp, va); - if (m == NULL && lockp != NULL) + ptp = pmap_allocpte_alloc(pmap, ptepindex, lockp, va); + if (ptp == NULL && lockp != NULL) goto retry; } - return (m); + return (ptp); } /*************************************************** @@ -4815,16 +5581,16 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) void pmap_release(pmap_t pmap) { - vm_page_t m; + ptpage_t m; int i; - KASSERT(vm_radix_is_empty(&pmap->pm_root), + KASSERT(ptpage_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap %p has reserved page table page(s)", pmap)); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pmltop)); + m = pmap_va_to_ptpage(pmap->pm_pmltop); if (pmap_is_la57(pmap)) { for (i = NPML5EPG / 2; i < NPML5EPG; i++) @@ -4853,8 +5619,7 @@ pmap_release(pmap_t pmap) pmap_pt_page_count_pinit(pmap, -1); if (pmap->pm_pmltopu != NULL) { - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap-> - pm_pmltopu)); + m = pmap_va_to_ptpage(pmap->pm_pmltopu); pmap_free_pt_page(NULL, m, false); pmap_pt_page_count_pinit(pmap, -1); } @@ -4902,6 +5667,10 @@ pmap_kmsan_shadow_map_page_array(vm_paddr_t pdppa, vm_size_t size) npdpg = howmany(size, NBPDP); npde = size / NBPDR; + /* + * CHUQ allocate a whole page even though we only need a ptpage. 
+ */ + dummypa = vm_phys_early_alloc(-1, PAGE_SIZE); pagezero((void *)PHYS_TO_DMAP(dummypa)); @@ -4943,29 +5712,23 @@ pmap_kmsan_page_array_startup(vm_offset_t start, vm_offset_t end) } #endif -/* - * Allocate physical memory for the vm_page array and map it into KVA, - * attempting to back the vm_pages with domain-local memory. - */ -void -pmap_page_array_startup(long pages) +static void * +pmap_page_array_alloc(vm_offset_t start, long pages, size_t size) { pdp_entry_t *pdpe; pd_entry_t *pde, newpdir; - vm_offset_t va, start, end; + vm_offset_t va, end; vm_paddr_t pa; long pfn; int domain, i; - vm_page_array_size = pages; - - start = kva_layout.km_low; - end = start + pages * sizeof(struct vm_page); + end = start + pages * size; for (va = start; va < end; va += NBPDR) { - pfn = first_page + (va - start) / sizeof(struct vm_page); + pfn = first_page + (va - start) / PAGE_SIZE; domain = vm_phys_domain(ptoa(pfn)); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) { + /* CHUQ this wastes the rest of the page after PAGE_SIZE_PT */ pa = vm_phys_early_alloc(domain, PAGE_SIZE); dump_add_page(pa); pagezero((void *)PHYS_TO_DMAP(pa)); @@ -4976,16 +5739,42 @@ pmap_page_array_startup(long pages) if ((*pde & X86_PG_V) != 0) panic("Unexpected pde"); pa = vm_phys_early_alloc(domain, NBPDR); - for (i = 0; i < NPDEPG; i++) + for (i = 0; i < NBPDR / PAGE_SIZE; i++) dump_add_page(pa + i * PAGE_SIZE); newpdir = (pd_entry_t)(pa | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M | PG_PS | pg_g | pg_nx); pde_store(pde, newpdir); } - vm_page_array = (vm_page_t)start; #ifdef KMSAN pmap_kmsan_page_array_startup(start, end); +#endif + return ((void *)start); +} + +/* + * Allocate physical memory for the vm_page array and map it into KVA, + * attempting to back the vm_pages with domain-local memory. 
+ */ +void +pmap_page_array_startup(long pages) +{ + vm_offset_t start; + size_t size; + + start = kva_layout.km_low; + size = sizeof (struct vm_page); + vm_page_array = pmap_page_array_alloc(start, pages, size); + vm_page_array_size = pages; + bzero(vm_page_array, pages * size); + +#if PAGE_SIZE != PAGE_SIZE_4K + start += roundup(pages * size, NBPDR); + pages *= PAGE_SIZE_PTES; + size = sizeof (struct ptpage); + pmap_pt_page_array = pmap_page_array_alloc(start, pages, size); + pmap_pt_page_array_size = pages; + bzero(pmap_pt_page_array, pages * size); #endif } @@ -4996,7 +5785,7 @@ static int pmap_growkernel_nopanic(vm_offset_t addr) { vm_paddr_t paddr; - vm_page_t nkpg; + ptpage_t nkpg; pd_entry_t *pde, newpdir; pdp_entry_t *pdpe; vm_offset_t end; @@ -5052,6 +5841,7 @@ pmap_growkernel_nopanic(vm_offset_t addr) kasan_shadow_map(end, addr - end); kmsan_shadow_map(end, addr - end); + PMAP_LOCK(kernel_pmap); while (end < addr) { pdpe = pmap_pdpe(kernel_pmap, end); if ((*pdpe & X86_PG_V) == 0) { @@ -5062,7 +5852,7 @@ pmap_growkernel_nopanic(vm_offset_t addr) rv = KERN_RESOURCE_SHORTAGE; break; } - paddr = VM_PAGE_TO_PHYS(nkpg); + paddr = pmap_ptpage_pa(nkpg); *pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M); continue; /* try again */ @@ -5085,7 +5875,7 @@ pmap_growkernel_nopanic(vm_offset_t addr) break; } - paddr = VM_PAGE_TO_PHYS(nkpg); + paddr = pmap_ptpage_pa(nkpg); newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; pde_store(pde, newpdir); @@ -5095,6 +5885,7 @@ pmap_growkernel_nopanic(vm_offset_t addr) break; } } + PMAP_UNLOCK(kernel_pmap); if (end <= KERNBASE) kernel_vm_end = end; @@ -5198,7 +5989,7 @@ reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain) pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; - struct spglist free; + struct ptpglist free; uint64_t inuse; int bit, field, freed; bool start_di, restart; @@ -5299,9 +6090,9 @@ reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain) pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_W) != 0) continue; - tpte = pte_load_clear(pte); + tpte = pte_load_clear_datapg(pte); if ((tpte & PG_G) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_datapg(pmap, va); m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); @@ -5339,6 +6130,7 @@ reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain) PV_STAT(counter_u64_add(pc_chunk_count, -1)); PV_STAT(counter_u64_add(pc_chunk_frees, 1)); /* Entire chunk is free; return it. */ + /* CHUQ this is pv page. let's treat it as a data page for now. */ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m_pc->phys_addr); mtx_lock(&pvc->pvc_lock); @@ -5373,13 +6165,19 @@ reclaim_pv_chunk_domain(pmap_t locked_pmap, struct rwlock **lockp, int domain) pvc->active_reclaims--; mtx_unlock(&pvc->pvc_lock); reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di); +#if 0 && PAGE_SIZE_PT == PAGE_SIZE_PV + /* + * This tries to reuse a pt page as a pv page. + * This only works when those sizes are the same. + */ if (m_pc == NULL && !SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. 
*/ m_pc->ref_count = 1; } - vm_page_free_pages_toq(&free, true); +#endif + pmap_ptpage_slist_free(&free, true); return (m_pc); } @@ -5440,6 +6238,7 @@ free_pv_chunk_dequeued(struct pv_chunk *pc) PV_STAT(counter_u64_add(pc_chunk_frees, 1)); counter_u64_add(pv_page_count, -1); /* entire chunk is free, return it */ + /* CHUQ this is a pv page. let's treat it as a data page for now. */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire_noq(m); @@ -5718,11 +6517,12 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, va = trunc_2mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); + m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; - /* Instantiate the remaining NPTEPG - 1 pv entries. */ - PV_STAT(counter_u64_add(pv_entry_allocs, NPTEPG - 1)); + /* Instantiate the remaining NDPTEPG - 1 pv entries. */ + PV_STAT(counter_u64_add(pv_entry_allocs, NDPTEPG - 1)); va_last = va + NBPDR - PAGE_SIZE; for (;;) { pc = TAILQ_FIRST(&pmap->pm_pvchunk); @@ -5751,8 +6551,8 @@ pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } - PV_STAT(counter_u64_add(pv_entry_count, NPTEPG - 1)); - PV_STAT(counter_u64_add(pv_entry_spare, -(NPTEPG - 1))); + PV_STAT(counter_u64_add(pv_entry_count, NDPTEPG - 1)); + PV_STAT(counter_u64_add(pv_entry_spare, -(NDPTEPG - 1))); } #if VM_NRESERVLEVEL > 0 @@ -5788,7 +6588,7 @@ pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; - /* Free the remaining NPTEPG - 1 pv entries. */ + /* Free the remaining NDPTEPG - 1 pv entries. 
*/ va_last = va + NBPDR - PAGE_SIZE; do { m++; @@ -5872,7 +6672,7 @@ pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; - newpte += PAGE_SIZE; + newpte += PAGE_SIZE_PT; } } @@ -5901,7 +6701,7 @@ pmap_demote_pde_check(pt_entry_t *firstpte __unused, pt_entry_t newpte __unused) pt_entry_t *xpte, *ypte; for (xpte = firstpte; xpte < firstpte + NPTEPG; - xpte++, newpte += PAGE_SIZE) { + xpte++, newpte += PAGE_SIZE_PT) { if ((*xpte & PG_FRAME) != (newpte & PG_FRAME)) { printf("pmap_demote_pde: xpte %zd and newpte map " "different pages: found %#lx, expected %#lx\n", @@ -5924,7 +6724,7 @@ static void pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t oldpde, struct rwlock **lockp) { - struct spglist free; + struct ptpglist free; vm_offset_t sva; SLIST_INIT(&free); @@ -5932,7 +6732,7 @@ pmap_demote_pde_abort(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pmap_remove_pde(pmap, pde, sva, true, &free, lockp); if ((oldpde & pmap_global_bit(pmap)) == 0) pmap_invalidate_pde_page(pmap, sva, oldpde); - vm_page_free_pages_toq(&free, true); + pmap_ptpage_slist_free(&free, true); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx in pmap %p", va, pmap); } @@ -5946,7 +6746,7 @@ pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, static bool pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, - struct rwlock **lockp, vm_page_t mpte) + struct rwlock **lockp, ptpage_t mpte) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; @@ -5983,11 +6783,13 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, } mpte = pmap_remove_pt_page(pmap, va); /* Fill the PTP with PTEs that have PG_A cleared. */ - mpte->valid = 0; + pmap_ptpage_valid_set(mpte, 0); } else if ((mpte = pmap_remove_pt_page(pmap, va)) == NULL) { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping is missing")); +#if 0 +/* CHUQ ??? */ /* * If the page table page is missing and the mapping * is for a kernel address, the mapping must belong to @@ -5999,18 +6801,20 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, KASSERT(!in_kernel || (va >= kva_layout.dmap_low && va < kva_layout.dmap_high), ("pmap_demote_pde: No saved mpte for va %#lx", va)); +#endif - /* - * If the 2MB page mapping belongs to the direct map - * region of the kernel's address space, then the page - * allocation request specifies the highest possible - * priority (VM_ALLOC_INTERRUPT). Otherwise, the - * priority is normal. - */ - mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va), - (in_kernel ? VM_ALLOC_INTERRUPT : 0) | - VM_ALLOC_WIRED); + /* + * If the 2MB page mapping belongs to the direct map + * region of the kernel's address space, then the page + * allocation request specifies the highest possible + * priority (VM_ALLOC_INTERRUPT). Otherwise, the + * priority is normal. + */ + mpte = pmap_alloc_pt_page(pmap, pmap_pde_pindex(va), + (in_kernel ? VM_ALLOC_INTERRUPT : 0) | VM_ALLOC_WIRED); +#if 0 +/* CHUQ rebase leftover ??? */ /* * If the allocation of the new page table page fails, * invalidate the 2MB page mapping and return "failure". 
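A small standalone illustration of the stride change above: the pmap_fill_ptp() and pmap_demote_pde_check() loops now advance newpte by PAGE_SIZE_PT instead of PAGE_SIZE, because every PTE in a page table maps one PTE-sized chunk of physical memory no matter how large the base data page is. The sketch below restates that walk outside the kernel; NPTEPG and PAGE_SIZE_PT are hard-coded here as assumptions (512 entries, 4 KB) and pt_entry_t is reduced to a plain uint64_t.

#include <stdint.h>

#define NPTEPG		512		/* assumed: PTEs per page table page */
#define PAGE_SIZE_PT	4096UL		/* assumed: bytes mapped by one PTE */

typedef uint64_t pt_entry_t;

/*
 * Expand a 2MB mapping into NPTEPG small PTEs: entry i maps the
 * physical range at (frame of newpte) + i * PAGE_SIZE_PT.  Mirrors the
 * pmap_fill_ptp() loop above, stepping by the PTE granularity rather
 * than the (possibly larger) base PAGE_SIZE.
 */
static void
fill_ptp_sketch(pt_entry_t *firstpte, pt_entry_t newpte)
{
	pt_entry_t *pte;

	for (pte = firstpte; pte < firstpte + NPTEPG; pte++) {
		*pte = newpte;
		newpte += PAGE_SIZE_PT;
	}
}

When PAGE_SIZE == PAGE_SIZE_4K the two strides are identical, so this hunk should be behavior-preserving for the existing 4K configuration.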
@@ -6022,11 +6826,15 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, } if (!in_kernel) - mpte->ref_count = NPTEPG; + /* CHUQ needs different constant */ + pmap_ptpage_ref_set(mpte, NPTEPG); +#endif } + if (!in_kernel) + pmap_ptpage_ref_set(mpte, NDPTEPG); } - mptepa = VM_PAGE_TO_PHYS(mpte); - firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); + mptepa = pmap_ptpage_pa(mpte); + firstpte = pmap_ptpage_va(mpte); newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); @@ -6038,7 +6846,7 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * have PG_A set in every PTE, then fill it. The new PTEs will all * have PG_A set, unless this is a wired mapping with PG_A clear. */ - if (!vm_page_all_valid(mpte)) + if (pmap_ptpage_valid_get(mpte) != VM_PAGE_BITS_ALL) pmap_fill_ptp(firstpte, newpte); pmap_demote_pde_check(firstpte, newpte); @@ -6058,7 +6866,7 @@ pmap_demote_pde_mpte(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, * PV entry for the 2MB page mapping that is being demoted. */ if ((oldpde & PG_MANAGED) != 0) - reserve_pv_entries(pmap, NPTEPG - 1, lockp); + reserve_pv_entries(pmap, NDPTEPG - 1, lockp); /* * Demote the mapping. This pmap is locked. The old PDE has @@ -6098,22 +6906,22 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; - vm_page_t mpte; + ptpage_t mpte; KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_remove_pt_page(pmap, va); KASSERT(mpte != NULL, ("pmap_remove_kernel_pde: missing pt page")); - mptepa = VM_PAGE_TO_PHYS(mpte); + mptepa = pmap_ptpage_pa(mpte); newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; /* * If this page table page was unmapped by a promotion, then it * contains valid mappings. Zero it to invalidate those mappings. */ - if (vm_page_any_valid(mpte)) - pagezero((void *)PHYS_TO_DMAP(mptepa)); + if (pmap_ptpage_valid_get(mpte) != 0) + bzero((void *)PHYS_TO_DMAP(mptepa), PAGE_SIZE_PT); /* * Demote the mapping. 
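The demotion and pv-list hunks above consistently swap NPTEPG for NDPTEPG when counting pv entries and page-table-page references, and adjust resident counts by PAGE_SIZE_PTES per data-page mapping. The definitions of NDPTEPG and PAGE_SIZE_PTES are not visible in this part of the diff; assuming, as these uses suggest, that PAGE_SIZE_PTES is the number of PTEs backing one data page and NDPTEPG is the number of data pages mapped by one page table, the intended relations can be written as compile-time checks. The 16 KB / 4 KB figures below are illustrative assumptions only.

#include <stdint.h>

/* Example configuration: 16 KB data pages built from 4 KB PTEs (assumed). */
#define PAGE_SIZE	(16 * 1024)
#define PAGE_SIZE_PT	(4 * 1024)
#define NPTEPG		(PAGE_SIZE_PT / sizeof(uint64_t))	/* 512 PTEs per PT page */
#define PAGE_SIZE_PTES	(PAGE_SIZE / PAGE_SIZE_PT)		/* 4 PTEs per data page */
#define NDPTEPG		(NPTEPG / PAGE_SIZE_PTES)		/* 128 data pages per PT */

/* One page table still spans NBPDR (2 MB) regardless of the data page size. */
_Static_assert(NDPTEPG * PAGE_SIZE == NPTEPG * PAGE_SIZE_PT,
    "a page table maps the same range counted either way");
/* Every PTE of a full page table belongs to exactly one data page. */
_Static_assert(NDPTEPG * PAGE_SIZE_PTES == NPTEPG,
    "NDPTEPG and PAGE_SIZE_PTES must partition the page table");

Under these assumptions a 2 MB demotion needs NDPTEPG - 1 additional pv entries (one per extra data page, not one per PTE), a fully populated user page table page carries a reference count of NDPTEPG, and the resident count appears to be kept in PAGE_SIZE_PT units, which is why single-page mappings adjust it by PAGE_SIZE_PTES.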
@@ -6134,12 +6942,13 @@ pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) */ static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde, - struct spglist *free, struct rwlock **lockp) + struct ptpglist *free, struct rwlock **lockp) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; - vm_page_t m, mpte; + vm_page_t m; + ptpage_t mpte; pt_entry_t PG_G, PG_A, PG_M, PG_RW; PG_G = pmap_global_bit(pmap); @@ -6155,7 +6964,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde, pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; if ((oldpde & PG_G) != 0) pmap_invalidate_pde_page(kernel_pmap, sva, oldpde); - pmap_resident_count_adj(pmap, -NBPDR / PAGE_SIZE); + pmap_resident_count_adj(pmap, -NBPDR / PAGE_SIZE_PT); if (oldpde & PG_MANAGED) { CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME); pvh = pa_to_pvh(oldpde & PG_PS_FRAME); @@ -6176,21 +6985,21 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde, if (pmap != kernel_pmap) { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { - KASSERT(vm_page_any_valid(mpte), + KASSERT(pmap_ptpage_valid_get(mpte) != 0, ("pmap_remove_pde: pte page not promoted")); pmap_pt_page_count_adj(pmap, -1); - KASSERT(mpte->ref_count == NPTEPG, + KASSERT(pmap_ptpage_refs(mpte) == NDPTEPG, ("pmap_remove_pde: pte page ref count error")); - mpte->ref_count = 0; - pmap_add_delayed_free_list(mpte, free, false); + pmap_ptpage_ref_set(mpte, 0); + pmap_add_delayed_free_list(pmap, mpte, free, false); } } else if (demote_kpde) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { - mpte = vm_radix_lookup(&pmap->pm_root, pmap_pde_pindex(sva)); - if (vm_page_any_valid(mpte)) { - mpte->valid = 0; - pmap_zero_page(mpte); + mpte = ptpage_radix_lookup(&pmap->pm_root, pmap_pde_pindex(sva)); + if (pmap_ptpage_valid_get(mpte) != 0) { + pmap_ptpage_valid_set(mpte, 0); + bzero(pmap_ptpage_va(mpte), PAGE_SIZE_PT); } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); @@ -6201,7 +7010,7 @@ pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, bool demote_kpde, */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, - pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp) + pd_entry_t ptepde, struct ptpglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t oldpte, PG_A, PG_M, PG_RW; @@ -6212,10 +7021,10 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - oldpte = pte_load_clear(ptq); + oldpte = pte_load_clear_datapg(ptq); if (oldpte & PG_W) - pmap->pm_stats.wired_count -= 1; - pmap_resident_count_adj(pmap, -1); + pmap->pm_stats.wired_count -= PAGE_SIZE_PTES; + pmap_resident_count_adj(pmap, -PAGE_SIZE_PTES); if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) @@ -6240,23 +7049,25 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, - struct spglist *free) + struct ptpglist *free) { struct rwlock *lock; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); - if ((*pde & PG_V) == 0) + if ((*pde & PG_V) == 0) { return; + } pte = pmap_pde_to_pte(pde, va); - if ((*pte & PG_V) == 0) + if ((*pte & PG_V) == 0) { return; + } lock = NULL; pmap_remove_pte(pmap, pte, va, *pde, free, &lock); if (lock != NULL) rw_wunlock(lock); - pmap_invalidate_page(pmap, 
va); + pmap_invalidate_page_datapg(pmap, va); } /* @@ -6264,7 +7075,7 @@ pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, */ static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, - pd_entry_t *pde, struct spglist *free, struct rwlock **lockp) + pd_entry_t *pde, struct ptpglist *free, struct rwlock **lockp) { pt_entry_t PG_G, *pte; vm_offset_t va; @@ -6274,8 +7085,9 @@ pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, PG_G = pmap_global_bit(pmap); anyvalid = false; va = eva; - for (pte = pmap_pde_to_pte(pde, sva); sva != eva; pte++, - sva += PAGE_SIZE) { + for (pte = pmap_pde_to_pte(pde, sva); sva != eva; + pte += PAGE_SIZE_PTES, sva += PAGE_SIZE) { + if (*pte == 0) { if (va != eva) { pmap_invalidate_range(pmap, va, sva); @@ -6301,14 +7113,14 @@ static void pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) { struct rwlock *lock; - vm_page_t mt; + ptpage_t ptp; vm_offset_t va_next; pml5_entry_t *pml5e; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t PG_G, PG_V; - struct spglist free; + struct ptpglist free; int anyvalid; PG_G = pmap_global_bit(pmap); @@ -6385,9 +7197,9 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) MPASS((*pdpe & (PG_MANAGED | PG_G)) == 0); anyvalid = 1; *pdpe = 0; - pmap_resident_count_adj(pmap, -NBPDP / PAGE_SIZE); - mt = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, sva) & PG_FRAME); - pmap_unwire_ptp(pmap, sva, mt, &free); + pmap_resident_count_adj(pmap, -NBPDP / PAGE_SIZE_PT); + ptp = pmap_pa_to_ptpage(*pmap_pml4e(pmap, sva) & PG_FRAME); + pmap_unwire_ptp(pmap, sva, ptp, &free); continue; } @@ -6411,6 +7223,7 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { + /* * Are we removing the entire large page? If not, * demote the mapping and fall through. 
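The remove paths above switch from pte_load_clear()/pte_store()/pmap_invalidate_page() to *_datapg variants wherever the PTE being touched maps a data page rather than a page table page, and pmap_remove_ptes() now advances pte by PAGE_SIZE_PTES for every PAGE_SIZE step of sva, so one loop iteration still covers exactly one data page. The *_datapg helpers themselves are not shown in this portion of the diff; the sketch below is only a plausible reading of what pte_load_clear_datapg() might do, namely apply the per-PTE primitive to all PAGE_SIZE_PTES consecutive PTEs backing the data page and fold their hardware-maintained accessed/dirty bits together. The _sketch name, the constant values, and the use of a compiler builtin in place of the kernel's atomics are all assumptions.

#include <stdint.h>

#define PAGE_SIZE_PTES	4	/* assumed: 4 KB PTEs backing a 16 KB data page */
#define X86_PG_A	0x020	/* accessed */
#define X86_PG_M	0x040	/* dirty */

typedef uint64_t pt_entry_t;

/*
 * Clear every PTE that backs one data page and return the first PTE's
 * contents with the A/M bits of all constituent PTEs OR'ed in, since
 * the CPU may have set accessed/dirty on any of the 4 KB sub-mappings.
 */
static inline pt_entry_t
pte_load_clear_datapg_sketch(pt_entry_t *ptep)
{
	pt_entry_t ret, x;
	int i;

	ret = __atomic_exchange_n(&ptep[0], 0, __ATOMIC_SEQ_CST);
	for (i = 1; i < PAGE_SIZE_PTES; i++) {
		x = __atomic_exchange_n(&ptep[i], 0, __ATOMIC_SEQ_CST);
		ret |= x & (X86_PG_A | X86_PG_M);
	}
	return (ret);
}

pmap_invalidate_page_datapg() would then presumably invalidate the TLB for the whole PAGE_SIZE-sized range rather than a single 4 KB translation.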
@@ -6451,7 +7264,7 @@ pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_delayed_invl_finish(); - vm_page_free_pages_toq(&free, true); + pmap_ptpage_slist_free(&free, true); } /* @@ -6475,6 +7288,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_map_delete(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { + pmap_remove1(pmap, sva, eva, true); } @@ -6501,7 +7315,7 @@ pmap_remove_all(vm_page_t m) pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW; pd_entry_t *pde; vm_offset_t va; - struct spglist free; + struct ptpglist free; int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, @@ -6545,12 +7359,12 @@ pmap_remove_all(vm_page_t m) PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); - pmap_resident_count_adj(pmap, -1); + pmap_resident_count_adj(pmap, -PAGE_SIZE_PTES); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); - tpte = pte_load_clear(pte); + tpte = pte_load_clear_datapg(pte); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) @@ -6562,7 +7376,7 @@ pmap_remove_all(vm_page_t m) if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_datapg(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); @@ -6571,7 +7385,7 @@ pmap_remove_all(vm_page_t m) vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(lock); pmap_delayed_invl_wait(m); - vm_page_free_pages_toq(&free, true); + pmap_ptpage_slist_free(&free, true); } /* @@ -6638,6 +7452,9 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) pt_entry_t obits, pbits; bool anychanged; + KASSERT((sva & PAGE_MASK) == 0, ("sva not page aligned 0x%lx", sva)); + KASSERT((eva & PAGE_MASK) == 0, ("eva not page aligned 0x%lx", eva)); + KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); @@ -6766,8 +7583,14 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if (va_next > eva) va_next = eva; + /* + * Clear RW and M bits and set NX bits as necessary. + * This loop is processing data pages, but we handle one PTE + * at a time since the cmpset logic doesn't really lend itself + * to being separated out into a foo_datapg() function. + */ for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, - sva += PAGE_SIZE) { + sva += PAGE_SIZE_PT) { retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) @@ -6776,7 +7599,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { - m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); + m = PHYS_TO_VM_PAGE(pbits & PG_FRAME & ~PAGE_MASK); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); @@ -6817,7 +7640,7 @@ pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde) * identical characteristics. */ static bool -pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, +pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, ptpage_t mpte, struct rwlock **lockp) { pd_entry_t newpde; @@ -6888,7 +7711,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, * characteristics to the first PTE. 
*/ allpte_PG_A = newpde & PG_A; - pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE; + pa = (newpde & (PG_PS_FRAME | PG_V)) + NBPDR - PAGE_SIZE_PT; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { oldpte = *pte; if ((oldpte & (PG_FRAME | PG_V)) != pa) { @@ -6917,7 +7740,7 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, return (false); } allpte_PG_A &= oldpte; - pa -= PAGE_SIZE; + pa -= PAGE_SIZE_PT; } /* @@ -6942,14 +7765,14 @@ pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, vm_page_t mpte, * that the PTP be refilled on demotion. */ if (mpte == NULL) - mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - KASSERT(mpte >= vm_page_array && - mpte < &vm_page_array[vm_page_array_size], + mpte = pmap_pa_to_ptpage(*pde & PG_FRAME); + KASSERT(mpte >= pmap_pt_page_array && + mpte < &pmap_pt_page_array[pmap_pt_page_array_size], ("pmap_promote_pde: page table page is out of range")); - KASSERT(mpte->pindex == pmap_pde_pindex(va), + KASSERT(pmap_ptpage_pindex(mpte) == pmap_pde_pindex(va), ("pmap_promote_pde: page table page's pindex is wrong " "mpte %p pidx %#lx va %#lx va pde pidx %#lx", - mpte, mpte->pindex, va, pmap_pde_pindex(va))); + mpte, pmap_ptpage_pindex(mpte), va, pmap_pde_pindex(va))); if (pmap_insert_pt_page(pmap, mpte, true, allpte_PG_A != 0)) { counter_u64_add(pmap_pde_p_failures, 1); CTR2(KTR_PMAP, @@ -6988,7 +7811,7 @@ static int pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, int psind) { - vm_page_t mp; + ptpage_t mp; pt_entry_t origpte, *pml4e, *pdpe, *pde, pten, PG_V; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -7018,7 +7841,7 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, NULL, va); if (mp == NULL) goto allocf; - pdpe = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp)); + pdpe = pmap_ptpage_va(mp); pdpe = &pdpe[pmap_pdpe_index(va)]; origpte = *pdpe; MPASS(origpte == 0); @@ -7027,8 +7850,8 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, KASSERT(pdpe != NULL, ("va %#lx lost pdpe", va)); origpte = *pdpe; if ((origpte & PG_V) == 0) { - mp = PHYS_TO_VM_PAGE(*pml4e & PG_FRAME); - mp->ref_count++; + mp = pmap_pa_to_ptpage(*pml4e & PG_FRAME); + pmap_ptpage_ref_add(mp, 1); } } *pdpe = pten; @@ -7039,7 +7862,7 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, NULL, va); if (mp == NULL) goto allocf; - pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mp)); + pde = pmap_ptpage_va(mp); pde = &pde[pmap_pde_index(va)]; origpte = *pde; MPASS(origpte == 0); @@ -7048,8 +7871,8 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, if ((origpte & PG_V) == 0) { pdpe = pmap_pdpe(pmap, va); MPASS(pdpe != NULL && (*pdpe & PG_V) != 0); - mp = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME); - mp->ref_count++; + mp = pmap_pa_to_ptpage(*pdpe & PG_FRAME); + pmap_ptpage_ref_add(mp, 1); } } *pde = pten; @@ -7059,11 +7882,11 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags, ("va %#lx changing %s phys page origpte %#lx pten %#lx", va, psind == 2 ? 
"1G" : "2M", origpte, pten)); if ((pten & PG_W) != 0 && (origpte & PG_W) == 0) - pmap->pm_stats.wired_count += pagesizes[psind] / PAGE_SIZE; + pmap->pm_stats.wired_count += pagesizes[psind] / PAGE_SIZE_PT; else if ((pten & PG_W) == 0 && (origpte & PG_W) != 0) - pmap->pm_stats.wired_count -= pagesizes[psind] / PAGE_SIZE; + pmap->pm_stats.wired_count -= pagesizes[psind] / PAGE_SIZE_PT; if ((origpte & PG_V) == 0) - pmap_resident_count_adj(pmap, pagesizes[psind] / PAGE_SIZE); + pmap_resident_count_adj(pmap, pagesizes[psind] / PAGE_SIZE_PT); return (KERN_SUCCESS); @@ -7102,7 +7925,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; - vm_page_t mpte, om; + vm_page_t om; + ptpage_t mpte; int rv; bool nosleep; @@ -7180,8 +8004,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, pmap_demote_pde_locked(pmap, pde, va, &lock))) { pte = pmap_pde_to_pte(pde, va); if (va < VM_MAXUSER_ADDRESS && mpte == NULL) { - mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - mpte->ref_count++; + mpte = pmap_pa_to_ptpage(*pde & PG_FRAME); + pmap_ptpage_ref_add(mpte, 1); } } else if (va < VM_MAXUSER_ADDRESS) { /* @@ -7223,8 +8047,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, * Remove the extra PT page reference. */ if (mpte != NULL) { - mpte->ref_count--; - KASSERT(mpte->ref_count > 0, + pmap_ptpage_ref_add(mpte, -1); + KASSERT(pmap_ptpage_refs(mpte) > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } @@ -7255,7 +8079,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, * * For consistency, handle unmanaged mappings the same way. */ - origpte = pte_load_clear(pte); + origpte = pte_load_clear_datapg(pte); KASSERT((origpte & PG_FRAME) == opa, ("pmap_enter: unexpected pa update for %#lx", va)); if ((origpte & PG_MANAGED) != 0) { @@ -7269,7 +8093,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(om); if ((origpte & PG_A) != 0) { - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_datapg(pmap, va); vm_page_aflag_set(om, PGA_REFERENCED); } CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); @@ -7288,7 +8112,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, * Since this mapping is unmanaged, assume that PG_A * is set. 
*/ - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_datapg(pmap, va); } origpte = 0; } else { @@ -7297,7 +8121,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, */ if ((newpte & PG_W) != 0) pmap->pm_stats.wired_count++; - pmap_resident_count_adj(pmap, 1); + pmap_resident_count_adj(pmap, PAGE_SIZE_PTES); } /* @@ -7309,6 +8133,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, pv->pv_va = va; } CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((newpte & PG_RW) != 0) @@ -7320,7 +8145,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, */ if ((origpte & PG_V) != 0) { validate: - origpte = pte_load_store(pte, newpte); + origpte = pte_load_store_datapg(pte, newpte); KASSERT((origpte & PG_FRAME) == pa, ("pmap_enter: unexpected pa update for %#lx", va)); if ((newpte & PG_M) == 0 && (origpte & (PG_M | PG_RW)) == @@ -7340,9 +8165,9 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, goto unchanged; } if ((origpte & PG_A) != 0) - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_datapg(pmap, va); } else - pte_store(pte, newpte); + pte_store_datapg(pte, newpte); unchanged: @@ -7351,7 +8176,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, * If both the page table page and the reservation are fully * populated, then attempt promotion. */ - if ((mpte == NULL || mpte->ref_count == NPTEPG) && + if ((mpte == NULL || pmap_ptpage_refs(mpte) == NDPTEPG) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) (void)pmap_promote_pde(pmap, pde, va, mpte, &lock); @@ -7401,7 +8226,7 @@ pmap_every_pte_zero(vm_paddr_t pa) { pt_entry_t *pt_end, *pte; - KASSERT((pa & PAGE_MASK) == 0, ("pa is misaligned")); + KASSERT((pa & PAGE_MASK_PT) == 0, ("pa is misaligned")); pte = (pt_entry_t *)PHYS_TO_DMAP(pa); for (pt_end = pte + NPTEPG; pte < pt_end; pte++) { if (*pte != 0) @@ -7432,11 +8257,11 @@ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m, struct rwlock **lockp) { - struct spglist free; + struct ptpglist free; pd_entry_t oldpde, *pde; pt_entry_t PG_G, PG_RW, PG_V; - vm_page_t mt, pdpg; - vm_page_t uwptpg; + vm_page_t mt; + ptpage_t pdpg, uwptpg, tptp; PG_G = pmap_global_bit(pmap); PG_RW = pmap_rw_bit(pmap); @@ -7476,12 +8301,12 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, */ oldpde = *pde; if ((oldpde & PG_V) != 0) { - KASSERT(pdpg == NULL || pdpg->ref_count > 1, + KASSERT(pdpg == NULL || pmap_ptpage_refs(pdpg) > 1, ("pmap_enter_pde: pdpg's reference count is too low")); if ((flags & PMAP_ENTER_NOREPLACE) != 0) { if ((oldpde & PG_PS) != 0) { if (pdpg != NULL) - pdpg->ref_count--; + pmap_ptpage_ref_add(pdpg, -1); CTR2(KTR_PMAP, "pmap_enter_pde: no space for va %#lx" " in pmap %p", va, pmap); @@ -7489,7 +8314,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, } else if (va < VM_MAXUSER_ADDRESS || !pmap_every_pte_zero(oldpde & PG_FRAME)) { if (pdpg != NULL) - pdpg->ref_count--; + pmap_ptpage_ref_add(pdpg, -1); CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); @@ -7517,8 +8342,8 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * before any changes to mappings are * made. Abort on failure. 
*/ - mt = PHYS_TO_VM_PAGE(oldpde & PG_FRAME); - if (pmap_insert_pt_page(pmap, mt, false, + tptp = pmap_pa_to_ptpage(oldpde & PG_FRAME); + if (pmap_insert_pt_page(pmap, tptp, false, false)) { CTR1(KTR_PMAP, "pmap_enter_pde: cannot ins kern ptp va %#lx", @@ -7538,12 +8363,23 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, pmap_delayed_invl_finish(); } if (va < VM_MAXUSER_ADDRESS) { - vm_page_free_pages_toq(&free, true); - KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p", - pde)); + pmap_ptpage_slist_free(&free, true); + KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p *pde 0x%lx", + pde, *pde)); } else { KASSERT(SLIST_EMPTY(&free), ("pmap_enter_pde: freed kernel page table page")); + +#if 0 +/* CHUQ leftover from rebase? */ + /* + * Both pmap_remove_pde() and pmap_remove_ptes() will + * leave the kernel page table page zero filled. + */ + tptp = pmap_pa_to_ptpage(*pde & PG_FRAME); + if (pmap_insert_pt_page(pmap, tptp, false, false)) + panic("pmap_enter_pde: trie insert failed"); +#endif } } @@ -7564,7 +8400,7 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, return (KERN_RESOURCE_SHORTAGE); } - uwptpg->ref_count = NPTEPG; + pmap_ptpage_ref_set(uwptpg, NDPTEPG); } if ((newpde & PG_MANAGED) != 0) { /* @@ -7577,16 +8413,16 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, KASSERT(va >= VM_MAXUSER_ADDRESS && (*pde & (PG_PS | PG_V)) == PG_V, ("pmap_enter_pde: invalid kernel PDE")); - mt = pmap_remove_pt_page(pmap, va); - KASSERT(mt != NULL, + tptp = pmap_remove_pt_page(pmap, va); + KASSERT(tptp != NULL, ("pmap_enter_pde: missing kernel PTP")); } if (uwptpg != NULL) { - mt = pmap_remove_pt_page(pmap, va); - KASSERT(mt == uwptpg, - ("removed pt page %p, expected %p", mt, + tptp = pmap_remove_pt_page(pmap, va); + KASSERT(tptp == uwptpg, + ("removed pt page %p, expected %p", tptp, uwptpg)); - uwptpg->ref_count = 1; + pmap_ptpage_ref_set(uwptpg, 1); pmap_free_pt_page(pmap, uwptpg, false); } CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" @@ -7603,8 +8439,8 @@ pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, * Increment counters. */ if ((newpde & PG_W) != 0) - pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE; - pmap_resident_count_adj(pmap, NBPDR / PAGE_SIZE); + pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE_PT; + pmap_resident_count_adj(pmap, NBPDR / PAGE_SIZE_PT); /* * Map the superpage. 
(This is not a promoted mapping; there will not @@ -7637,7 +8473,8 @@ pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, struct pctrie_iter pages; struct rwlock *lock; vm_offset_t va; - vm_page_t m, mpte; + vm_page_t m; + ptpage_t mpte; int rv; VM_OBJECT_ASSERT_LOCKED(m_start->object); @@ -7688,9 +8525,9 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) PMAP_UNLOCK(pmap); } -static vm_page_t +static ptpage_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, - vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) + vm_prot_t prot, ptpage_t mpte, struct rwlock **lockp) { pd_entry_t *pde; pt_entry_t newpte, *pte, PG_V; @@ -7714,8 +8551,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); - if (mpte && (mpte->pindex == ptepindex)) { - mpte->ref_count++; + if (mpte && (pmap_ptpage_pindex(mpte) == ptepindex)) { + pmap_ptpage_ref_add(mpte, 1); } else { /* * If the page table page is mapped, we just increment @@ -7733,8 +8570,8 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, if ((*pde & PG_V) != 0) { if ((*pde & PG_PS) != 0) return (NULL); - mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); - mpte->ref_count++; + mpte = pmap_pa_to_ptpage(*pde & PG_FRAME); + pmap_ptpage_ref_add(mpte, 1); } else { mpte = pmap_allocpte_alloc(pmap, ptepindex, NULL, va); @@ -7748,7 +8585,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, return (NULL); } } - pte = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); + pte = pmap_ptpage_va(mpte); pte = &pte[pmap_pte_index(va)]; } else { mpte = NULL; @@ -7756,7 +8593,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, } if (*pte) { if (mpte != NULL) - mpte->ref_count--; + pmap_ptpage_ref_add(mpte, -1); return (NULL); } @@ -7773,7 +8610,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, /* * Increment counters */ - pmap_resident_count_adj(pmap, 1); + pmap_resident_count_adj(pmap, PAGE_SIZE_PTES); newpte = VM_PAGE_TO_PHYS(m) | PG_V | pmap_cache_bits(pmap, m->md.pat_mode, false); @@ -7783,7 +8620,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, newpte |= pg_nx; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U | pmap_pkru_get(pmap, va); - pte_store(pte, newpte); + pte_store_datapg(pte, newpte); #if VM_NRESERVLEVEL > 0 /* @@ -7791,7 +8628,7 @@ pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, * attempt promotion. 
*/ if ((prot & VM_PROT_NO_PROMOTE) == 0 && - (mpte == NULL || mpte->ref_count == NPTEPG) && + (mpte == NULL || pmap_ptpage_refs(mpte) == NDPTEPG) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { if (pde == NULL) @@ -7820,7 +8657,7 @@ pmap_kenter_temporary(vm_paddr_t pa, int i) va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); - pmap_invlpg(kernel_pmap, va); + pmap_invlpg_datapg(kernel_pmap, va); return ((void *)crashdumpmap); } @@ -7837,7 +8674,8 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, pd_entry_t *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa, ptepa; - vm_page_t p, pdpg; + vm_page_t p; + ptpage_t pdpg; int pat_mode; PG_A = pmap_accessed_bit(pmap); @@ -7905,12 +8743,12 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, if ((*pde & PG_V) == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); - pmap_resident_count_adj(pmap, NBPDR / PAGE_SIZE); + pmap_resident_count_adj(pmap, NBPDR / PAGE_SIZE_PT); counter_u64_add(pmap_pde_mappings, 1); } else { /* Continue on if the PDE is already valid. */ - pdpg->ref_count--; - KASSERT(pdpg->ref_count > 0, + pmap_ptpage_ref_add(pdpg, -1); + KASSERT(pmap_ptpage_refs(pdpg) > 0, ("pmap_object_init_pt: missing reference " "to page directory page, va: 0x%lx", addr)); } @@ -7997,7 +8835,7 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, - sva += PAGE_SIZE) { + sva += PAGE_SIZE_PT) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) @@ -8033,7 +8871,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, pd_entry_t *pde, srcptepaddr; pt_entry_t *dst_pte, PG_A, PG_M, PG_V, ptetemp, *src_pte; vm_offset_t addr, end_addr, va_next; - vm_page_t dst_pdpg, dstmpte, srcmpte; + ptpage_t dst_pdpg, dstmpte, srcmpte; if (dst_addr != src_addr) return; @@ -8101,15 +8939,15 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, pdpe = pmap_pdpe(dst_pmap, addr); } else { pml4e = pmap_pml4e(dst_pmap, addr); - dst_pdpg = PHYS_TO_VM_PAGE(*pml4e & PG_FRAME); - dst_pdpg->ref_count++; + dst_pdpg = pmap_pa_to_ptpage(*pml4e & PG_FRAME); + pmap_ptpage_ref_add(dst_pdpg, 1); } KASSERT(*pdpe == 0, ("1G mapping present in dst pmap " "pdpe %#lx sva %#lx eva %#lx va_next %#lx", *pdpe, addr, end_addr, va_next)); *pdpe = srcptepaddr & ~PG_W; - pmap_resident_count_adj(dst_pmap, NBPDP / PAGE_SIZE); + pmap_resident_count_adj(dst_pmap, NBPDP / PAGE_SIZE_PT); continue; } @@ -8148,7 +8986,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, srcptepaddr &= ~PG_A; *pde = srcptepaddr; pmap_resident_count_adj(dst_pmap, NBPDR / - PAGE_SIZE); + PAGE_SIZE_PT); counter_u64_add(pmap_pde_mappings, 1); } else pmap_abort_ptp(dst_pmap, addr, dst_pdpg); @@ -8156,17 +8994,17 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, } srcptepaddr &= PG_FRAME; - srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); - KASSERT(srcmpte->ref_count > 0, + srcmpte = pmap_pa_to_ptpage(srcptepaddr); + KASSERT(pmap_ptpage_refs(srcmpte) > 0, ("pmap_copy: source page table page is unused")); if (va_next > end_addr) va_next = end_addr; - src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr); + src_pte = pmap_ptpage_va(srcmpte); src_pte = &src_pte[pmap_pte_index(addr)]; dstmpte = NULL; - for (; addr < va_next; addr += PAGE_SIZE, src_pte++) { + for (; addr < va_next; addr += PAGE_SIZE, src_pte 
+= PAGE_SIZE_PTES) { ptetemp = *src_pte; /* @@ -8176,15 +9014,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, continue; if (dstmpte != NULL) { - KASSERT(dstmpte->pindex == + KASSERT(pmap_ptpage_pindex(dstmpte) == pmap_pde_pindex(addr), ("dstmpte pindex/addr mismatch")); - dstmpte->ref_count++; + pmap_ptpage_ref_add(dstmpte, 1); } else if ((dstmpte = pmap_allocpte(dst_pmap, addr, NULL)) == NULL) goto out; - dst_pte = (pt_entry_t *) - PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte)); + dst_pte = pmap_ptpage_va(dstmpte); dst_pte = &dst_pte[pmap_pte_index(addr)]; if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, @@ -8193,14 +9030,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, * Clear the wired, modified, and accessed * (referenced) bits during the copy. */ - *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); - pmap_resident_count_adj(dst_pmap, 1); + pte_store_datapg(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A)); + pmap_resident_count_adj(dst_pmap, PAGE_SIZE_PTES); } else { pmap_abort_ptp(dst_pmap, addr, dstmpte); goto out; } /* Have we copied all of the valid mappings? */ - if (dstmpte->ref_count >= srcmpte->ref_count) + if (pmap_ptpage_refs(dstmpte) >= pmap_ptpage_refs(srcmpte)) break; } } @@ -8398,7 +9235,7 @@ pmap_page_wired_mappings(vm_page_t m) } } pte = pmap_pte(pmap, pv->pv_va); - if ((*pte & PG_W) != 0) + if ((pte_load_datapg(pte) & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } @@ -8480,9 +9317,10 @@ pmap_remove_pages(pmap_t pmap) pd_entry_t ptepde; pt_entry_t *pte, tpte; pt_entry_t PG_M, PG_RW, PG_V; - struct spglist free; + struct ptpglist free; struct pv_chunklist free_chunks[PMAP_MEMDOM]; - vm_page_t m, mpte, mt; + vm_page_t m, mt; + ptpage_t mpte; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; @@ -8584,12 +9422,13 @@ pmap_remove_pages(pmap_t pmap) * processors, the dirty bit cannot have * changed state since we last loaded pte. 
*/ - pte_clear(pte); - - if (superpage) + if (superpage) { + pte_clear(pte); pa = tpte & PG_PS_FRAME; - else + } else { + pte_clear_datapg(pte); pa = tpte & PG_FRAME; + } m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, @@ -8616,7 +9455,7 @@ pmap_remove_pages(pmap_t pmap) CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); if (superpage) { - pmap_resident_count_adj(pmap, -NBPDR / PAGE_SIZE); + pmap_resident_count_adj(pmap, -NBPDR / PAGE_SIZE_PT); pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; @@ -8628,16 +9467,16 @@ pmap_remove_pages(pmap_t pmap) } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { - KASSERT(vm_page_any_valid(mpte), + KASSERT(pmap_ptpage_valid_get(mpte) != 0, ("pmap_remove_pages: pte page not promoted")); pmap_pt_page_count_adj(pmap, -1); - KASSERT(mpte->ref_count == NPTEPG, + KASSERT(pmap_ptpage_refs(mpte) == NDPTEPG, ("pmap_remove_pages: pte page reference count error")); - mpte->ref_count = 0; - pmap_add_delayed_free_list(mpte, &free, false); + pmap_ptpage_ref_set(mpte, 0); + pmap_add_delayed_free_list(pmap, mpte, &free, false); } } else { - pmap_resident_count_adj(pmap, -1); + pmap_resident_count_adj(pmap, -PAGE_SIZE_PTES); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((m->a.flags & PGA_WRITEABLE) != 0 && @@ -8668,7 +9507,7 @@ pmap_remove_pages(pmap_t pmap) pmap_pkru_deassign_all(pmap); free_pv_chunk_batch((struct pv_chunklist *)&free_chunks); PMAP_UNLOCK(pmap); - vm_page_free_pages_toq(&free, true); + pmap_ptpage_slist_free(&free, true); } static bool @@ -8711,7 +9550,7 @@ pmap_page_test_mappings(vm_page_t m, bool accessed, bool modified) PG_V = pmap_valid_bit(pmap); mask |= PG_V | PG_A; } - rv = (*pte & mask) == mask; + rv = (pte_load_datapg(pte) & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; @@ -8893,12 +9732,16 @@ pmap_remove_write(vm_page_t m) pte = pmap_pde_to_pte(pde, pv->pv_va); oldpte = *pte; if (oldpte & PG_RW) { +#if 0 while (!atomic_fcmpset_long(pte, &oldpte, oldpte & ~(PG_RW | PG_M))) cpu_spinwait(); +#else + oldpte = pte_load_store_datapg(pte, oldpte & ~(PG_RW | PG_M)); +#endif if ((oldpte & PG_M) != 0) vm_page_dirty(m); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_datapg(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } @@ -8935,11 +9778,11 @@ pmap_ts_referenced(vm_page_t m) pmap_t pmap; struct rwlock *lock; pd_entry_t oldpde, *pde; - pt_entry_t *pte, PG_A, PG_M, PG_RW; + pt_entry_t oldpte, *pte, PG_A, PG_M, PG_RW; vm_offset_t va; vm_paddr_t pa; int cleared, md_gen, not_cleared, pvh_gen; - struct spglist free; + struct ptpglist free; bool demoted; KASSERT((m->oflags & VPO_UNMANAGED) == 0, @@ -9003,11 +9846,11 @@ pmap_ts_referenced(vm_page_t m) * its reference bit won't affect page replacement. 
*/ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ - (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && + (uintptr_t)pmap) & (NDPTEPG - 1)) == 0 && (oldpde & PG_W) == 0) { if (safe_to_clear_referenced(pmap, oldpde)) { - atomic_clear_long(pde, PG_A); - pmap_invalidate_page(pmap, pv->pv_va); + atomic_clear_long_datapg(pde, PG_A); + pmap_invalidate_page_datapg(pmap, pv->pv_va); demoted = false; } else if (pmap_demote_pde_locked(pmap, pde, pv->pv_va, &lock)) { @@ -9025,7 +9868,7 @@ pmap_ts_referenced(vm_page_t m) pte = pmap_pde_to_pte(pde, va); pmap_remove_pte(pmap, pte, va, *pde, NULL, &lock); - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_datapg(pmap, va); } else demoted = true; @@ -9083,14 +9926,15 @@ pmap_ts_referenced(vm_page_t m) ("pmap_ts_referenced: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); - if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) + oldpte = pte_load_datapg(pte); + if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); - if ((*pte & PG_A) != 0) { - if (safe_to_clear_referenced(pmap, *pte)) { - atomic_clear_long(pte, PG_A); - pmap_invalidate_page(pmap, pv->pv_va); + if ((oldpte & PG_A) != 0) { + if (safe_to_clear_referenced(pmap, oldpte)) { + atomic_clear_long_datapg(pte, PG_A); + pmap_invalidate_page_datapg(pmap, pv->pv_va); cleared++; - } else if ((*pte & PG_W) == 0) { + } else if ((oldpte & PG_W) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for @@ -9099,7 +9943,7 @@ pmap_ts_referenced(vm_page_t m) */ pmap_remove_pte(pmap, pte, pv->pv_va, *pde, &free, &lock); - pmap_invalidate_page(pmap, pv->pv_va); + pmap_invalidate_page_datapg(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; @@ -9121,7 +9965,7 @@ pmap_ts_referenced(vm_page_t m) not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); - vm_page_free_pages_toq(&free, true); + pmap_ptpage_slist_free(&free, true); return (cleared + not_cleared); } @@ -9137,7 +9981,7 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t oldpde, *pde; - pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V; + pt_entry_t *pte, oldpte, PG_A, PG_G, PG_M, PG_RW, PG_V; vm_offset_t va, va_next; vm_page_t m; bool anychanged; @@ -9230,27 +10074,28 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) if (va_next > eva) va_next = eva; va = va_next; - for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, - sva += PAGE_SIZE) { - if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) + for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; + pte += PAGE_SIZE_PTES, sva += PAGE_SIZE) { + oldpte = pte_load_datapg(pte); + if ((oldpte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) goto maybe_invlrng; - else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { + else if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. 
*/ - m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); + m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); vm_page_dirty(m); } - atomic_clear_long(pte, PG_M | PG_A); - } else if ((*pte & PG_A) != 0) - atomic_clear_long(pte, PG_A); + atomic_clear_long_datapg(pte, PG_M | PG_A); + } else if ((oldpte & PG_A) != 0) + atomic_clear_long_datapg(pte, PG_A); else goto maybe_invlrng; - if ((*pte & PG_G) != 0) { + if ((oldpte & PG_G) != 0) { if (va == va_next) va = sva; } else @@ -9324,9 +10169,9 @@ pmap_clear_modify(vm_page_t m) */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); - atomic_clear_long(pte, PG_M | PG_RW); + atomic_clear_long_datapg(pte, PG_M | PG_RW); vm_page_dirty(m); - pmap_invalidate_page(pmap, va); + pmap_invalidate_page_datapg(pmap, va); } PMAP_UNLOCK(pmap); } @@ -9349,9 +10194,9 @@ pmap_clear_modify(vm_page_t m) KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); - if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { - atomic_clear_long(pte, PG_M); - pmap_invalidate_page(pmap, pv->pv_va); + if ((pte_load_datapg(pte) & (PG_M | PG_RW)) == (PG_M | PG_RW)) { + atomic_clear_long_datapg(pte, PG_M); + pmap_invalidate_page_datapg(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } @@ -9519,13 +10364,13 @@ pmap_unmapdev(void *p, vm_size_t size) * Tries to demote a 1GB page mapping. */ static bool -pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) +pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, ptpage_t m) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pdpgpa; - vm_page_t pdpg; + ptpage_t pdpg; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); @@ -9547,11 +10392,11 @@ pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va, vm_page_t m) } } else { pdpg = m; - pdpg->pindex = va >> PDPSHIFT; + pmap_ptpage_pindex_set(pdpg, va >> PDPSHIFT); pmap_pt_page_count_adj(pmap, 1); } - pdpgpa = VM_PAGE_TO_PHYS(pdpg); - firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); + pdpgpa = pmap_ptpage_pa(pdpg); + firstpde = pmap_ptpage_va(pdpg); newpdpe = pdpgpa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V; KASSERT((oldpdpe & PG_A) != 0, ("pmap_demote_pdpe: oldpdpe is missing PG_A")); @@ -9802,7 +10647,7 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, ("%s: addr %#lx is not mapped", __func__, tmpva)); return (EINVAL); } - tmpva += PAGE_SIZE; + tmpva += PAGE_SIZE_PT; } error = 0; @@ -9882,9 +10727,9 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pte & PG_FRAME; - pa_end = pa_start + PAGE_SIZE; + pa_end = pa_start + PAGE_SIZE_PT; } else if (pa_end == (*pte & PG_FRAME)) - pa_end += PAGE_SIZE; + pa_end += PAGE_SIZE_PT; else { /* Run ended, update direct map. */ error = pmap_change_props_locked( @@ -9895,10 +10740,10 @@ pmap_change_props_locked(vm_offset_t va, vm_size_t size, vm_prot_t prot, break; /* Start physical address run. 
*/ pa_start = *pte & PG_FRAME; - pa_end = pa_start + PAGE_SIZE; + pa_end = pa_start + PAGE_SIZE_PT; } } - tmpva += PAGE_SIZE; + tmpva += PAGE_SIZE_PT; } } if (error == 0 && pa_start != pa_end && pa_start < dmaplimit) { @@ -9935,7 +10780,7 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) pdp_entry_t *pdpe; pd_entry_t *pde; vm_offset_t va; - vm_page_t m, mpte; + ptpage_t m, mpte; bool changed, rv __diagused; if (len == 0) @@ -9954,10 +10799,18 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) * The only existing caller of pmap_demote_DMAP() is the * x86_mr_split_dmap() function. */ +#if 1 + m = pmap_alloc_pt_page(kernel_pmap, 0, VM_ALLOC_WIRED | VM_ALLOC_WAITOK); +#else m = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK); +#endif if (len < NBPDR) { +#if 1 + mpte = pmap_alloc_pt_page(kernel_pmap, pmap_pde_pindex(va), VM_ALLOC_WIRED | VM_ALLOC_WAITOK); +#else mpte = vm_page_alloc_noobj(VM_ALLOC_WIRED | VM_ALLOC_WAITOK); +#endif } else mpte = NULL; @@ -9976,8 +10829,10 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) if ((*pde & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDE"); if ((*pde & PG_PS) != 0) { +#if 0 mpte->pindex = pmap_pde_pindex(va); pmap_pt_page_count_adj(kernel_pmap, 1); +#endif rv = pmap_demote_pde_mpte(kernel_pmap, pde, va, NULL, mpte); KASSERT(rv, ("pmap_demote_DMAP: PDE failed")); @@ -9989,12 +10844,20 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, bool invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); if (m != NULL) { +#if 1 + pmap_free_pt_page(kernel_pmap, m, 0); +#else vm_page_unwire_noq(m); vm_page_free(m); +#endif } if (mpte != NULL) { +#if 1 + pmap_free_pt_page(kernel_pmap, mpte, 0); +#else vm_page_unwire_noq(mpte); vm_page_free(mpte); +#endif } } } @@ -10041,7 +10904,13 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap) PDRMASK)) & PG_FRAME; val = MINCORE_PSIND(1); } else { + /* XXX CHUQ need to load M/A/RW bits from all ptes */ +#if 0 + /* CHUQ this should be right but trips over a different XXX */ + pte = pte_load_datapg(pmap_pde_to_pte(pdep, addr)); +#else pte = *pmap_pde_to_pte(pdep, addr); +#endif pa = pte & PG_FRAME; val = 0; } @@ -10340,7 +11209,8 @@ pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) int rv; struct rwlock *lock; #if VM_NRESERVLEVEL > 0 - vm_page_t m, mpte; + vm_page_t m; + ptpage_t mpte; #endif pd_entry_t *pde; pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V; @@ -10399,13 +11269,13 @@ pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) #if VM_NRESERVLEVEL > 0 /* try to promote the mapping */ if (va < VM_MAXUSER_ADDRESS) - mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + mpte = pmap_pa_to_ptpage(*pde & PG_FRAME); else mpte = NULL; m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); - if ((mpte == NULL || mpte->ref_count == NPTEPG) && + if ((mpte == NULL || pmap_ptpage_refs(mpte) == NDPTEPG) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0 && pmap_promote_pde(pmap, pde, va, mpte, &lock)) { @@ -10537,7 +11407,7 @@ pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, } else { pmap_kenter_attr(vaddr[i], paddr, page[i]->md.pat_mode); - pmap_invlpg(kernel_pmap, vaddr[i]); + pmap_invlpg_datapg(kernel_pmap, vaddr[i]); } } } @@ -10579,9 +11449,9 @@ pmap_quick_enter_page(vm_page_t m) * Since qframe is exclusively mapped by us, and we do not set * PG_G, we can use INVLPG here. 
*/ - invlpg(qframe); + invlpg_datapg(qframe); - pte_store(vtopte(qframe), paddr | X86_PG_RW | X86_PG_V | X86_PG_A | + pte_store_datapg(vtopte(qframe), paddr | X86_PG_RW | X86_PG_V | X86_PG_A | X86_PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, false)); return (qframe); } @@ -10592,7 +11462,7 @@ pmap_quick_remove_page(vm_offset_t addr) if (addr != qframe) return; - pte_store(vtopte(qframe), 0); + pte_clear_datapg(vtopte(qframe)); mtx_unlock_spin(&qframe_mtx); } @@ -10607,16 +11477,16 @@ pmap_quick_remove_page(vm_offset_t addr) * pages. They are dynamically allocated, and their reference count * represents the number of valid entries within the page. */ -static vm_page_t +static ptpage_t pmap_large_map_getptp_unlocked(void) { return (pmap_alloc_pt_page(kernel_pmap, 0, VM_ALLOC_ZERO)); } -static vm_page_t +static ptpage_t pmap_large_map_getptp(void) { - vm_page_t m; + ptpage_t m; PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); m = pmap_large_map_getptp_unlocked(); @@ -10661,7 +11531,7 @@ static pd_entry_t * pmap_large_map_pde(vm_offset_t va) { pdp_entry_t *pdpe; - vm_page_t m; + ptpage_t m; vm_paddr_t mphys; retry: @@ -10670,7 +11540,7 @@ pmap_large_map_pde(vm_offset_t va) m = pmap_large_map_getptp(); if (m == NULL) goto retry; - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pdpe = mphys | X86_PG_A | X86_PG_RW | X86_PG_V | pg_nx; } else { MPASS((*pdpe & X86_PG_PS) == 0); @@ -10683,7 +11553,7 @@ static pt_entry_t * pmap_large_map_pte(vm_offset_t va) { pd_entry_t *pde; - vm_page_t m; + ptpage_t m; vm_paddr_t mphys; retry: @@ -10692,9 +11562,9 @@ pmap_large_map_pte(vm_offset_t va) m = pmap_large_map_getptp(); if (m == NULL) goto retry; - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pde = mphys | X86_PG_A | X86_PG_RW | X86_PG_V | pg_nx; - PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde))->ref_count++; + pmap_ptpage_ref_add(pmap_va_to_ptpage(pde), 1); } else { MPASS((*pde & X86_PG_PS) == 0); mphys = *pde & PG_FRAME; @@ -10786,7 +11656,7 @@ pmap_large_map(vm_paddr_t spa, vm_size_t len, void **addr, error = pmap_large_map_getva(len, NBPDR, spa & PDRMASK, &vmem_res); if (error != 0) - error = pmap_large_map_getva(len, PAGE_SIZE, 0, &vmem_res); + error = pmap_large_map_getva(len, PAGE_SIZE_PT, 0, &vmem_res); if (error != 0) return (error); @@ -10813,8 +11683,7 @@ pmap_large_map(vm_paddr_t spa, vm_size_t len, void **addr, *pde = pa | pg_g | X86_PG_PS | X86_PG_RW | X86_PG_V | X86_PG_A | pg_nx | pmap_cache_bits(kernel_pmap, mattr, true); - PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde))-> - ref_count++; + pmap_ptpage_ref_add(pmap_va_to_ptpage(pde), 1); inc = NBPDR; } else { pte = pmap_large_map_pte(va); @@ -10822,9 +11691,8 @@ pmap_large_map(vm_paddr_t spa, vm_size_t len, void **addr, *pte = pa | pg_g | X86_PG_RW | X86_PG_V | X86_PG_A | pg_nx | pmap_cache_bits(kernel_pmap, mattr, false); - PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte))-> - ref_count++; - inc = PAGE_SIZE; + pmap_ptpage_ref_add(pmap_va_to_ptpage(pte), 1); + inc = PAGE_SIZE_PT; } } PMAP_UNLOCK(kernel_pmap); @@ -10842,15 +11710,15 @@ pmap_large_unmap(void *svaa, vm_size_t len) pdp_entry_t *pdpe, pdp; pd_entry_t *pde, pd; pt_entry_t *pte; - vm_page_t m; - struct spglist spgf; + ptpage_t m; + struct ptpglist ptpgf; sva = (vm_offset_t)svaa; if (len == 0 || sva + len < sva || (sva >= kva_layout.dmap_low && sva + len < kva_layout.dmap_high)) return; - SLIST_INIT(&spgf); + SLIST_INIT(&ptpgf); KASSERT(PMAP_ADDRESS_IN_LARGEMAP(sva) && PMAP_ADDRESS_IN_LARGEMAP(sva + len - 1), ("not largemap range %#lx %#lx", (u_long)svaa, (u_long)svaa 
+ len)); @@ -10891,11 +11759,11 @@ pmap_large_unmap(void *svaa, vm_size_t len) pd, len)); pde_store(pde, 0); inc = NBPDR; - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pde)); - m->ref_count--; - if (m->ref_count == 0) { + m = pmap_va_to_ptpage(pde); + pmap_ptpage_ref_add(m, -1); + if (pmap_ptpage_refs(m) == 0) { *pdpe = 0; - SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss); + pmap_ptpage_slist_add(kernel_pmap, &ptpgf, m); } continue; } @@ -10904,23 +11772,23 @@ pmap_large_unmap(void *svaa, vm_size_t len) ("invalid pte va %#lx pte %#lx pt %#lx", va, (u_long)pte, *pte)); pte_clear(pte); - inc = PAGE_SIZE; - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pte)); - m->ref_count--; - if (m->ref_count == 0) { + inc = PAGE_SIZE_PT; + m = pmap_va_to_ptpage(pte); + pmap_ptpage_ref_add(m, -1); + if (pmap_ptpage_refs(m) == 0) { *pde = 0; - SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pde)); - m->ref_count--; - if (m->ref_count == 0) { + pmap_ptpage_slist_add(kernel_pmap, &ptpgf, m); + m = pmap_va_to_ptpage(pde); + pmap_ptpage_ref_add(m, -1); + if (pmap_ptpage_refs(m) == 0) { *pdpe = 0; - SLIST_INSERT_HEAD(&spgf, m, plinks.s.ss); + pmap_ptpage_slist_add(kernel_pmap, &ptpgf, m); } } } pmap_invalidate_range(kernel_pmap, sva, sva + len); PMAP_UNLOCK(kernel_pmap); - vm_page_free_pages_toq(&spgf, false); + pmap_ptpage_slist_free(&ptpgf, false); vmem_free(large_vmem, sva, len); } @@ -11027,7 +11895,7 @@ pmap_large_map_wb_large(vm_offset_t sva, vm_offset_t eva) if (inc == 0) { pe = (volatile u_long *)pmap_large_map_pte(va); p = *pe; - inc = PAGE_SIZE; + inc = PAGE_SIZE_PT; } seen_other = false; for (;;) { @@ -11103,19 +11971,23 @@ pmap_large_map_wb(void *svap, vm_size_t len) pmap_large_map_wb_fence(); } -static vm_page_t +#if PAGE_SIZE == PAGE_SIZE_4K + +static ptpage_t pmap_pti_alloc_page(void) { vm_page_t m; VM_OBJECT_ASSERT_WLOCKED(pti_obj); m = vm_page_grab(pti_obj, pti_pg_idx++, VM_ALLOC_WIRED | VM_ALLOC_ZERO); - return (m); + return ((ptpage_t)m); } static bool -pmap_pti_free_page(vm_page_t m) +pmap_pti_free_page(ptpage_t ptp) { + vm_page_t m = (void *)ptp; + if (!vm_page_unwire_noq(m)) return (false); vm_page_xbusy_claim(m); @@ -11123,10 +11995,29 @@ pmap_pti_free_page(vm_page_t m) return (true); } +#else + +static ptpage_t +pmap_pti_alloc_page(void) +{ + + VM_OBJECT_ASSERT_WLOCKED(pti_obj); + return (pmap_alloc_pt_page(NULL, 0, VM_ALLOC_WIRED | VM_ALLOC_ZERO)); +} + +static bool +pmap_pti_free_page(ptpage_t ptp) +{ + + pmap_free_pt_page(NULL, ptp, false); + return (false); +} +#endif + static void pmap_pti_init(void) { - vm_page_t pml4_pg; + ptpage_t pml4_pg; pdp_entry_t *pdpe; vm_offset_t va; int i; @@ -11136,7 +12027,7 @@ pmap_pti_init(void) pti_obj = vm_pager_allocate(OBJT_PHYS, NULL, 0, VM_PROT_ALL, 0, NULL); VM_OBJECT_WLOCK(pti_obj); pml4_pg = pmap_pti_alloc_page(); - pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg)); + pti_pml4 = pmap_ptpage_va(pml4_pg); for (va = kva_layout.km_low; va <= kva_layout.km_high && va >= kva_layout.km_low && va > NBPML4; va += NBPML4) { pdpe = pmap_pti_pdpe(va); @@ -11180,7 +12071,7 @@ pmap_pti_pdpe(vm_offset_t va) { pml4_entry_t *pml4e; pdp_entry_t *pdpe; - vm_page_t m; + ptpage_t m; vm_pindex_t pml4_idx; vm_paddr_t mphys; @@ -11197,7 +12088,7 @@ pmap_pti_pdpe(vm_offset_t va) pmap_pti_free_page(m); mphys = *pml4e & ~PAGE_MASK; } else { - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pml4e = mphys | X86_PG_RW | X86_PG_V; } } else { @@ -11210,32 +12101,32 @@ pmap_pti_pdpe(vm_offset_t va) static void 
pmap_pti_wire_pte(void *pte) { - vm_page_t m; + ptpage_t m; VM_OBJECT_ASSERT_WLOCKED(pti_obj); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte)); - m->ref_count++; + m = pmap_va_to_ptpage(pte); + pmap_ptpage_ref_add(m, 1); } static void pmap_pti_unwire_pde(void *pde, bool only_ref) { - vm_page_t m; + ptpage_t m; VM_OBJECT_ASSERT_WLOCKED(pti_obj); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde)); - MPASS(only_ref || m->ref_count > 1); + m = pmap_va_to_ptpage(pde); + MPASS(only_ref || pmap_ptpage_refs(m) > 1); pmap_pti_free_page(m); } static void pmap_pti_unwire_pte(void *pte, vm_offset_t va) { - vm_page_t m; + ptpage_t m; pd_entry_t *pde; VM_OBJECT_ASSERT_WLOCKED(pti_obj); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte)); + m = pmap_va_to_ptpage(pte); if (pmap_pti_free_page(m)) { pde = pmap_pti_pde(va); MPASS((*pde & (X86_PG_PS | X86_PG_V)) == X86_PG_V); @@ -11249,7 +12140,7 @@ pmap_pti_pde(vm_offset_t va) { pdp_entry_t *pdpe; pd_entry_t *pde; - vm_page_t m; + ptpage_t m; vm_pindex_t pd_idx; vm_paddr_t mphys; @@ -11263,7 +12154,7 @@ pmap_pti_pde(vm_offset_t va) MPASS((*pdpe & X86_PG_PS) == 0); mphys = *pdpe & ~PAGE_MASK; } else { - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pdpe = mphys | X86_PG_RW | X86_PG_V; } } else { @@ -11282,7 +12173,7 @@ pmap_pti_pte(vm_offset_t va, bool *unwire_pde) { pd_entry_t *pde; pt_entry_t *pte; - vm_page_t m; + ptpage_t m; vm_paddr_t mphys; VM_OBJECT_ASSERT_WLOCKED(pti_obj); @@ -11299,7 +12190,7 @@ pmap_pti_pte(vm_offset_t va, bool *unwire_pde) MPASS((*pde & X86_PG_PS) == 0); mphys = *pde & ~(PAGE_MASK | pg_nx); } else { - mphys = VM_PAGE_TO_PHYS(m); + mphys = pmap_ptpage_pa(m); *pde = mphys | X86_PG_RW | X86_PG_V; if (unwire_pde != NULL) *unwire_pde = false; @@ -11329,7 +12220,7 @@ pmap_pti_add_kva_locked(vm_offset_t sva, vm_offset_t eva, bool exec) MPASS(sva > VM_MAXUSER_ADDRESS); eva = round_page(eva); MPASS(sva < eva); - for (; sva < eva; sva += PAGE_SIZE) { + for (; sva < eva; sva += PAGE_SIZE_PT) { pte = pmap_pti_pte(sva, &unwire_pde); pa = pmap_kextract(sva); ptev = pa | X86_PG_RW | X86_PG_V | X86_PG_A | X86_PG_G | @@ -11377,7 +12268,7 @@ pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva) eva = roundup2(eva, PAGE_SIZE); MPASS(sva < eva); VM_OBJECT_WLOCK(pti_obj); - for (va = sva; va < eva; va += PAGE_SIZE) { + for (va = sva; va < eva; va += PAGE_SIZE_PT) { pte = pmap_pti_pte(va, NULL); KASSERT((*pte & X86_PG_V) != 0, ("invalid pte va %#lx pte %#lx pt %#lx", va, @@ -11597,7 +12488,7 @@ pmap_pkru_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, va_next = eva; for (ptep = pmap_pde_to_pte(pde, va); va != va_next; - ptep++, va += PAGE_SIZE) { + ptep++, va += PAGE_SIZE_PT) { pte = *ptep; if ((pte & X86_PG_V) == 0) continue; @@ -11780,11 +12671,12 @@ pmap_san_enter_early(vm_offset_t va) *pte = (pt_entry_t)(pa | X86_PG_A | X86_PG_M | X86_PG_RW | X86_PG_V); } -static vm_page_t +static ptpage_t pmap_san_enter_alloc_4k(void) { - vm_page_t m; + ptpage_t m; + /* CHUQ how to allocate this? */ m = vm_page_alloc_noobj(VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (m == NULL) @@ -11792,9 +12684,11 @@ pmap_san_enter_alloc_4k(void) return (m); } -static vm_page_t +static ptpage_t pmap_san_enter_alloc_2m(void) { + + /* CHUQ how to allocate this? 
*/ return (vm_page_alloc_noobj_contig(VM_ALLOC_WIRED | VM_ALLOC_ZERO, NPTEPG, 0, ~0ul, NBPDR, 0, VM_MEMATTR_DEFAULT)); } @@ -11809,7 +12703,7 @@ pmap_san_enter(vm_offset_t va) pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; - vm_page_t m; + ptpage_t m; if (kernphys == 0) { /* @@ -11824,14 +12718,14 @@ pmap_san_enter(vm_offset_t va) pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) { m = pmap_san_enter_alloc_4k(); - *pdpe = (pdp_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW | + *pdpe = (pdp_entry_t)(pmap_ptpage_pa(m) | X86_PG_RW | X86_PG_V | pg_nx); } pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & X86_PG_V) == 0) { m = pmap_san_enter_alloc_2m(); if (m != NULL) { - *pde = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW | + *pde = (pd_entry_t)(pmap_ptpage_pa(m) | X86_PG_RW | X86_PG_PS | X86_PG_V | X86_PG_A | X86_PG_M | pg_nx); } else { m = pmap_san_enter_alloc_4k(); @@ -11845,7 +12739,7 @@ pmap_san_enter(vm_offset_t va) if ((*pte & X86_PG_V) != 0) return; m = pmap_san_enter_alloc_4k(); - *pte = (pt_entry_t)(VM_PAGE_TO_PHYS(m) | X86_PG_RW | X86_PG_V | + *pte = (pt_entry_t)(pmap_ptpage_pa(m) | X86_PG_RW | X86_PG_V | X86_PG_M | X86_PG_A | pg_nx); } #endif @@ -12095,7 +12989,7 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS) continue; } if (PMAP_ADDRESS_IN_LARGEMAP(sva) && - vm_phys_paddr_to_vm_page(pa) == NULL) { + pmap_pa_to_ptpage(pa) == NULL) { /* * Page table pages for the large map may be * freed. Validate the next-level address @@ -12124,7 +13018,7 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS) continue; } if (PMAP_ADDRESS_IN_LARGEMAP(sva) && - vm_phys_paddr_to_vm_page(pa) == NULL) { + pmap_pa_to_ptpage(pa) == NULL) { /* * Page table pages for the large map * may be freed. Validate the @@ -12134,7 +13028,6 @@ sysctl_kmaps(SYSCTL_HANDLER_ARGS) goto restart; } pt = (pt_entry_t *)PHYS_TO_DMAP(pa); - for (l = pmap_pte_index(sva); l < NPTEPG; l++, sva += PAGE_SIZE) { pte = pt[l]; @@ -12230,10 +13123,10 @@ DB_SHOW_COMMAND(phys2dmap, pmap_phys2dmap) } static void -ptpages_show_page(int level, int idx, vm_page_t pg) +ptpages_show_page(int level, int idx, ptpage_t ptp) { - db_printf("l %d i %d pg %p phys %#lx ref %x\n", - level, idx, pg, VM_PAGE_TO_PHYS(pg), pg->ref_count); + db_printf("l %d i %d ptp %p phys %#lx ref 0x%x\n", + level, idx, ptp, pmap_ptpage_pa(ptp), pmap_ptpage_refs(ptp)); } static void @@ -12243,44 +13136,51 @@ ptpages_show_complain(int level, int idx, uint64_t pte) } static void -ptpages_show_pml4(vm_page_t pg4, int num_entries, uint64_t PG_V) +ptpages_show_pml4(ptpage_t pg4, int num_entries, uint64_t PG_V) { - vm_page_t pg3, pg2, pg1; + ptpage_t pg3, pg2, pg1; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pd; - int i4, i3, i2; + pt_entry_t *pt; + int i4, i3, i2, i1; - pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pg4)); + pml4 = pmap_ptpage_va(pg4); for (i4 = 0; i4 < num_entries; i4++) { if ((pml4[i4] & PG_V) == 0) continue; - pg3 = PHYS_TO_VM_PAGE(pml4[i4] & PG_FRAME); + pg3 = pmap_pa_to_ptpage(pml4[i4] & PG_FRAME); if (pg3 == NULL) { ptpages_show_complain(3, i4, pml4[i4]); continue; } ptpages_show_page(3, i4, pg3); - pdp = (pdp_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pg3)); + pdp = pmap_ptpage_va(pg3); for (i3 = 0; i3 < NPDPEPG; i3++) { if ((pdp[i3] & PG_V) == 0) continue; - pg2 = PHYS_TO_VM_PAGE(pdp[i3] & PG_FRAME); + pg2 = pmap_pa_to_ptpage(pdp[i3] & PG_FRAME); if (pg3 == NULL) { ptpages_show_complain(2, i3, pdp[i3]); continue; } ptpages_show_page(2, i3, pg2); - pd = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pg2)); + pd = pmap_ptpage_va(pg2); for (i2 = 0; i2 < NPDEPG; i2++) { if 
((pd[i2] & PG_V) == 0) continue; - pg1 = PHYS_TO_VM_PAGE(pd[i2] & PG_FRAME); + pg1 = pmap_pa_to_ptpage(pd[i2] & PG_FRAME); if (pg1 == NULL) { ptpages_show_complain(1, i2, pd[i2]); continue; } ptpages_show_page(1, i2, pg1); + pt = pmap_ptpage_va(pg1); + for (i1 = 0; i1 < PAGE_SIZE_PT / sizeof (pt_entry_t); i1++) { + if ((pt[i1] & PG_V) == 0) + continue; + ptpages_show_complain(0, i1, pt[i1]); + } } } } @@ -12289,7 +13189,7 @@ ptpages_show_pml4(vm_page_t pg4, int num_entries, uint64_t PG_V) DB_SHOW_COMMAND(ptpages, pmap_ptpages) { pmap_t pmap; - vm_page_t pg; + ptpage_t pg; pml5_entry_t *pml5; uint64_t PG_V; int i5; @@ -12306,7 +13206,7 @@ DB_SHOW_COMMAND(ptpages, pmap_ptpages) for (i5 = 0; i5 < NUPML5E; i5++) { if ((pml5[i5] & PG_V) == 0) continue; - pg = PHYS_TO_VM_PAGE(pml5[i5] & PG_FRAME); + pg = pmap_pa_to_ptpage(pml5[i5] & PG_FRAME); if (pg == NULL) { ptpages_show_complain(4, i5, pml5[i5]); continue; @@ -12315,8 +13215,7 @@ DB_SHOW_COMMAND(ptpages, pmap_ptpages) ptpages_show_pml4(pg, NPML4EPG, PG_V); } } else { - ptpages_show_pml4(PHYS_TO_VM_PAGE(DMAP_TO_PHYS( - (vm_offset_t)pmap->pm_pmltop)), NUP4ML4E, PG_V); + ptpages_show_pml4(pmap_va_to_ptpage(pmap->pm_pmltop), NUP4ML4E, PG_V); } } #endif diff --git a/sys/amd64/amd64/pmap_radix.c b/sys/amd64/amd64/pmap_radix.c new file mode 100644 index 00000000000000..fb8ed77d456462 --- /dev/null +++ b/sys/amd64/amd64/pmap_radix.c @@ -0,0 +1,342 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2013 EMC Corp. + * Copyright (c) 2011 Jeffrey Roberson + * Copyright (c) 2008 Mayur Shardul + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include + +#include + +void ptpage_radix_wait(void); +void ptpage_radix_zinit(void); +void *ptpage_radix_node_alloc(struct pctrie *ptree); +void ptpage_radix_node_free(struct pctrie *ptree, void *node); +smr_t ptpage_radix_smr; + +static __inline void +ptpage_radix_init(struct ptpage_radix *rtree) +{ + pctrie_init(&rtree->rt_trie); +} + +static __inline bool +ptpage_radix_is_empty(struct ptpage_radix *rtree) +{ + return (pctrie_is_empty(&rtree->rt_trie)); +} + +PCTRIE_DEFINE_SMR(PTPAGE_RADIX, ptpage, pindex, ptpage_radix_node_alloc, + ptpage_radix_node_free, ptpage_radix_smr); + +/* + * Inserts the key-value pair into the trie, starting search from root. + * Panics if the key already exists. + */ +static __noinline int +ptpage_radix_insert(struct ptpage_radix *rtree, ptpage_t page) +{ + return (PTPAGE_RADIX_PCTRIE_INSERT(&rtree->rt_trie, page)); +} + +/* + * Inserts the key-value pair into the trie, starting search from iterator. + * Panics if the key already exists. + */ +static __inline int +ptpage_radix_iter_insert(struct pctrie_iter *pages, ptpage_t page) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_INSERT(pages, page)); +} + +/* + * Insert the page into the ptpage_radix tree with its pindex as the key. Panic if + * the pindex already exists. Return zero on success or a non-zero error on + * memory allocation failure. Set the out parameter mpred to the previous page + * in the tree as if found by a previous call to ptpage_radix_lookup_le with the + * new page pindex. + */ +static __inline int +ptpage_radix_insert_lookup_lt(struct ptpage_radix *rtree, ptpage_t page, + ptpage_t *mpred) +{ + int error; + + error = PTPAGE_RADIX_PCTRIE_INSERT_LOOKUP_LE(&rtree->rt_trie, page, mpred); + if (__predict_false(error == EEXIST)) + panic("ptpage_radix_insert_lookup_lt: page already present, %p", + *mpred); + return (error); +} + +/* + * Returns the value stored at the index assuming there is an external lock. + * + * If the index is not present, NULL is returned. + */ +static __inline ptpage_t +ptpage_radix_lookup(struct ptpage_radix *rtree, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_LOOKUP(&rtree->rt_trie, index)); +} + +/* + * Returns the value stored at the index without requiring an external lock. + * + * If the index is not present, NULL is returned. + */ +static __inline ptpage_t +ptpage_radix_lookup_unlocked(struct ptpage_radix *rtree, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_LOOKUP_UNLOCKED(&rtree->rt_trie, index)); +} + +/* + * Initialize an iterator for ptpage_radix. + */ +static __inline void +ptpage_radix_iter_init(struct pctrie_iter *pages, struct ptpage_radix *rtree) +{ + pctrie_iter_init(pages, &rtree->rt_trie); +} + +/* + * Initialize an iterator for ptpage_radix. + */ +static __inline void +ptpage_radix_iter_limit_init(struct pctrie_iter *pages, struct ptpage_radix *rtree, + vm_pindex_t limit) +{ + pctrie_iter_limit_init(pages, &rtree->rt_trie, limit); +} + +/* + * Returns the value stored at the index. + * Requires that access be externally synchronized by a lock. + * + * If the index is not present, NULL is returned. + */ +static __inline ptpage_t +ptpage_radix_iter_lookup(struct pctrie_iter *pages, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_LOOKUP(pages, index)); +} + +/* + * Returns the value stored 'stride' steps beyond the current position. + * Requires that access be externally synchronized by a lock. + * + * If the index is not present, NULL is returned. 
+ */ +static __inline ptpage_t +ptpage_radix_iter_stride(struct pctrie_iter *pages, int stride) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_STRIDE(pages, stride)); +} + +/* + * Returns the page with the least pindex that is greater than or equal to the + * specified pindex, or NULL if there are no such pages. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_lookup_ge(struct ptpage_radix *rtree, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_LOOKUP_GE(&rtree->rt_trie, index)); +} + +/* + * Returns the page with the greatest pindex that is less than or equal to the + * specified pindex, or NULL if there are no such pages. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_lookup_le(struct ptpage_radix *rtree, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_LOOKUP_LE(&rtree->rt_trie, index)); +} + +/* + * Remove the specified index from the trie, and return the value stored at + * that index. If the index is not present, return NULL. + */ +static __inline ptpage_t +ptpage_radix_remove(struct ptpage_radix *rtree, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_REMOVE_LOOKUP(&rtree->rt_trie, index)); +} + +/* + * Remove the current page from the trie. + */ +static __inline void +ptpage_radix_iter_remove(struct pctrie_iter *pages) +{ + PTPAGE_RADIX_PCTRIE_ITER_REMOVE(pages); +} + +/* + * Reclaim all the interior nodes of the trie, and invoke the callback + * on all the pages, in order. + */ +static __inline void +ptpage_radix_reclaim_callback(struct ptpage_radix *rtree, + void (*page_cb)(ptpage_t, void *), void *arg) +{ + PTPAGE_RADIX_PCTRIE_RECLAIM_CALLBACK(&rtree->rt_trie, page_cb, arg); +} + +/* + * Initialize an iterator pointing to the page with the least pindex that is + * greater than or equal to the specified pindex, or NULL if there are no such + * pages. Return the page. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_iter_lookup_ge(struct pctrie_iter *pages, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_LOOKUP_GE(pages, index)); +} + +/* + * Update the iterator to point to the page with the least pindex that is 'jump' + * or more greater than or equal to the current pindex, or NULL if there are no + * such pages. Return the page. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_iter_jump(struct pctrie_iter *pages, vm_pindex_t jump) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_JUMP_GE(pages, jump)); +} + +/* + * Update the iterator to point to the page with the least pindex that is one or + * more greater than the current pindex, or NULL if there are no such pages. + * Return the page. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_iter_step(struct pctrie_iter *pages) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_STEP_GE(pages)); +} + +/* + * Initialize an iterator pointing to the page with the greatest pindex that is + * less than or equal to the specified pindex, or NULL if there are no such + * pages. Return the page. + * + * Requires that access be externally synchronized by a lock. 
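The wrappers in this new file are thin shims over the accessors that PCTRIE_DEFINE_SMR() generates for struct ptpage keyed by pindex. As a usage sketch only (not code from the patch; the real callers are the pmap routines, which hold the pmap lock and reach the trie through pm_root), insertion, lookup and removal by pindex look like this:

	/* Hypothetical helper for illustration; assumes the relevant lock is held. */
	static ptpage_t
	ptpage_radix_usage_sketch(struct ptpage_radix *rtree, ptpage_t ptp,
	    vm_pindex_t pindex)
	{
		ptpage_t found;

		/* Fails only when the pctrie cannot allocate an interior node. */
		if (ptpage_radix_insert(rtree, ptp) != 0)
			return (NULL);
		found = ptpage_radix_lookup(rtree, pindex);
		if (found != NULL)
			found = ptpage_radix_remove(rtree, pindex);
		return (found);
	}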
+ */ +static __inline ptpage_t +ptpage_radix_iter_lookup_le(struct pctrie_iter *pages, vm_pindex_t index) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_LOOKUP_LE(pages, index)); +} + +/* + * Update the iterator to point to the page with the pindex that is one greater + * than the current pindex, or NULL if there is no such page. Return the page. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_iter_next(struct pctrie_iter *pages) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_NEXT(pages)); +} + +/* + * Update the iterator to point to the page with the pindex that is one less + * than the current pindex, or NULL if there is no such page. Return the page. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_iter_prev(struct pctrie_iter *pages) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_PREV(pages)); +} + +/* + * Return the current page. + * + * Requires that access be externally synchronized by a lock. + */ +static __inline ptpage_t +ptpage_radix_iter_page(struct pctrie_iter *pages) +{ + return (PTPAGE_RADIX_PCTRIE_ITER_VALUE(pages)); +} + +/* + * Replace an existing page in the trie with another one. + * Panics if there is not an old page in the trie at the new page's index. + */ +static __inline ptpage_t +ptpage_radix_replace(struct ptpage_radix *rtree, ptpage_t newpage) +{ + return (PTPAGE_RADIX_PCTRIE_REPLACE(&rtree->rt_trie, newpage)); +} + + +static uma_zone_t ptpage_radix_node_zone; + +void * +ptpage_radix_node_alloc(struct pctrie *ptree) +{ + return (uma_zalloc_smr(ptpage_radix_node_zone, M_NOWAIT)); +} + +void +ptpage_radix_node_free(struct pctrie *ptree, void *node) +{ + uma_zfree_smr(ptpage_radix_node_zone, node); +} + +void +ptpage_radix_zinit(void) +{ + + ptpage_radix_node_zone = uma_zcreate("RADIX NODE", pctrie_node_size(), + NULL, NULL, pctrie_zone_init, NULL, + PCTRIE_PAD, UMA_ZONE_VM | UMA_ZONE_SMR); + ptpage_radix_smr = uma_zone_get_smr(ptpage_radix_node_zone); +} + +void +ptpage_radix_wait(void) +{ + uma_zwait(ptpage_radix_node_zone); +} diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 870cd255abb794..f164fbd3b33aee 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -1606,7 +1606,7 @@ ENTRY(pmap_pti_pcid_invlrng) cli movq %rdi,%cr3 /* to user page table */ 1: invlpg (%rdx) - addq $PAGE_SIZE,%rdx + addq $PAGE_SIZE_PT,%rdx cmpq %rdx,%rcx ja 1b movq %rsi,%cr3 /* back to kernel */ diff --git a/sys/amd64/amd64/sys_machdep.c b/sys/amd64/amd64/sys_machdep.c index 51f55687bbcf1d..5a14f128ea2549 100644 --- a/sys/amd64/amd64/sys_machdep.c +++ b/sys/amd64/amd64/sys_machdep.c @@ -415,6 +415,7 @@ amd64_set_ioperm(struct thread *td, struct i386_ioperm_args *uap) struct amd64tss *tssp; struct system_segment_descriptor *tss_sd; struct pcb *pcb; + size_t tss_size; u_int i; int error; @@ -423,7 +424,7 @@ amd64_set_ioperm(struct thread *td, struct i386_ioperm_args *uap) if ((error = securelevel_gt(td->td_ucred, 0)) != 0) return (error); if (uap->start > uap->start + uap->length || - uap->start + uap->length > IOPAGES * PAGE_SIZE * NBBY) + uap->start + uap->length > IOPERM_BITMAP_BITS) return (EINVAL); /* @@ -434,9 +435,10 @@ amd64_set_ioperm(struct thread *td, struct i386_ioperm_args *uap) */ pcb = td->td_pcb; if (pcb->pcb_tssp == NULL) { - tssp = kmem_malloc(ctob(IOPAGES + 1), M_WAITOK); + tss_size = PAGE_SIZE_PT + IOPERM_BITMAP_BYTES; + tssp = kmem_malloc(tss_size, M_WAITOK); pmap_pti_add_kva((vm_offset_t)tssp, (vm_offset_t)tssp + - ctob(IOPAGES 
+ 1), false); + tss_size, false); iomap = (char *)&tssp[1]; memset(iomap, 0xff, IOPERM_BITMAP_SIZE); critical_enter(); @@ -468,7 +470,7 @@ amd64_get_ioperm(struct thread *td, struct i386_ioperm_args *uap) int i, state; char *iomap; - if (uap->start >= IOPAGES * PAGE_SIZE * NBBY) + if (uap->start >= IOPERM_BITMAP_BITS) return (EINVAL); if (td->td_pcb->pcb_tssp == NULL) { uap->length = 0; @@ -482,7 +484,7 @@ amd64_get_ioperm(struct thread *td, struct i386_ioperm_args *uap) uap->enable = !state; uap->length = 1; - for (i = uap->start + 1; i < IOPAGES * PAGE_SIZE * NBBY; i++) { + for (i = uap->start + 1; i < IOPERM_BITMAP_BITS; i++) { if (state != ((iomap[i >> 3] >> (i & 7)) & 1)) break; uap->length++; diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index c763ff58680ed1..e53ae5b1c65182 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -362,6 +362,7 @@ void cpu_thread_clean(struct thread *td) { struct pcb *pcb; + size_t tss_size; pcb = td->td_pcb; @@ -369,9 +370,10 @@ cpu_thread_clean(struct thread *td) * Clean TSS/iomap */ if (pcb->pcb_tssp != NULL) { + tss_size = PAGE_SIZE_PT + IOPERM_BITMAP_BYTES; pmap_pti_remove_kva((vm_offset_t)pcb->pcb_tssp, - (vm_offset_t)pcb->pcb_tssp + ctob(IOPAGES + 1)); - kmem_free(pcb->pcb_tssp, ctob(IOPAGES + 1)); + (vm_offset_t)pcb->pcb_tssp + tss_size); + kmem_free(pcb->pcb_tssp, tss_size); pcb->pcb_tssp = NULL; } } diff --git a/sys/amd64/conf/GENERIC b/sys/amd64/conf/GENERIC index 786edc4125c942..8754a306c5f108 100644 --- a/sys/amd64/conf/GENERIC +++ b/sys/amd64/conf/GENERIC @@ -115,7 +115,7 @@ device cpufreq # Bus support. device acpi device smbios -options IOMMU +#options IOMMU device pci options PCI_HP # PCI-Express native HotPlug options PCI_IOV # PCI SR-IOV support @@ -368,10 +368,10 @@ device hyperv # HyperV drivers # Xen HVM Guest Optimizations # NOTE: XENHVM depends on xenpci and xentimer. # They must be added or removed together. 
-options XENHVM # Xen HVM kernel infrastructure -device xenefi # Xen EFI timer device -device xenpci # Xen HVM Hypervisor services driver -device xentimer # Xen x86 PV timer device +#options XENHVM # Xen HVM kernel infrastructure +#device xenefi # Xen EFI timer device +#device xenpci # Xen HVM Hypervisor services driver +#device xentimer # Xen x86 PV timer device # Netmap provides direct access to TX/RX rings on supported NICs device netmap # netmap(4) support @@ -391,3 +391,13 @@ options U2F_MAKE_UHID_ALIAS # install /dev/uhid alias for /dev/u2f/ # EFI devices device efidev # EFI pseudo-device device efirtc # EFI RTC + + + +options DIAGNOSTIC +options INVARIANTS +options WITNESS +options ALT_BREAK_TO_DEBUGGER + +options UART_NS8250_EARLY_PORT=0x3f8 +options EARLY_PRINTF=ns8250 diff --git a/sys/amd64/conf/GENERIC-PAGE16K b/sys/amd64/conf/GENERIC-PAGE16K new file mode 100644 index 00000000000000..45ee09300f045f --- /dev/null +++ b/sys/amd64/conf/GENERIC-PAGE16K @@ -0,0 +1,4 @@ +include GENERIC +ident GENERIC-PAGE16K + +options OS_PAGE_SHIFT=14 diff --git a/sys/amd64/conf/GENERIC-PAGE16K-NODEBUG b/sys/amd64/conf/GENERIC-PAGE16K-NODEBUG new file mode 100644 index 00000000000000..467910fd1f63c1 --- /dev/null +++ b/sys/amd64/conf/GENERIC-PAGE16K-NODEBUG @@ -0,0 +1,4 @@ +include GENERIC-NODEBUG +ident GENERIC-PAGE16K-NODEBUG + +options OS_PAGE_SHIFT=14 diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h index 5a9c3162e14c3a..f4e165fe814c67 100644 --- a/sys/amd64/include/param.h +++ b/sys/amd64/include/param.h @@ -87,31 +87,49 @@ #define CACHE_LINE_SIZE (1 << CACHE_LINE_SHIFT) /* Size of the level 1 page table units */ -#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t))) +#define NPTEPG (PAGE_SIZE_PT/(sizeof (pt_entry_t))) +#define NDPTEPG (NPTEPG / PAGE_SIZE_PTES) #define NPTEPGSHIFT 9 /* LOG2(NPTEPG) */ +#ifdef OS_PAGE_SHIFT +#define PAGE_SHIFT OS_PAGE_SHIFT /* LOG2(PAGE_SIZE) */ +#else #define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */ +#endif #define PAGE_SIZE (1<> PAGE_SHIFT_PT) + #define amd64_btop(x) ((unsigned long)(x) >> PAGE_SHIFT) #define amd64_ptob(x) ((unsigned long)(x) << PAGE_SHIFT) diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 213790d245dce1..631c95ed5c697f 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -101,7 +101,7 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line"); u_int pc_small_core; \ u_int pc_pcid_invlpg_workaround; \ struct pmap_pcid pc_kpmap_store; \ - char __pad[2900] /* pad to UMA_PCPU_ALLOC_SIZE */ + char __pad[2900 + PAGE_SIZE - 4096] /* pad to UMA_PCPU_ALLOC_SIZE */ #define PC_DBREG_CMD_NONE 0 #define PC_DBREG_CMD_LOAD 1 diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h index e2f97442c10fe5..0316728f14d55c 100644 --- a/sys/amd64/include/pmap.h +++ b/sys/amd64/include/pmap.h @@ -117,14 +117,14 @@ ((unsigned long)(l4) << PML4SHIFT) | \ ((unsigned long)(l3) << PDPSHIFT) | \ ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) + ((unsigned long)(l1) << PAGE_SHIFT_PT)) #define UVADDR(l5, l4, l3, l2, l1) ( \ ((unsigned long)(l5) << PML5SHIFT) | \ ((unsigned long)(l4) << PML4SHIFT) | \ ((unsigned long)(l3) << PDPSHIFT) | \ ((unsigned long)(l2) << PDRSHIFT) | \ - ((unsigned long)(l1) << PAGE_SHIFT)) + ((unsigned long)(l1) << PAGE_SHIFT_PT)) /* * Number of kernel PML4 slots. 
Can be anywhere from 1 to 64 or so, @@ -282,12 +282,14 @@ extern u_int64_t KPML5phys; /* physical address of kernel level 5 */ pt_entry_t *vtopte(vm_offset_t); #define vtophys(va) pmap_kextract(((vm_offset_t) (va))) +#define pte_load(ptep) atomic_load_long(ptep) #define pte_load_store(ptep, pte) atomic_swap_long(ptep, pte) #define pte_load_clear(ptep) atomic_swap_long(ptep, 0) #define pte_store(ptep, pte) do { \ *(u_long *)(ptep) = (u_long)(pte); \ } while (0) #define pte_clear(ptep) pte_store(ptep, 0) +#define pte_clear_datapg(ptep) pte_store_datapg(ptep, 0) #define pde_store(pdep, pde) pte_store(pdep, pde) @@ -315,6 +317,17 @@ enum pmap_type { PT_RVI, /* AMD's nested page tables */ }; +struct ptpage; +typedef struct ptpage *ptpage_t; +SLIST_HEAD(ptpglist, ptpage); +struct ptpage_radix { +#if PAGE_SIZE == PAGE_SIZE_4K + struct vm_radix vmr; +#else + struct pctrie rt_trie; +#endif +}; + /* * The kernel virtual address (KVA) of the level 4 page table page is always * within the direct map (DMAP) region. @@ -326,10 +339,14 @@ struct pmap { uint64_t pm_cr3; uint64_t pm_ucr3; TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ +#if PAGE_SIZE == PAGE_SIZE_4K +#else + TAILQ_HEAD(,ptpage) pm_ptpfree; /* list of free ptpages */ +#endif cpuset_t pm_active; /* active on cpus */ enum pmap_type pm_type; /* regular or nested tables */ struct pmap_statistics pm_stats; /* pmap statistics */ - struct vm_radix pm_root; /* spare page table pages */ + struct ptpage_radix pm_root; /* spare page table pages */ long pm_eptgen; /* EPT pmap generation id */ smr_t pm_eptsmr; int pm_flags; @@ -413,8 +430,8 @@ bool pmap_not_in_di(void); bool pmap_page_is_mapped(vm_page_t m); void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma); void pmap_page_set_memattr_noflush(vm_page_t m, vm_memattr_t ma); -void pmap_pinit_pml4(vm_page_t); -void pmap_pinit_pml5(vm_page_t); +void pmap_pinit_pml4(ptpage_t); +void pmap_pinit_pml5(ptpage_t); bool pmap_ps_enabled(pmap_t pmap); void pmap_unmapdev(void *, vm_size_t); void pmap_invalidate_page(pmap_t, vm_offset_t); @@ -440,12 +457,18 @@ int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, void pmap_thread_init_invl_gen(struct thread *td); int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap); void pmap_page_array_startup(long count); -vm_page_t pmap_page_alloc_below_4g(bool zeroed); +void pmap_pt_page_array_mark(void); +ptpage_t pmap_page_alloc_below_4g(bool zeroed); +void pmap_free_pt_page(pmap_t, ptpage_t, bool); #if defined(KASAN) || defined(KMSAN) void pmap_san_enter(vm_offset_t); #endif +/* CHUQ I hoped these would be static in pmap.c */ +vm_paddr_t pmap_ptpage_pa(ptpage_t ptp); +void *pmap_ptpage_va(ptpage_t ptp); + /* * Returns a pointer to a set of CPUs on which the pmap is currently active. * Note that the set can be modified without any mutual exclusion, so a copy @@ -524,7 +547,7 @@ static __inline vm_pindex_t pmap_pte_index(vm_offset_t va) { - return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); + return ((va >> PAGE_SHIFT_PT) & ((1ul << NPTEPGSHIFT) - 1)); } static __inline vm_pindex_t diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h index d2ac3c6648b2fb..0f1a5b0ffea257 100644 --- a/sys/amd64/include/vmparam.h +++ b/sys/amd64/include/vmparam.h @@ -139,7 +139,8 @@ * Enable superpage reservations: 1 level. 
*/ #ifndef VM_NRESERVLEVEL -#define VM_NRESERVLEVEL 1 +/* CHUQ XXX disable superpages for now */ +#define VM_NRESERVLEVEL 0 #endif /* diff --git a/sys/amd64/vmm/amd/amdvi_priv.h b/sys/amd64/vmm/amd/amdvi_priv.h index 2a2646b6907e29..750a4da1bb3272 100644 --- a/sys/amd64/vmm/amd/amdvi_priv.h +++ b/sys/amd64/vmm/amd/amdvi_priv.h @@ -252,11 +252,11 @@ CTASSERT(offsetof(struct amdvi_ctrl, pad1)== 0x58); CTASSERT(offsetof(struct amdvi_ctrl, pad2)== 0x2028); CTASSERT(offsetof(struct amdvi_ctrl, pad3)== 0x2040); -#define AMDVI_MMIO_V1_SIZE (4 * PAGE_SIZE) /* v1 size */ +#define AMDVI_MMIO_V1_SIZE (4 * PAGE_SIZE_4K) /* v1 size */ /* * AMF IOMMU v2 size including event counters */ -#define AMDVI_MMIO_V2_SIZE (8 * PAGE_SIZE) +#define AMDVI_MMIO_V2_SIZE (8 * PAGE_SIZE_4K) CTASSERT(sizeof(struct amdvi_ctrl) == 0x4000); CTASSERT(sizeof(struct amdvi_ctrl) == AMDVI_MMIO_V1_SIZE); diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index 2fe6a5bc358484..f0872ac25a6e00 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -249,6 +249,11 @@ svm_modinit(int ipinum) { int error, cpu; + if (PAGE_SIZE != PAGE_SIZE_PT) { + printf("SVM: larger software PAGE_SIZE is not yet supported.\n"); + return (ENXIO); + } + if (!svm_available()) return (ENXIO); diff --git a/sys/amd64/vmm/amd/vmcb.h b/sys/amd64/vmm/amd/vmcb.h index 09150fc26a72c5..9b044ad6cb2776 100644 --- a/sys/amd64/vmm/amd/vmcb.h +++ b/sys/amd64/vmm/amd/vmcb.h @@ -349,7 +349,7 @@ struct vmcb { struct vmcb_ctrl ctrl; struct vmcb_state state; } __attribute__ ((__packed__)); -CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); +CTASSERT(sizeof(struct vmcb) == PAGE_SIZE_4K); CTASSERT(offsetof(struct vmcb, state) == 0x400); int vmcb_read(struct svm_vcpu *vcpu, int ident, uint64_t *retval); diff --git a/sys/amd64/vmm/intel/ept.c b/sys/amd64/vmm/intel/ept.c index 5432c7da5df774..fe7cd0d14a28ec 100644 --- a/sys/amd64/vmm/intel/ept.c +++ b/sys/amd64/vmm/intel/ept.c @@ -95,6 +95,12 @@ ept_init(int ipinum) !INVEPT_ALL_TYPES_SUPPORTED(cap)) return (EINVAL); + /* + * Larger software PAGE_SIZE does not yet support EPT. 
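This guard, and the matching one added to svm_modinit(), keys on PAGE_SIZE versus PAGE_SIZE_PT, the 4 KiB hardware page-table granule that the rest of the patch separates from the software page size. Not all of the new param.h macro definitions are visible in this diff, so the checks below only restate the relationships the call sites rely on (resident counts kept in PAGE_SIZE_PT units, PTE pointers stepped by PAGE_SIZE_PTES per software page); the concrete values quoted assume OS_PAGE_SHIFT=14 as in the new GENERIC-PAGE16K config, giving PAGE_SIZE = 16384, PAGE_SIZE_PT = 4096, PAGE_SIZE_PTES = 4 and NDPTEPG = 128.

	/* Inferred relationships, written as compile-time checks, not quoted from the patch. */
	_Static_assert(PAGE_SIZE_PTES == PAGE_SIZE / PAGE_SIZE_PT,
	    "one software page is backed by PAGE_SIZE_PTES hardware PTEs");
	_Static_assert(NDPTEPG == NPTEPG / PAGE_SIZE_PTES,
	    "one page table page maps NDPTEPG software pages");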
+ */ + if (PAGE_SIZE != PAGE_SIZE_PT) + return (EINVAL); + ept_pmap_flags = ipinum & PMAP_NESTED_IPIMASK; use_superpages = 1; diff --git a/sys/arm64/include/param.h b/sys/arm64/include/param.h index 753035b7775e8e..9ab0fbc79654c3 100644 --- a/sys/arm64/include/param.h +++ b/sys/arm64/include/param.h @@ -95,6 +95,10 @@ #define PAGE_SIZE (1 << PAGE_SHIFT) #define PAGE_MASK (PAGE_SIZE - 1) +#define MINIDUMP_PAGE_SIZE PAGE_SIZE +#define MINIDUMP_PAGE_MASK PAGE_MASK +#define MINIDUMP_PAGE_SHIFT PAGE_SHIFT + #define MAXPAGESIZES 4 /* maximum number of supported page sizes */ #ifndef KSTACK_PAGES diff --git a/sys/conf/options b/sys/conf/options index 66f7f2ee2d7e6f..2ef2065957b29e 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -193,6 +193,7 @@ NO_OBSOLETE_CODE opt_global.h NO_SYSCTL_DESCR opt_global.h NSWBUF_MIN opt_param.h MBUF_PACKET_ZONE_DISABLE opt_global.h +OS_PAGE_SHIFT opt_global.h PANIC_REBOOT_WAIT_TIME opt_panic.h PCI_HP opt_pci.h PCI_IOV opt_global.h diff --git a/sys/dev/hyperv/vmbus/vmbus_reg.h b/sys/dev/hyperv/vmbus/vmbus_reg.h index 76cdca0ebeb234..7e38c7f8f900ea 100644 --- a/sys/dev/hyperv/vmbus/vmbus_reg.h +++ b/sys/dev/hyperv/vmbus/vmbus_reg.h @@ -82,6 +82,12 @@ struct vmbus_mon_trig { #define VMBUS_MONTRIGS_MAX 4 #define VMBUS_MONTRIG_LEN 32 +#ifdef __amd64__ +#define VMBUS_PAGE_SIZE PAGE_SIZE_4K +#else +#define VMBUS_PAGE_SIZE PAGE_SIZE +#endif + struct vmbus_mnf { uint32_t mnf_state; uint32_t mnf_rsvd1; @@ -94,9 +100,9 @@ struct vmbus_mnf { struct hyperv_mon_param mnf_param[VMBUS_MONTRIGS_MAX][VMBUS_MONTRIG_LEN]; - uint8_t mnf_rsvd4[1984]; + uint8_t mnf_rsvd4[VMBUS_PAGE_SIZE - 2112]; } __packed; -CTASSERT(sizeof(struct vmbus_mnf) == PAGE_SIZE); +CTASSERT(sizeof(struct vmbus_mnf) == VMBUS_PAGE_SIZE); /* * Buffer ring @@ -159,8 +165,8 @@ struct vmbus_bufring { uint32_t value; } br_feature_bits; - /* Padding to PAGE_SIZE */ - uint8_t br_rsvd2[4020]; + /* Padding to VMBUS_PAGE_SIZE */ + uint8_t br_rsvd2[VMBUS_PAGE_SIZE - 76]; /* * Total guest to host interrupt count @@ -174,7 +180,7 @@ struct vmbus_bufring { uint8_t br_data[]; } __packed; -CTASSERT(sizeof(struct vmbus_bufring) == PAGE_SIZE); +CTASSERT(sizeof(struct vmbus_bufring) == VMBUS_PAGE_SIZE); /* * Channel diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index d51763b2079804..3d55b8a22fab32 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -86,7 +86,7 @@ _Static_assert(sizeof(struct monitorbuf) == 128, "2x cache line"); uint32_t pc_pad[4]; \ uint8_t pc_mds_tmp[64]; \ u_int pc_ipi_bitmap; \ - char __pad[3518] + char __pad[3518 + PAGE_SIZE - 4096] #ifdef _KERNEL diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 5a53fac50f2c9c..04b7a72747d230 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -153,7 +153,7 @@ SYSCTL_INT(ELF_NODE_OID, OID_AUTO, read_exec, CTLFLAG_RW, &i386_read_exec, 0, "enable execution from readable segments"); #endif -static u_long __elfN(pie_base) = ET_DYN_LOAD_ADDR; +static u_long __elfN(pie_base) = round_page(ET_DYN_LOAD_ADDR); static int sysctl_pie_base(SYSCTL_HANDLER_ARGS) { @@ -1118,6 +1118,13 @@ __CONCAT(exec_, __elfN(imgact))(struct image_params *imgp) bool free_interp; int error, i, n; +#if defined(__amd64__) && __ELF_WORD_SIZE == 32 && PAGE_SIZE != PAGE_SIZE_4K + /* + * XXX ia32 code hard-codes 4k page size, reject if PAGE_SIZE is larger. 
+ */ + return (-1); +#endif + hdr = (const Elf_Ehdr *)imgp->image_header; /* diff --git a/sys/kern/kern_vnodedumper.c b/sys/kern/kern_vnodedumper.c index c89e209cc1a5f5..e257ce268108d9 100644 --- a/sys/kern/kern_vnodedumper.c +++ b/sys/kern/kern_vnodedumper.c @@ -106,8 +106,8 @@ livedump_start_vnode(struct vnode *vp, int flags, uint8_t compression) di.dumper_start = vnode_dumper_start; di.dumper = vnode_dump; di.dumper_hdr = vnode_write_headers; - di.blocksize = PAGE_SIZE; /* Arbitrary. */ - di.maxiosize = MAXDUMPPGS * PAGE_SIZE; + di.blocksize = MINIDUMP_PAGE_SIZE; /* Arbitrary. */ + di.maxiosize = MAXDUMPPGS * MINIDUMP_PAGE_SIZE; bzero(&kda, sizeof(kda)); kda.kda_compression = compression; diff --git a/sys/kern/subr_vmem.c b/sys/kern/subr_vmem.c index 9288b093544125..9aa7f73a70581e 100644 --- a/sys/kern/subr_vmem.c +++ b/sys/kern/subr_vmem.c @@ -827,6 +827,8 @@ vmem_add1(vmem_t *vm, vmem_addr_t addr, vmem_size_t size, int type) VMEM_ASSERT_LOCKED(vm); MPASS(type == BT_TYPE_SPAN || type == BT_TYPE_SPAN_STATIC); + KASSERT((size & vm->vm_quantum_mask) == 0, + ("size 0x%lx not aligned to quantum mask 0x%lx", size, vm->vm_quantum_mask)); MPASS((size & vm->vm_quantum_mask) == 0); if (vm->vm_releasefn == NULL) { diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c index 66ce1b5a081dff..043b36a3283bd1 100644 --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -438,14 +438,16 @@ ktls_buffer_import(void *arg, void **store, int count, int domain, int flags) vm_page_t m; int i, req; +#if 0 KASSERT((ktls_maxlen & PAGE_MASK) == 0, ("%s: ktls max length %d is not page size-aligned", __func__, ktls_maxlen)); +#endif req = VM_ALLOC_WIRED | VM_ALLOC_NODUMP | malloc2vm_flags(flags); for (i = 0; i < count; i++) { m = vm_page_alloc_noobj_contig_domain(domain, req, - atop(ktls_maxlen), 0, ~0ul, PAGE_SIZE, 0, + howmany(ktls_maxlen, PAGE_SIZE), 0, ~0ul, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT); if (m == NULL) break; @@ -462,7 +464,7 @@ ktls_buffer_release(void *arg __unused, void **store, int count) for (i = 0; i < count; i++) { m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)store[i])); - for (j = 0; j < atop(ktls_maxlen); j++) { + for (j = 0; j < howmany(ktls_maxlen, PAGE_SIZE); j++) { (void)vm_page_unwire_noq(m + j); vm_page_free(m + j); } @@ -2826,7 +2828,7 @@ ktls_encrypt_record(struct ktls_wq *wq, struct mbuf *m, KASSERT((m->m_flags & (M_EXTPG | M_NOTREADY)) == (M_EXTPG | M_NOTREADY), ("%p not unready & nomap mbuf\n", m)); - KASSERT(ptoa(m->m_epg_npgs) <= ktls_maxlen, + KASSERT(ptoa(m->m_epg_npgs) <= ktls_maxlen || m->m_epg_npgs == 1, ("page count %d larger than maximum frame length %d", m->m_epg_npgs, ktls_maxlen)); @@ -3271,7 +3273,7 @@ ktls_reclaim_thread(void *ctx) * surges of traffic and potential NIC output drops.
*/ if (vm_page_reclaim_contig_domain_ext(domain, VM_ALLOC_NORMAL, - atop(ktls_maxlen), 0, ~0ul, PAGE_SIZE, 0, + howmany(ktls_maxlen, PAGE_SIZE), 0, ~0ul, PAGE_SIZE, 0, ktls_max_reclaim) != 0) { vm_wait_domain(domain); } else { diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index 19c39e42bafa69..ba9dcb9d13d874 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -4953,22 +4953,20 @@ void vfs_bio_bzero_buf(struct buf *bp, int base, int size) { vm_page_t m; - int i, n; + int i, n, pgoff; if (buf_mapped(bp)) { BUF_CHECK_MAPPED(bp); bzero(bp->b_data + base, size); } else { BUF_CHECK_UNMAPPED(bp); - n = PAGE_SIZE - (base & PAGE_MASK); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { m = bp->b_pages[i]; - if (n > size) - n = size; - pmap_zero_page_area(m, base & PAGE_MASK, n); + pgoff = (bp->b_offset & PAGE_MASK) + (base & PAGE_MASK); + n = MIN(PAGE_SIZE - pgoff, size); + pmap_zero_page_area(m, pgoff, n); base += n; size -= n; - n = PAGE_SIZE; } } } diff --git a/sys/sys/_pv_entry.h b/sys/sys/_pv_entry.h index ec3ea24a21a5e5..9322e63d62b7aa 100644 --- a/sys/sys/_pv_entry.h +++ b/sys/sys/_pv_entry.h @@ -60,7 +60,14 @@ typedef struct pv_entry { * is the value of all the other entries in the pc_map[] array when a * chunk is completely free. */ -#if PAGE_SIZE == 4 * 1024 + +#ifndef PAGE_SIZE_PV +#define PAGE_SIZE_PV PAGE_SIZE +#endif +#ifndef PAGE_MASK_PV +#define PAGE_MASK_PV PAGE_MASK +#endif +#if PAGE_SIZE_PV == 4 * 1024 #ifdef __LP64__ #define _NPCPV 168 #define _NPAD 0 @@ -68,7 +75,7 @@ typedef struct pv_entry { #define _NPCPV 336 #define _NPAD 0 #endif -#elif PAGE_SIZE == 16 * 1024 +#elif PAGE_SIZE_PV == 16 * 1024 #ifdef __LP64__ #define _NPCPV 677 #define _NPAD 1 @@ -76,7 +83,7 @@ typedef struct pv_entry { #endif #ifndef _NPCPV -#error Unsupported page size +#error Unsupported page size PAGE_SIZE_PV #endif /* Support clang < 14 */ @@ -104,7 +111,7 @@ struct pv_chunk { unsigned long pc_pad[_NPAD]; }; -_Static_assert(sizeof(struct pv_chunk) == PAGE_SIZE, +_Static_assert(sizeof(struct pv_chunk) == PAGE_SIZE_PV, "PV entry chunk size mismatch"); #ifdef _KERNEL @@ -131,7 +138,7 @@ pc_is_free(struct pv_chunk *pc) static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { - return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); + return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK_PV)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 9140cee56885e3..fe3120c3fb6ef4 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -573,6 +573,7 @@ enum { #define TDP2_SAN_QUIET 0x00000008 /* Disable warnings from K(A|M)SAN */ #define TDP2_EXTERR 0x00000010 /* Kernel reported ext error */ #define TDP2_UEXTERR 0x00000020 /* User set ext error reporting ptr */ +#define TDP2_CHUQ 0x00000040 /* * Reasons that the current thread can not be run yet. 
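A note on the rounding change in the uipc_ktls.c hunks above: atop() truncates (it is a right shift by PAGE_SHIFT), while howmany() rounds up, and the difference matters once ktls_maxlen is no longer guaranteed to be a multiple of PAGE_SIZE (the KASSERT enforcing that is disabled with #if 0 above). The stand-alone sketch below only illustrates the arithmetic; the 16 KB page size and the buffer length are assumed example values, not taken from the patch.

#include <stdio.h>

/* Userland stand-ins for the kernel macros from sys/param.h, assuming 16 KB pages. */
#define EX_PAGE_SHIFT	14
#define EX_PAGE_SIZE	(1UL << EX_PAGE_SHIFT)
#define ex_atop(x)	((unsigned long)(x) >> EX_PAGE_SHIFT)
#define ex_howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	/* Hypothetical TLS buffer length: one page plus a little slack. */
	unsigned long ktls_maxlen = EX_PAGE_SIZE + 256;

	/* ex_atop() drops the partial page; ex_howmany() counts it. */
	printf("atop:    %lu page(s)\n", ex_atop(ktls_maxlen));			/* prints 1 */
	printf("howmany: %lu page(s)\n", ex_howmany(ktls_maxlen, EX_PAGE_SIZE));	/* prints 2 */
	return (0);
}

With a 4 KB page size and a page-aligned ktls_maxlen the two expressions agree, which is why the old atop() form was sufficient before this change.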
diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 62da6a5e7ff4d9..677413f42d90ae 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -167,7 +167,11 @@ void pmap_unwire(pmap_t pmap, vm_offset_t start, vm_offset_t end); void pmap_zero_page(vm_page_t); void pmap_zero_page_area(vm_page_t, int off, int size); +#ifdef __amd64__ +#define pmap_resident_count(pm) ((pm)->pm_stats.resident_count * PAGE_SIZE_PT / PAGE_SIZE) +#else #define pmap_resident_count(pm) ((pm)->pm_stats.resident_count) +#endif #define pmap_wired_count(pm) ((pm)->pm_stats.wired_count) #endif /* _KERNEL */ diff --git a/sys/vm/vm_dumpset.h b/sys/vm/vm_dumpset.h index 36de93fb19b98b..a0e5cc82546c73 100644 --- a/sys/vm/vm_dumpset.h +++ b/sys/vm/vm_dumpset.h @@ -43,18 +43,26 @@ static inline void vm_page_dump_add(struct bitset *bitset, vm_paddr_t pa) { vm_pindex_t adj; - int i; + int i, j; adj = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) { + + for (j = 0; j < PAGE_SIZE / MINIDUMP_PAGE_SIZE; j++) { + BIT_SET_ATOMIC(vm_page_dump_pages, - (pa >> PAGE_SHIFT) - (dump_avail[i] >> PAGE_SHIFT) + - adj, bitset); + (pa >> MINIDUMP_PAGE_SHIFT) - + (dump_avail[i] >> MINIDUMP_PAGE_SHIFT) + adj, + bitset); + + pa += MINIDUMP_PAGE_SIZE; + } + return; } - adj += howmany(dump_avail[i + 1], PAGE_SIZE) - - dump_avail[i] / PAGE_SIZE; + adj += howmany(dump_avail[i + 1], MINIDUMP_PAGE_SIZE) - + dump_avail[i] / MINIDUMP_PAGE_SIZE; } } @@ -62,18 +70,26 @@ static inline void vm_page_dump_drop(struct bitset *bitset, vm_paddr_t pa) { vm_pindex_t adj; - int i; + int i, j; adj = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) { if (pa >= dump_avail[i] && pa < dump_avail[i + 1]) { + + for (j = 0; j < PAGE_SIZE / MINIDUMP_PAGE_SIZE; j++) { + BIT_CLR_ATOMIC(vm_page_dump_pages, - (pa >> PAGE_SHIFT) - (dump_avail[i] >> PAGE_SHIFT) + - adj, bitset); + (pa >> MINIDUMP_PAGE_SHIFT) - + (dump_avail[i] >> MINIDUMP_PAGE_SHIFT) + adj, + bitset); + + pa += MINIDUMP_PAGE_SIZE; + } + return; } - adj += howmany(dump_avail[i + 1], PAGE_SIZE) - - dump_avail[i] / PAGE_SIZE; + adj += howmany(dump_avail[i + 1], MINIDUMP_PAGE_SIZE) - + dump_avail[i] / MINIDUMP_PAGE_SIZE; } } @@ -83,11 +99,11 @@ vm_page_dump_index_to_pa(int bit) int i, tot; for (i = 0; dump_avail[i + 1] != 0; i += 2) { - tot = howmany(dump_avail[i + 1], PAGE_SIZE) - - dump_avail[i] / PAGE_SIZE; + tot = howmany(dump_avail[i + 1], MINIDUMP_PAGE_SIZE) - + dump_avail[i] / MINIDUMP_PAGE_SIZE; if (bit < tot) - return ((vm_paddr_t)bit * PAGE_SIZE + - (dump_avail[i] & ~PAGE_MASK)); + return ((vm_paddr_t)bit * MINIDUMP_PAGE_SIZE + + (dump_avail[i] & ~MINIDUMP_PAGE_MASK)); bit -= tot; } return (0); diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index a0d3651ba266e2..875195af3e408e 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -229,7 +229,8 @@ vm_ksubmap_init(struct kva_md_info *kmi) /* * Allocate the clean map to hold all of I/O virtual memory. */ - size = (long)nbuf * BKVASIZE + (long)bio_transient_maxcnt * maxphys; + size = round_page((long)nbuf * BKVASIZE) + + round_page((long)bio_transient_maxcnt * maxphys); kmi->clean_sva = kva_alloc(size); kmi->clean_eva = kmi->clean_sva + size; @@ -239,7 +240,7 @@ vm_ksubmap_init(struct kva_md_info *kmi) * Enable the quantum cache if we have more than 4 cpus. This * avoids lock contention at the expense of some fragmentation. 
*/ - size = (long)nbuf * BKVASIZE; + size = round_page((long)nbuf * BKVASIZE); kmi->buffer_sva = kmi->clean_sva; kmi->buffer_eva = kmi->buffer_sva + size; vmem_init(buffer_arena, "buffer arena", kmi->buffer_sva, size, @@ -249,7 +250,7 @@ vm_ksubmap_init(struct kva_md_info *kmi) * And optionally transient bio space. */ if (bio_transient_maxcnt != 0) { - size = (long)bio_transient_maxcnt * maxphys; + size = round_page((long)bio_transient_maxcnt * maxphys); vmem_init(transient_arena, "transient arena", kmi->buffer_eva, size, PAGE_SIZE, 0, M_WAITOK); } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index e7d7b6726d2c3c..a8b330c3eca89c 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -860,6 +860,7 @@ kmem_init(vm_offset_t start, vm_offset_t end) (vm_offset_t)vm_page_array + round_2mpage(vm_page_array_size * sizeof(struct vm_page)), VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT); + pmap_pt_page_array_mark(); #endif vm_map_unlock(kernel_map); @@ -947,6 +948,14 @@ kmem_bootstrap_free(vm_offset_t start, vm_size_t size) end = trunc_page(start + size); start = round_page(start); + /* + * If rounding to page boundaries leaves us with nothing to free, + * just return now. vmem_add() will fail an assertion if we call it + * with a zero size. + */ + if (end <= start) + return; + #ifdef __amd64__ /* * Preloaded files do not have execute permissions by default on amd64. diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index abad5efb8a79ab..316b6e97b2b85f 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -621,8 +621,9 @@ vm_page_startup(vm_offset_t vaddr) last_pa = 0; vm_page_dump_pages = 0; for (i = 0; dump_avail[i + 1] != 0; i += 2) { - vm_page_dump_pages += howmany(dump_avail[i + 1], PAGE_SIZE) - - dump_avail[i] / PAGE_SIZE; + vm_page_dump_pages += + howmany(dump_avail[i + 1], MINIDUMP_PAGE_SIZE) - + dump_avail[i] / MINIDUMP_PAGE_SIZE; if (dump_avail[i + 1] > last_pa) last_pa = dump_avail[i + 1]; } @@ -5912,9 +5913,10 @@ DB_SHOW_COMMAND(pginfo, vm_page_print_pginfo) m = (vm_page_t)addr; db_printf( "page %p obj %p pidx 0x%jx phys 0x%jx q %d ref 0x%x\n" - " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%x dirty 0x%x\n", + " af 0x%x of 0x%x f 0x%x act %d busy %x valid 0x%jx dirty 0x%jx\n", m, m->object, (uintmax_t)m->pindex, (uintmax_t)m->phys_addr, m->a.queue, m->ref_count, m->a.flags, m->oflags, - m->flags, m->a.act_count, m->busy_lock, m->valid, m->dirty); + m->flags, m->a.act_count, m->busy_lock, (uintmax_t)m->valid, + (uintmax_t)m->dirty); } #endif /* DDB */ diff --git a/tests/sys/kern/sendfile_test.sh b/tests/sys/kern/sendfile_test.sh index 7e549eec610a7a..8eb046cefd185b 100755 --- a/tests/sys/kern/sendfile_test.sh +++ b/tests/sys/kern/sendfile_test.sh @@ -39,7 +39,7 @@ MD_DEVS="md.devs" MNT=mnt FILE=$MNT/file HELPER="$(atf_get_srcdir)/sendfile_helper" -BSIZE=4096 +BSIZE=65536 atf_test_case io_success cleanup io_success_head() diff --git a/tests/sys/vm/mmap_test.c b/tests/sys/vm/mmap_test.c index 6bc30f73ca95b6..27d02ae667fb4a 100644 --- a/tests/sys/vm/mmap_test.c +++ b/tests/sys/vm/mmap_test.c @@ -36,21 +36,6 @@ #include #include -static const struct { - void *addr; - int ok[2]; /* Depending on security.bsd.map_at_zero {0, !=0}. */ -} map_at_zero_tests[] = { - { (void *)0, { 0, 1 } }, /* Test sysctl. 
*/ - { (void *)1, { 0, 0 } }, - { (void *)(PAGE_SIZE - 1), { 0, 0 } }, - { (void *)PAGE_SIZE, { 1, 1 } }, - { (void *)-1, { 0, 0 } }, - { (void *)(-PAGE_SIZE), { 0, 0 } }, - { (void *)(-1 - PAGE_SIZE), { 0, 0 } }, - { (void *)(-1 - PAGE_SIZE - 1), { 0, 0 } }, - { (void *)(0x1000 * PAGE_SIZE), { 1, 1 } }, -}; - #define MAP_AT_ZERO "security.bsd.map_at_zero" #ifdef __LP64__ @@ -68,6 +53,22 @@ ATF_TC_BODY(mmap__map_at_zero, tc) int map_at_zero; bool allow_wx; int prot_flags; + size_t pgsz = getpagesize(); + + const struct { + void *addr; + int ok[2]; /* Depending on security.bsd.map_at_zero {0, !=0}. */ + } map_at_zero_tests[] = { + { (void *)0, { 0, 1 } }, /* Test sysctl. */ + { (void *)1, { 0, 0 } }, + { (void *)(pgsz - 1), { 0, 0 } }, + { (void *)pgsz, { 1, 1 } }, + { (void *)-1, { 0, 0 } }, + { (void *)(-pgsz), { 0, 0 } }, + { (void *)(-1 - pgsz), { 0, 0 } }, + { (void *)(-1 - pgsz - 1), { 0, 0 } }, + { (void *)(0x1000 * pgsz), { 1, 1 } }, + }; len = sizeof(map_at_zero); if (sysctlbyname(MAP_AT_ZERO, &map_at_zero, &len, NULL, 0) == -1) {
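The userland test changes above (mdconfig_test.sh, sendfile_test.sh, mmap_test.c) follow the same pattern: stop baking in 4096 and ask the system for its page size at run time. Below is a minimal C sketch of that pattern, for illustration only; getpagesize() and sysconf(_SC_PAGESIZE) are standard libc interfaces, and the derived size is an arbitrary example.

#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/* The POSIX spelling and the traditional BSD spelling report the same value. */
	long pgsz = sysconf(_SC_PAGESIZE);
	int legacy = getpagesize();

	printf("sysconf(_SC_PAGESIZE): %ld\n", pgsz);
	printf("getpagesize():         %d\n", legacy);

	/* A size derived from the page size, as in the reworked mdconfig test. */
	printf("sector size (2 pages): %ld\n", 2 * pgsz);
	return (0);
}

The reworked mdconfig_test.sh obtains the same value in shell through the pagesize command it invokes, and mmap_test.c now builds its map_at_zero_tests table from getpagesize() instead of the compile-time PAGE_SIZE.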