Date: Sun, 29 Sep 2019 22:41:07 +0000 (UTC) From: Mark Johnston <markj@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org Subject: svn commit: r352877 - stable/12/sys/amd64/amd64 Message-ID: <201909292241.x8TMf76K091300@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: markj Date: Sun Sep 29 22:41:06 2019 New Revision: 352877 URL: https://svnweb.freebsd.org/changeset/base/352877 Log: MFC r351728, r352581 (by kib), r352606 (by kib): Add a sysctl to dump kernel mappings and their properties on amd64. Modified: stable/12/sys/amd64/amd64/pmap.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/amd64/amd64/pmap.c ============================================================================== --- stable/12/sys/amd64/amd64/pmap.c Sun Sep 29 22:37:59 2019 (r352876) +++ stable/12/sys/amd64/amd64/pmap.c Sun Sep 29 22:41:06 2019 (r352877) @@ -124,6 +124,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/rangeset.h> #include <sys/rwlock.h> +#include <sys/sbuf.h> #include <sys/sx.h> #include <sys/turnstile.h> #include <sys/vmem.h> @@ -2083,6 +2084,41 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde) return (mask); } +static int +pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde) +{ + int pat_flag, pat_idx; + + pat_idx = 0; + switch (pmap->pm_type) { + case PT_X86: + case PT_RVI: + /* The PAT bit is different for PTE's and PDE's. */ + pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT; + + if ((pte & pat_flag) != 0) + pat_idx |= 0x4; + if ((pte & PG_NC_PCD) != 0) + pat_idx |= 0x2; + if ((pte & PG_NC_PWT) != 0) + pat_idx |= 0x1; + break; + case PT_EPT: + if ((pte & EPT_PG_IGNORE_PAT) != 0) + panic("EPT PTE %#lx has no PAT memory type", pte); + pat_idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3; + break; + } + + /* See pmap_init_pat(). */ + if (pat_idx == 4) + pat_idx = 0; + if (pat_idx == 7) + pat_idx = 3; + + return (pat_idx); +} + bool pmap_ps_enabled(pmap_t pmap) { @@ -9907,6 +9943,271 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offse return (error); } +/* + * Track a range of the kernel's virtual address space that is contiguous + * in various mapping attributes. + */ +struct pmap_kernel_map_range { + vm_offset_t sva; + pt_entry_t attrs; + int ptes; + int pdes; + int pdpes; +}; + +static void +sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range, + vm_offset_t eva) +{ + const char *mode; + int i, pat_idx; + + if (eva <= range->sva) + return; + + pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true); + for (i = 0; i < PAT_INDEX_SIZE; i++) + if (pat_index[i] == pat_idx) + break; + + switch (i) { + case PAT_WRITE_BACK: + mode = "WB"; + break; + case PAT_WRITE_THROUGH: + mode = "WT"; + break; + case PAT_UNCACHEABLE: + mode = "UC"; + break; + case PAT_UNCACHED: + mode = "U-"; + break; + case PAT_WRITE_PROTECTED: + mode = "WP"; + break; + case PAT_WRITE_COMBINING: + mode = "WC"; + break; + default: + printf("%s: unknown PAT mode %#x for range 0x%016lx-0x%016lx\n", + __func__, pat_idx, range->sva, eva); + mode = "??"; + break; + } + + sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %s %d %d %d\n", + range->sva, eva, + (range->attrs & X86_PG_RW) != 0 ? 'w' : '-', + (range->attrs & pg_nx) != 0 ? '-' : 'x', + (range->attrs & X86_PG_U) != 0 ? 'u' : 's', + (range->attrs & X86_PG_G) != 0 ? 'g' : '-', + mode, range->pdpes, range->pdes, range->ptes); + + /* Reset to sentinel value. */ + range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1); +} + +/* + * Determine whether the attributes specified by a page table entry match those + * being tracked by the current range. This is not quite as simple as a direct + * flag comparison since some PAT modes have multiple representations. + */ +static bool +sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs) +{ + pt_entry_t diff, mask; + + mask = X86_PG_G | X86_PG_RW | X86_PG_U | X86_PG_PDE_CACHE | pg_nx; + diff = (range->attrs ^ attrs) & mask; + if (diff == 0) + return (true); + if ((diff & ~X86_PG_PDE_PAT) == 0 && + pmap_pat_index(kernel_pmap, range->attrs, true) == + pmap_pat_index(kernel_pmap, attrs, true)) + return (true); + return (false); +} + +static void +sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va, + pt_entry_t attrs) +{ + + memset(range, 0, sizeof(*range)); + range->sva = va; + range->attrs = attrs; +} + +/* + * Given a leaf PTE, derive the mapping's attributes. If they do not match + * those of the current run, dump the address range and its attributes, and + * begin a new run. + */ +static void +sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range, + vm_offset_t va, pml4_entry_t pml4e, pdp_entry_t pdpe, pd_entry_t pde, + pt_entry_t pte) +{ + pt_entry_t attrs; + + attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx); + + attrs |= pdpe & pg_nx; + attrs &= pg_nx | (pdpe & (X86_PG_RW | X86_PG_U)); + if ((pdpe & PG_PS) != 0) { + attrs |= pdpe & (X86_PG_G | X86_PG_PDE_CACHE); + } else if (pde != 0) { + attrs |= pde & pg_nx; + attrs &= pg_nx | (pde & (X86_PG_RW | X86_PG_U)); + } + if ((pde & PG_PS) != 0) { + attrs |= pde & (X86_PG_G | X86_PG_PDE_CACHE); + } else if (pte != 0) { + attrs |= pte & pg_nx; + attrs &= pg_nx | (pte & (X86_PG_RW | X86_PG_U)); + attrs |= pte & (X86_PG_G | X86_PG_PTE_CACHE); + + /* Canonicalize by always using the PDE PAT bit. */ + if ((attrs & X86_PG_PTE_PAT) != 0) + attrs ^= X86_PG_PDE_PAT | X86_PG_PTE_PAT; + } + + if (range->sva > va || !sysctl_kmaps_match(range, attrs)) { + sysctl_kmaps_dump(sb, range, va); + sysctl_kmaps_reinit(range, va, attrs); + } +} + +static int +sysctl_kmaps(SYSCTL_HANDLER_ARGS) +{ + struct pmap_kernel_map_range range; + struct sbuf sbuf, *sb; + pml4_entry_t pml4e; + pdp_entry_t *pdp, pdpe; + pd_entry_t *pd, pde; + pt_entry_t *pt, pte; + vm_offset_t sva; + vm_paddr_t pa; + int error, i, j, k, l; + + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + sb = &sbuf; + sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req); + + /* Sentinel value. */ + range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1); + + /* + * Iterate over the kernel page tables without holding the kernel pmap + * lock. Outside of the large map, kernel page table pages are never + * freed, so at worst we will observe inconsistencies in the output. + * Within the large map, ensure that PDP and PD page addresses are + * valid before descending. + */ + for (sva = 0, i = pmap_pml4e_index(sva); i < NPML4EPG; i++) { + switch (i) { + case PML4PML4I: + sbuf_printf(sb, "\nRecursive map:\n"); + break; + case DMPML4I: + sbuf_printf(sb, "\nDirect map:\n"); + break; + case KPML4BASE: + sbuf_printf(sb, "\nKernel map:\n"); + break; + case LMSPML4I: + sbuf_printf(sb, "\nLarge map:\n"); + break; + } + + /* Convert to canonical form. */ + if (sva == 1ul << 47) + sva |= -1ul << 48; + +restart: + pml4e = kernel_pmap->pm_pml4[i]; + if ((pml4e & X86_PG_V) == 0) { + sva = rounddown2(sva, NBPML4); + sysctl_kmaps_dump(sb, &range, sva); + sva += NBPML4; + continue; + } + pa = pml4e & PG_FRAME; + pdp = (pdp_entry_t *)PHYS_TO_DMAP(pa); + + for (j = pmap_pdpe_index(sva); j < NPDPEPG; j++) { + pdpe = pdp[j]; + if ((pdpe & X86_PG_V) == 0) { + sva = rounddown2(sva, NBPDP); + sysctl_kmaps_dump(sb, &range, sva); + sva += NBPDP; + continue; + } + pa = pdpe & PG_FRAME; + if (PMAP_ADDRESS_IN_LARGEMAP(sva) && + vm_phys_paddr_to_vm_page(pa) == NULL) + goto restart; + if ((pdpe & PG_PS) != 0) { + sva = rounddown2(sva, NBPDP); + sysctl_kmaps_check(sb, &range, sva, pml4e, pdpe, + 0, 0); + range.pdpes++; + sva += NBPDP; + continue; + } + pd = (pd_entry_t *)PHYS_TO_DMAP(pa); + + for (k = pmap_pde_index(sva); k < NPDEPG; k++) { + pde = pd[k]; + if ((pde & X86_PG_V) == 0) { + sva = rounddown2(sva, NBPDR); + sysctl_kmaps_dump(sb, &range, sva); + sva += NBPDR; + continue; + } + pa = pde & PG_FRAME; + if (PMAP_ADDRESS_IN_LARGEMAP(sva) && + vm_phys_paddr_to_vm_page(pa) == NULL) + goto restart; + if ((pde & PG_PS) != 0) { + sva = rounddown2(sva, NBPDR); + sysctl_kmaps_check(sb, &range, sva, + pml4e, pdpe, pde, 0); + range.pdes++; + sva += NBPDR; + continue; + } + pt = (pt_entry_t *)PHYS_TO_DMAP(pa); + + for (l = pmap_pte_index(sva); l < NPTEPG; l++, + sva += PAGE_SIZE) { + pte = pt[l]; + if ((pte & X86_PG_V) == 0) { + sysctl_kmaps_dump(sb, &range, + sva); + continue; + } + sysctl_kmaps_check(sb, &range, sva, + pml4e, pdpe, pde, pte); + range.ptes++; + } + } + } + } + + error = sbuf_finish(sb); + sbuf_delete(sb); + return (error); +} +SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps, + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, + NULL, 0, sysctl_kmaps, "A", + "Dump kernel address layout"); + #ifdef DDB DB_SHOW_COMMAND(pte, pmap_print_pte) { @@ -9930,25 +10231,25 @@ DB_SHOW_COMMAND(pte, pmap_print_pte) PG_V = pmap_valid_bit(pmap); pml4 = pmap_pml4e(pmap, va); - db_printf("VA %#016lx pml4e %#016lx", va, *pml4); + db_printf("VA 0x%016lx pml4e 0x%016lx", va, *pml4); if ((*pml4 & PG_V) == 0) { db_printf("\n"); return; } pdp = pmap_pml4e_to_pdpe(pml4, va); - db_printf(" pdpe %#016lx", *pdp); + db_printf(" pdpe 0x%016lx", *pdp); if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) { db_printf("\n"); return; } pde = pmap_pdpe_to_pde(pdp, va); - db_printf(" pde %#016lx", *pde); + db_printf(" pde 0x%016lx", *pde); if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) { db_printf("\n"); return; } pte = pmap_pde_to_pte(pde, va); - db_printf(" pte %#016lx\n", *pte); + db_printf(" pte 0x%016lx\n", *pte); } DB_SHOW_COMMAND(phys2dmap, pmap_phys2dmap)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201909292241.x8TMf76K091300>