From owner-svn-src-all@freebsd.org  Sun Sep 29 22:41:07 2019
Message-Id: <201909292241.x8TMf76K091300@repo.freebsd.org>
From: Mark Johnston <markj@FreeBSD.org>
Date: Sun, 29 Sep 2019 22:41:07 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
        svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject: svn commit: r352877 - stable/12/sys/amd64/amd64
X-SVN-Group: stable-12
X-SVN-Commit-Author: markj
X-SVN-Commit-Paths: stable/12/sys/amd64/amd64
X-SVN-Commit-Revision: 352877
X-SVN-Commit-Repository: base
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Author: markj
Date: Sun Sep 29 22:41:06 2019
New Revision: 352877
URL: https://svnweb.freebsd.org/changeset/base/352877

Log:
  MFC r351728, r352581 (by kib), r352606 (by kib):
  Add a sysctl to dump kernel mappings and their properties on amd64.

Modified:
  stable/12/sys/amd64/amd64/pmap.c

Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/12/sys/amd64/amd64/pmap.c	Sun Sep 29 22:37:59 2019	(r352876)
+++ stable/12/sys/amd64/amd64/pmap.c	Sun Sep 29 22:41:06 2019	(r352877)
@@ -124,6 +124,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/rangeset.h>
 #include <sys/rwlock.h>
+#include <sys/sbuf.h>
 #include <sys/sx.h>
 #include <sys/turnstile.h>
 #include <sys/vmem.h>
@@ -2083,6 +2084,41 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
 	return (mask);
 }
 
+static int
+pmap_pat_index(pmap_t pmap, pt_entry_t pte, bool is_pde)
+{
+	int pat_flag, pat_idx;
+
+	pat_idx = 0;
+	switch (pmap->pm_type) {
+	case PT_X86:
+	case PT_RVI:
+		/* The PAT bit is different for PTE's and PDE's. */
+		pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT;
+
+		if ((pte & pat_flag) != 0)
+			pat_idx |= 0x4;
+		if ((pte & PG_NC_PCD) != 0)
+			pat_idx |= 0x2;
+		if ((pte & PG_NC_PWT) != 0)
+			pat_idx |= 0x1;
+		break;
+	case PT_EPT:
+		if ((pte & EPT_PG_IGNORE_PAT) != 0)
+			panic("EPT PTE %#lx has no PAT memory type", pte);
+		pat_idx = (pte & EPT_PG_MEMORY_TYPE(0x7)) >> 3;
+		break;
+	}
+
+	/* See pmap_init_pat(). */
+	if (pat_idx == 4)
+		pat_idx = 0;
+	if (pat_idx == 7)
+		pat_idx = 3;
+
+	return (pat_idx);
+}
+
 bool
 pmap_ps_enabled(pmap_t pmap)
 {
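Note: pmap_pat_index() above recovers the 3-bit PAT table index from a
leaf entry.  Architecturally, PWT is bit 3, PCD is bit 4, and the PAT
bit is bit 7 in a 4KB PTE but bit 12 in a 2MB/1GB leaf, which is why
the function must distinguish PTEs from PDEs.  The stand-alone sketch
below mirrors that derivation; the MY_* constants are redefined here
for illustration only and are not the kernel's definitions.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define	MY_PG_PWT	(1UL << 3)	/* write-through */
	#define	MY_PG_PCD	(1UL << 4)	/* cache disable */
	#define	MY_PG_PTE_PAT	(1UL << 7)	/* PAT bit in a 4KB PTE */
	#define	MY_PG_PDE_PAT	(1UL << 12)	/* PAT bit in a large-page PDE */

	static int
	pat_index(uint64_t pte, bool is_pde)
	{
		uint64_t pat_flag;
		int idx;

		pat_flag = is_pde ? MY_PG_PDE_PAT : MY_PG_PTE_PAT;
		idx = 0;
		if ((pte & pat_flag) != 0)
			idx |= 4;
		if ((pte & MY_PG_PCD) != 0)
			idx |= 2;
		if ((pte & MY_PG_PWT) != 0)
			idx |= 1;
		return (idx);
	}

	int
	main(void)
	{
		/* A 4KB PTE with PCD and PWT set selects PAT entry 3. */
		printf("%d\n", pat_index(MY_PG_PCD | MY_PG_PWT, false));
		return (0);
	}

The final remapping of indices 4 and 7 to 0 and 3 in the kernel
function follows pmap_init_pat(), which programs those PAT entries as
duplicates of entries 0 and 3.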
@@ -9907,6 +9943,271 @@ pmap_pkru_clear(pmap_t pmap, vm_offset_t sva, vm_offse
 	return (error);
 }
 
+/*
+ * Track a range of the kernel's virtual address space that is contiguous
+ * in various mapping attributes.
+ */
+struct pmap_kernel_map_range {
+	vm_offset_t sva;
+	pt_entry_t attrs;
+	int ptes;
+	int pdes;
+	int pdpes;
+};
+
+static void
+sysctl_kmaps_dump(struct sbuf *sb, struct pmap_kernel_map_range *range,
+    vm_offset_t eva)
+{
+	const char *mode;
+	int i, pat_idx;
+
+	if (eva <= range->sva)
+		return;
+
+	pat_idx = pmap_pat_index(kernel_pmap, range->attrs, true);
+	for (i = 0; i < PAT_INDEX_SIZE; i++)
+		if (pat_index[i] == pat_idx)
+			break;
+
+	switch (i) {
+	case PAT_WRITE_BACK:
+		mode = "WB";
+		break;
+	case PAT_WRITE_THROUGH:
+		mode = "WT";
+		break;
+	case PAT_UNCACHEABLE:
+		mode = "UC";
+		break;
+	case PAT_UNCACHED:
+		mode = "U-";
+		break;
+	case PAT_WRITE_PROTECTED:
+		mode = "WP";
+		break;
+	case PAT_WRITE_COMBINING:
+		mode = "WC";
+		break;
+	default:
+		printf("%s: unknown PAT mode %#x for range 0x%016lx-0x%016lx\n",
+		    __func__, pat_idx, range->sva, eva);
+		mode = "??";
+		break;
+	}
+
+	sbuf_printf(sb, "0x%016lx-0x%016lx r%c%c%c%c %s %d %d %d\n",
+	    range->sva, eva,
+	    (range->attrs & X86_PG_RW) != 0 ? 'w' : '-',
+	    (range->attrs & pg_nx) != 0 ? '-' : 'x',
+	    (range->attrs & X86_PG_U) != 0 ? 'u' : 's',
+	    (range->attrs & X86_PG_G) != 0 ? 'g' : '-',
+	    mode, range->pdpes, range->pdes, range->ptes);
+
+	/* Reset to sentinel value. */
+	range->sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+}
+
+/*
+ * Determine whether the attributes specified by a page table entry match those
+ * being tracked by the current range.  This is not quite as simple as a direct
+ * flag comparison since some PAT modes have multiple representations.
+ */
+static bool
+sysctl_kmaps_match(struct pmap_kernel_map_range *range, pt_entry_t attrs)
+{
+	pt_entry_t diff, mask;
+
+	mask = X86_PG_G | X86_PG_RW | X86_PG_U | X86_PG_PDE_CACHE | pg_nx;
+	diff = (range->attrs ^ attrs) & mask;
+	if (diff == 0)
+		return (true);
+	if ((diff & ~X86_PG_PDE_PAT) == 0 &&
+	    pmap_pat_index(kernel_pmap, range->attrs, true) ==
+	    pmap_pat_index(kernel_pmap, attrs, true))
+		return (true);
+	return (false);
+}
+
+static void
+sysctl_kmaps_reinit(struct pmap_kernel_map_range *range, vm_offset_t va,
+    pt_entry_t attrs)
+{
+
+	memset(range, 0, sizeof(*range));
+	range->sva = va;
+	range->attrs = attrs;
+}
+
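Note: sysctl_kmaps_dump(), sysctl_kmaps_match(), and
sysctl_kmaps_reinit() together implement a run-coalescing pattern:
extend the current run while attributes match, otherwise flush the
accumulated range and start a new one.  The self-contained sketch
below shows the same control flow over a flat array; dump_runs() and
its attribute array are invented for illustration, whereas the kernel
code walks real page tables.

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Print each maximal run of identical attribute values. */
	static void
	dump_runs(const uint32_t *attrs, size_t n)
	{
		size_t start;

		start = 0;
		for (size_t i = 1; i <= n; i++) {
			if (i == n || attrs[i] != attrs[start]) {
				printf("[%zu, %zu): attrs %#x\n",
				    start, i, (unsigned)attrs[start]);
				start = i;	/* Begin a new run. */
			}
		}
	}

	int
	main(void)
	{
		uint32_t attrs[] = { 3, 3, 5, 5, 5, 1 };

		dump_runs(attrs, sizeof(attrs) / sizeof(attrs[0]));
		return (0);
	}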
+/*
+ * Given a leaf PTE, derive the mapping's attributes.  If they do not match
+ * those of the current run, dump the address range and its attributes, and
+ * begin a new run.
+ */
+static void
+sysctl_kmaps_check(struct sbuf *sb, struct pmap_kernel_map_range *range,
+    vm_offset_t va, pml4_entry_t pml4e, pdp_entry_t pdpe, pd_entry_t pde,
+    pt_entry_t pte)
+{
+	pt_entry_t attrs;
+
+	attrs = pml4e & (X86_PG_RW | X86_PG_U | pg_nx);
+
+	attrs |= pdpe & pg_nx;
+	attrs &= pg_nx | (pdpe & (X86_PG_RW | X86_PG_U));
+	if ((pdpe & PG_PS) != 0) {
+		attrs |= pdpe & (X86_PG_G | X86_PG_PDE_CACHE);
+	} else if (pde != 0) {
+		attrs |= pde & pg_nx;
+		attrs &= pg_nx | (pde & (X86_PG_RW | X86_PG_U));
+	}
+	if ((pde & PG_PS) != 0) {
+		attrs |= pde & (X86_PG_G | X86_PG_PDE_CACHE);
+	} else if (pte != 0) {
+		attrs |= pte & pg_nx;
+		attrs &= pg_nx | (pte & (X86_PG_RW | X86_PG_U));
+		attrs |= pte & (X86_PG_G | X86_PG_PTE_CACHE);
+
+		/* Canonicalize by always using the PDE PAT bit. */
+		if ((attrs & X86_PG_PTE_PAT) != 0)
+			attrs ^= X86_PG_PDE_PAT | X86_PG_PTE_PAT;
+	}
+
+	if (range->sva > va || !sysctl_kmaps_match(range, attrs)) {
+		sysctl_kmaps_dump(sb, range, va);
+		sysctl_kmaps_reinit(range, va, attrs);
+	}
+}
+
+static int
+sysctl_kmaps(SYSCTL_HANDLER_ARGS)
+{
+	struct pmap_kernel_map_range range;
+	struct sbuf sbuf, *sb;
+	pml4_entry_t pml4e;
+	pdp_entry_t *pdp, pdpe;
+	pd_entry_t *pd, pde;
+	pt_entry_t *pt, pte;
+	vm_offset_t sva;
+	vm_paddr_t pa;
+	int error, i, j, k, l;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+	sb = &sbuf;
+	sbuf_new_for_sysctl(sb, NULL, PAGE_SIZE, req);
+
+	/* Sentinel value. */
+	range.sva = KVADDR(NPML4EPG - 1, NPDPEPG - 1, NPDEPG - 1, NPTEPG - 1);
+
+	/*
+	 * Iterate over the kernel page tables without holding the kernel pmap
+	 * lock.  Outside of the large map, kernel page table pages are never
+	 * freed, so at worst we will observe inconsistencies in the output.
+	 * Within the large map, ensure that PDP and PD page addresses are
+	 * valid before descending.
+	 */
+	for (sva = 0, i = pmap_pml4e_index(sva); i < NPML4EPG; i++) {
+		switch (i) {
+		case PML4PML4I:
+			sbuf_printf(sb, "\nRecursive map:\n");
+			break;
+		case DMPML4I:
+			sbuf_printf(sb, "\nDirect map:\n");
+			break;
+		case KPML4BASE:
+			sbuf_printf(sb, "\nKernel map:\n");
+			break;
+		case LMSPML4I:
+			sbuf_printf(sb, "\nLarge map:\n");
+			break;
+		}
+
+		/* Convert to canonical form. */
+		if (sva == 1ul << 47)
+			sva |= -1ul << 48;
+
+restart:
+		pml4e = kernel_pmap->pm_pml4[i];
+		if ((pml4e & X86_PG_V) == 0) {
+			sva = rounddown2(sva, NBPML4);
+			sysctl_kmaps_dump(sb, &range, sva);
+			sva += NBPML4;
+			continue;
+		}
+		pa = pml4e & PG_FRAME;
+		pdp = (pdp_entry_t *)PHYS_TO_DMAP(pa);
+
+		for (j = pmap_pdpe_index(sva); j < NPDPEPG; j++) {
+			pdpe = pdp[j];
+			if ((pdpe & X86_PG_V) == 0) {
+				sva = rounddown2(sva, NBPDP);
+				sysctl_kmaps_dump(sb, &range, sva);
+				sva += NBPDP;
+				continue;
+			}
+			pa = pdpe & PG_FRAME;
+			if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
+			    vm_phys_paddr_to_vm_page(pa) == NULL)
+				goto restart;
+			if ((pdpe & PG_PS) != 0) {
+				sva = rounddown2(sva, NBPDP);
+				sysctl_kmaps_check(sb, &range, sva, pml4e, pdpe,
+				    0, 0);
+				range.pdpes++;
+				sva += NBPDP;
+				continue;
+			}
+			pd = (pd_entry_t *)PHYS_TO_DMAP(pa);
+
+			for (k = pmap_pde_index(sva); k < NPDEPG; k++) {
+				pde = pd[k];
+				if ((pde & X86_PG_V) == 0) {
+					sva = rounddown2(sva, NBPDR);
+					sysctl_kmaps_dump(sb, &range, sva);
+					sva += NBPDR;
+					continue;
+				}
+				pa = pde & PG_FRAME;
+				if (PMAP_ADDRESS_IN_LARGEMAP(sva) &&
+				    vm_phys_paddr_to_vm_page(pa) == NULL)
+					goto restart;
+				if ((pde & PG_PS) != 0) {
+					sva = rounddown2(sva, NBPDR);
+					sysctl_kmaps_check(sb, &range, sva,
+					    pml4e, pdpe, pde, 0);
+					range.pdes++;
+					sva += NBPDR;
+					continue;
+				}
+				pt = (pt_entry_t *)PHYS_TO_DMAP(pa);
+
+				for (l = pmap_pte_index(sva); l < NPTEPG; l++,
+				    sva += PAGE_SIZE) {
+					pte = pt[l];
+					if ((pte & X86_PG_V) == 0) {
+						sysctl_kmaps_dump(sb, &range,
+						    sva);
+						continue;
+					}
+					sysctl_kmaps_check(sb, &range, sva,
+					    pml4e, pdpe, pde, pte);
+					range.ptes++;
+				}
+			}
+		}
+	}
+
+	error = sbuf_finish(sb);
+	sbuf_delete(sb);
+	return (error);
+}
+SYSCTL_OID(_vm_pmap, OID_AUTO, kernel_maps,
+    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
+    NULL, 0, sysctl_kmaps, "A",
+    "Dump kernel address layout");
+
 #ifdef DDB
 DB_SHOW_COMMAND(pte, pmap_print_pte)
 {
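Note: the "Convert to canonical form" fixup inside the walk above
deals with amd64's canonical-address rule: with 48 implemented
virtual-address bits, bits 63:48 must equal bit 47, so the valid
address space has a hole between 0x0000800000000000 and
0xffff7fffffffffff, and the walk must jump from the bottom half to the
top half when it reaches PML4 slot 256.  A stand-alone illustration of
the sign extension (not kernel code):

	#include <stdint.h>
	#include <stdio.h>

	/* Sign-extend a 48-bit virtual address into canonical form. */
	static uint64_t
	canonical(uint64_t va)
	{
		if ((va & (1ULL << 47)) != 0)
			va |= ~0ULL << 48;
		return (va);
	}

	int
	main(void)
	{
		/* 0x0000800000000000 becomes 0xffff800000000000. */
		printf("0x%016llx\n",
		    (unsigned long long)canonical(1ULL << 47));
		return (0);
	}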
@@ -9930,25 +10231,25 @@ DB_SHOW_COMMAND(pte, pmap_print_pte)
 	PG_V = pmap_valid_bit(pmap);
 	pml4 = pmap_pml4e(pmap, va);
-	db_printf("VA %#016lx pml4e %#016lx", va, *pml4);
+	db_printf("VA 0x%016lx pml4e 0x%016lx", va, *pml4);
 	if ((*pml4 & PG_V) == 0) {
 		db_printf("\n");
 		return;
 	}
 	pdp = pmap_pml4e_to_pdpe(pml4, va);
-	db_printf(" pdpe %#016lx", *pdp);
+	db_printf(" pdpe 0x%016lx", *pdp);
 	if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) {
 		db_printf("\n");
 		return;
 	}
 	pde = pmap_pdpe_to_pde(pdp, va);
-	db_printf(" pde %#016lx", *pde);
+	db_printf(" pde 0x%016lx", *pde);
 	if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) {
 		db_printf("\n");
 		return;
 	}
 	pte = pmap_pde_to_pte(pde, va);
-	db_printf(" pte %#016lx\n", *pte);
+	db_printf(" pte 0x%016lx\n", *pte);
 }
 
 DB_SHOW_COMMAND(phys2dmap, pmap_phys2dmap)
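Note: the SYSCTL_OID above exposes the dump as vm.pmap.kernel_maps, so
it can be read with sysctl(8) ("sysctl vm.pmap.kernel_maps") or from a
program.  Per sysctl_kmaps_dump(), each output line gives the range's
start and end addresses, its r/w/x, user/supervisor, and global flags,
the cache mode, and the number of 1GB, 2MB, and 4KB pages backing the
range.  A minimal userland reader using the usual two-call
sysctlbyname(3) pattern (illustrative only, error handling kept
short):

	#include <sys/types.h>
	#include <sys/sysctl.h>

	#include <err.h>
	#include <stdio.h>
	#include <stdlib.h>

	int
	main(void)
	{
		char *buf;
		size_t len;

		/* First call: ask the kernel how large the output is. */
		if (sysctlbyname("vm.pmap.kernel_maps", NULL, &len,
		    NULL, 0) != 0)
			err(1, "sysctlbyname");
		if ((buf = malloc(len)) == NULL)
			err(1, "malloc");
		/* Second call: fetch the text itself. */
		if (sysctlbyname("vm.pmap.kernel_maps", buf, &len,
		    NULL, 0) != 0)
			err(1, "sysctlbyname");
		fwrite(buf, 1, len, stdout);
		free(buf);
		return (0);
	}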