From owner-svn-src-head@freebsd.org  Wed Sep 9 21:08:46 2020
Message-Id: <202009092108.089L8kXW070388@repo.freebsd.org>
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Wed, 9 Sep 2020 21:08:46 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-head@freebsd.org
Subject: svn commit: r365512 - head/sys/amd64/amd64
X-SVN-Group: head
X-SVN-Commit-Author: kib
X-SVN-Commit-Paths: head/sys/amd64/amd64
X-SVN-Commit-Revision: 365512
X-SVN-Commit-Repository: base
List-Id: SVN commit messages for the src tree for head/-current

Author: kib
Date: Wed Sep 9 21:08:45 2020
New Revision: 365512
URL: https://svnweb.freebsd.org/changeset/base/365512

Log:
  amd64 pmap: teach functions walking user page tables about PG_PS bit in
  PDPE.

  Only unmanaged 1G superpages are handled.

  Reviewed by:	markj
  Tested by:	pho
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week
  Differential revision:	https://reviews.freebsd.org/D24652

Modified:
  head/sys/amd64/amd64/pmap.c

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c	Wed Sep 9 21:01:36 2020	(r365511)
+++ head/sys/amd64/amd64/pmap.c	Wed Sep 9 21:08:45 2020	(r365512)
@@ -48,7 +48,7 @@
  */
 /*-
  * Copyright (c) 2003 Networks Associates Technology, Inc.
- * Copyright (c) 2014-2019 The FreeBSD Foundation
+ * Copyright (c) 2014-2020 The FreeBSD Foundation
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Jake Burkholder,
@@ -1415,6 +1415,8 @@ pmap_pde(pmap_t pmap, vm_offset_t va)
 	pdpe = pmap_pdpe(pmap, va);
 	if (pdpe == NULL || (*pdpe & PG_V) == 0)
 		return (NULL);
+	KASSERT((*pdpe & PG_PS) == 0,
+	    ("pmap_pde for 1G page, pmap %p va %#lx", pmap, va));
 	return (pmap_pdpe_to_pde(pdpe, va));
 }

@@ -3570,6 +3572,7 @@ pmap_extract(pmap_t pmap, vm_offset_t va)
 vm_page_t
 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
 {
+	pdp_entry_t pdpe, *pdpep;
 	pd_entry_t pde, *pdep;
 	pt_entry_t pte, PG_RW, PG_V;
 	vm_page_t m;
@@ -3577,23 +3580,38 @@ pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_
 	m = NULL;
 	PG_RW = pmap_rw_bit(pmap);
 	PG_V = pmap_valid_bit(pmap);
-
 	PMAP_LOCK(pmap);
-	pdep = pmap_pde(pmap, va);
-	if (pdep != NULL && (pde = *pdep)) {
-		if (pde & PG_PS) {
-			if ((pde & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)
-				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
-				    (va & PDRMASK));
-		} else {
-			pte = *pmap_pde_to_pte(pdep, va);
-			if ((pte & PG_V) != 0 &&
-			    ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0))
-				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
-		}
-		if (m != NULL && !vm_page_wire_mapped(m))
-			m = NULL;
+
+	pdpep = pmap_pdpe(pmap, va);
+	if (pdpep == NULL || ((pdpe = *pdpep) & PG_V) == 0)
+		goto out;
+	if ((pdpe & PG_PS) != 0) {
+		if ((pdpe & PG_RW) == 0 && (prot & VM_PROT_WRITE) != 0)
+			goto out;
+		m = PHYS_TO_VM_PAGE((pdpe & PG_PS_FRAME) | (va & PDPMASK));
+		goto check_page;
 	}
+
+	pdep = pmap_pdpe_to_pde(pdpep, va);
+	if (pdep == NULL || ((pde = *pdep) & PG_V) == 0)
+		goto out;
+	if ((pde & PG_PS) != 0) {
+		if ((pde & PG_RW) == 0 && (prot & VM_PROT_WRITE) != 0)
+			goto out;
+		m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | (va & PDRMASK));
+		goto check_page;
+	}
+
+	pte = *pmap_pde_to_pte(pdep, va);
+	if ((pte & PG_V) == 0 ||
+	    ((pte & PG_RW) == 0 && (prot & VM_PROT_WRITE) != 0))
+		goto out;
+	m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
+
+check_page:
+	if (m != NULL && !vm_page_wire_mapped(m))
+		m = NULL;
+out:
 	PMAP_UNLOCK(pmap);
 	return (m);
 }
@@ -5854,6 +5872,7 @@ void
 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	struct rwlock *lock;
+	vm_page_t mt;
 	vm_offset_t va_next;
 	pml5_entry_t *pml5e;
 	pml4_entry_t *pml4e;
@@ -5917,13 +5936,28 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t
 		}

 		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+		va_next = (sva + NBPDP) & ~PDPMASK;
 		if ((*pdpe & PG_V) == 0) {
-			va_next = (sva + NBPDP) & ~PDPMASK;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}

+		KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva,
+		    ("pmap_remove of non-transient 1G page "
+		    "pdpe %#lx sva %#lx eva %#lx va_next %#lx",
+		    *pdpe, sva, eva, va_next));
+		if ((*pdpe & PG_PS) != 0) {
+			MPASS(pmap != kernel_pmap); /* XXXKIB */
+			MPASS((*pdpe & (PG_MANAGED | PG_G)) == 0);
+			anyvalid = 1;
+			*pdpe = 0;
+			pmap_resident_count_dec(pmap, NBPDP / PAGE_SIZE);
+			mt = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, sva) & PG_FRAME);
+			pmap_unwire_ptp(pmap, sva, mt, &free);
+			continue;
+		}
+
 		/*
 		 * Calculate index for next page table.
 		 */
@@ -6139,11 +6173,13 @@ retry:
 void
 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
 {
+	vm_page_t m;
 	vm_offset_t va_next;
 	pml4_entry_t *pml4e;
 	pdp_entry_t *pdpe;
 	pd_entry_t ptpaddr, *pde;
 	pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V;
+	pt_entry_t obits, pbits;
 	boolean_t anychanged;

 	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
@@ -6193,13 +6229,36 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t
 		}

 		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
+		va_next = (sva + NBPDP) & ~PDPMASK;
 		if ((*pdpe & PG_V) == 0) {
-			va_next = (sva + NBPDP) & ~PDPMASK;
 			if (va_next < sva)
 				va_next = eva;
 			continue;
 		}

+		KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva,
+		    ("pmap_remove of non-transient 1G page "
+		    "pdpe %#lx sva %#lx eva %#lx va_next %#lx",
+		    *pdpe, sva, eva, va_next));
+		if ((*pdpe & PG_PS) != 0) {
+retry_pdpe:
+			obits = pbits = *pdpe;
+			MPASS((pbits & (PG_MANAGED | PG_G)) == 0);
+			MPASS(pmap != kernel_pmap); /* XXXKIB */
+			if ((prot & VM_PROT_WRITE) == 0)
+				pbits &= ~(PG_RW | PG_M);
+			if ((prot & VM_PROT_EXECUTE) == 0)
+				pbits |= pg_nx;
+
+			if (pbits != obits) {
+				if (!atomic_cmpset_long(pdpe, obits, pbits))
+					/* PG_PS cannot be cleared under us, */
+					goto retry_pdpe;
+				anychanged = TRUE;
+			}
+			continue;
+		}
+
 		va_next = (sva + NBPDR) & ~PDRMASK;
 		if (va_next < sva)
 			va_next = eva;
@@ -6242,9 +6301,6 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t

 		for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++,
 		    sva += PAGE_SIZE) {
-			pt_entry_t obits, pbits;
-			vm_page_t m;
-
 retry:
 			obits = pbits = *pte;
 			if ((pbits & PG_V) == 0)
@@ -7184,9 +7240,10 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t
 	pml4_entry_t *pml4e;
 	pdp_entry_t *pdpe;
 	pd_entry_t *pde;
-	pt_entry_t *pte, PG_V;
+	pt_entry_t *pte, PG_V, PG_G;

 	PG_V = pmap_valid_bit(pmap);
+	PG_G = pmap_global_bit(pmap);
 	PMAP_LOCK(pmap);
 	for (; sva < eva; sva = va_next) {
 		pml4e = pmap_pml4e(pmap, sva);
@@ -7197,12 +7254,23 @@ pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t
 			continue;
 		}
 		pdpe = pmap_pml4e_to_pdpe(pml4e, sva);
-		if ((*pdpe & PG_V) == 0) {
-			va_next = (sva + NBPDP) & ~PDPMASK;
-			if (va_next < sva)
-				va_next = eva;
+		va_next = (sva + NBPDP) & ~PDPMASK;
+		if (va_next < sva)
+			va_next = eva;
+		if ((*pdpe & PG_V) == 0)
 			continue;
+		KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva,
+		    ("pmap_unwire of non-transient 1G page "
+		    "pdpe %#lx sva %#lx eva %#lx va_next %#lx",
+		    *pdpe, sva, eva, va_next));
+		if ((*pdpe & PG_PS) != 0) {
+			MPASS(pmap != kernel_pmap); /* XXXKIB */
+			MPASS((*pdpe & (PG_MANAGED | PG_G)) == 0);
+			atomic_clear_long(pdpe, PG_W);
+			pmap->pm_stats.wired_count -= NBPDP / PAGE_SIZE;
+			continue;
 		}
+
 		va_next = (sva + NBPDR) & ~PDRMASK;
 		if (va_next < sva)
 			va_next = eva;
@@ -7319,6 +7387,12 @@ pmap_copy(dst_pmap, src_pmap, vm_offset_
 		}

 		va_next = (addr + NBPDR) & ~PDRMASK;
+		KASSERT((*pdpe & PG_PS) == 0 || va_next <= end_addr,
+		    ("pmap_copy of partial non-transient 1G page "
+		    "pdpe %#lx sva %#lx eva %#lx va_next %#lx",
+		    *pdpe, addr, end_addr, va_next));
+		if ((*pdpe & PG_PS) != 0)
+			continue;
 		if (va_next < addr)
 			va_next = end_addr;

@@ -8375,6 +8449,12 @@ pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t
 		va_next = (sva + NBPDR) & ~PDRMASK;
 		if (va_next < sva)
 			va_next = eva;
+		KASSERT((*pdpe & PG_PS) == 0 || va_next <= eva,
+		    ("pmap_advise of non-transient 1G page "
+		    "pdpe %#lx sva %#lx eva %#lx va_next %#lx",
+		    *pdpe, sva, eva, va_next));
+		if ((*pdpe & PG_PS) != 0)
+			continue;
 		pde = pmap_pdpe_to_pde(pdpe, sva);
 		oldpde = *pde;
 		if ((oldpde & PG_V) == 0)
@@ -9136,6 +9216,7 @@ pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boole
 int
 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *pap)
 {
+	pdp_entry_t *pdpe;
 	pd_entry_t *pdep;
 	pt_entry_t pte, PG_A, PG_M, PG_RW, PG_V;
 	vm_paddr_t pa;
@@ -9147,23 +9228,32 @@ pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t
 	PG_RW = pmap_rw_bit(pmap);

 	PMAP_LOCK(pmap);
-	pdep = pmap_pde(pmap, addr);
-	if (pdep != NULL && (*pdep & PG_V)) {
-		if (*pdep & PG_PS) {
-			pte = *pdep;
-			/* Compute the physical address of the 4KB page. */
-			pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
+	pte = 0;
+	pa = 0;
+	val = 0;
+	pdpe = pmap_pdpe(pmap, addr);
+	if ((*pdpe & PG_V) != 0) {
+		if ((*pdpe & PG_PS) != 0) {
+			pte = *pdpe;
+			pa = ((pte & PG_PS_PDP_FRAME) | (addr & PDPMASK)) &
 			    PG_FRAME;
-			val = MINCORE_PSIND(1);
+			val = MINCORE_PSIND(2);
 		} else {
-			pte = *pmap_pde_to_pte(pdep, addr);
-			pa = pte & PG_FRAME;
-			val = 0;
+			pdep = pmap_pde(pmap, addr);
+			if (pdep != NULL && (*pdep & PG_V) != 0) {
+				if ((*pdep & PG_PS) != 0) {
+					pte = *pdep;
+					/* Compute the physical address of the 4KB page. */
+					pa = ((pte & PG_PS_FRAME) | (addr &
+					    PDRMASK)) & PG_FRAME;
+					val = MINCORE_PSIND(1);
+				} else {
+					pte = *pmap_pde_to_pte(pdep, addr);
+					pa = pte & PG_FRAME;
+					val = 0;
+				}
+			}
 		}
-	} else {
-		pte = 0;
-		pa = 0;
-		val = 0;
 	}
 	if ((pte & PG_V) != 0) {
 		val |= MINCORE_INCORE;
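The hunks above all follow one pattern: test PG_V and then PG_PS at the PDP
level before descending to the PD and PT levels, and treat a set PG_PS bit as
a terminal 1G (or, one level down, 2M) mapping. The stand-alone C sketch below
models only that decision order; its struct, constants, and walk_translate()
helper are illustrative assumptions for this note, not the kernel's pmap
interfaces (the committed code uses pmap_pdpe(), pmap_pdpe_to_pde(),
pmap_pde_to_pte(), PG_PS_FRAME/PG_PS_PDP_FRAME and PDPMASK/PDRMASK, as shown
in the diff).

#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative page-table-entry bits; the values match the amd64 layout,
 * but the struct and helper below are a toy model, not FreeBSD's pmap.
 */
#define	PG_V		0x001ULL		/* valid */
#define	PG_PS		0x080ULL		/* terminal superpage mapping */
#define	ADDR_MASK	0x000ffffffffff000ULL	/* frame bits 12..51 */
#define	PDPMASK		((1ULL << 30) - 1)	/* offset within a 1G page */
#define	PDRMASK		((1ULL << 21) - 1)	/* offset within a 2M page */
#define	PAGE_MASK	((1ULL << 12) - 1)	/* offset within a 4K page */

/* The three lowest-level entries found for one virtual address. */
struct walk {
	uint64_t pdpe;		/* page-directory-pointer entry */
	uint64_t pde;		/* page-directory entry */
	uint64_t pte;		/* page-table entry */
};

/*
 * Translate va, stopping at the PDPE when PG_PS marks a 1G page and at the
 * PDE when it marks a 2M page -- the same order in which the rewritten
 * pmap_extract_and_hold() checks the levels.  Returns 0 if unmapped.
 */
static uint64_t
walk_translate(const struct walk *w, uint64_t va)
{

	if ((w->pdpe & PG_V) == 0)
		return (0);
	if ((w->pdpe & PG_PS) != 0)		/* 1G superpage */
		return ((w->pdpe & ADDR_MASK & ~PDPMASK) | (va & PDPMASK));
	if ((w->pde & PG_V) == 0)
		return (0);
	if ((w->pde & PG_PS) != 0)		/* 2M superpage */
		return ((w->pde & ADDR_MASK & ~PDRMASK) | (va & PDRMASK));
	if ((w->pte & PG_V) == 0)
		return (0);
	return ((w->pte & ADDR_MASK) | (va & PAGE_MASK));
}

int
main(void)
{
	/* An unmanaged 1G mapping at physical 0x40000000. */
	struct walk w = { .pdpe = 0x40000000ULL | PG_PS | PG_V };

	printf("pa = %#llx\n",
	    (unsigned long long)walk_translate(&w, 0x12345678ULL));
	return (0);
}

Compiled as ordinary user-space C, the example prints pa = 0x52345678: the 1G
frame base plus the low 30 bits of the virtual address, the same arithmetic as
the new (pdpe & PG_PS_FRAME) | (va & PDPMASK) expression in the diff.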