From: Andrew Turner <andrew@FreeBSD.org>
Date: Thu, 31 Mar 2016 11:07:24 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r297446 - in head/sys/arm64: arm64 include

Author: andrew
Date: Thu Mar 31 11:07:24 2016
New Revision: 297446
URL: https://svnweb.freebsd.org/changeset/base/297446

Log:
  Add support for 4-level page tables. The userland address space has been
  increased to 256TiB. The kernel address space can also be increased to be
  the same size, but this will be performed in a later change.

  To help work with an extra level of page tables, two new functions have
  been added: one to find the lowest level table entry, and one to find the
  block/page level. Both of these find the entry for a given pmap and
  virtual address.

  This has been tested with a combination of buildworld, stress2 tests, and
  by using sort to consume a large amount of memory by sorting /dev/zero.
  No new issues are known to be present from this change.
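For context, the 256TiB figure follows directly from the translation scheme:
with a 4 KiB granule each of the four table levels resolves 9 bits of the
virtual address and the final 12 bits are the page offset, giving 48
translated bits. The standalone C sketch below (an editor-written
illustration, not part of the patch) shows the decomposition; the shift and
mask values are assumed to correspond to the L0_SHIFT..L3_SHIFT and
Ln_ADDR_MASK constants used by the patch rather than quoted from pte.h.

/*
 * Standalone illustration: how a 48-bit virtual address is split across
 * four table levels with a 4 KiB translation granule.  Each table holds
 * 512 eight-byte entries, so each level consumes 9 index bits.
 */
#include <stdint.h>
#include <stdio.h>

#define L0_SHIFT	39	/* each L0 entry covers 512 GiB */
#define L1_SHIFT	30	/* each L1 entry covers 1 GiB */
#define L2_SHIFT	21	/* each L2 entry covers 2 MiB */
#define L3_SHIFT	12	/* each L3 entry covers 4 KiB */
#define Ln_ADDR_MASK	0x1ff	/* 9 index bits per level */

int
main(void)
{
	uint64_t va = 0x0000123456789abcULL;	/* an arbitrary 48-bit user VA */

	printf("L0 index:    %3llu\n",
	    (unsigned long long)((va >> L0_SHIFT) & Ln_ADDR_MASK));
	printf("L1 index:    %3llu\n",
	    (unsigned long long)((va >> L1_SHIFT) & Ln_ADDR_MASK));
	printf("L2 index:    %3llu\n",
	    (unsigned long long)((va >> L2_SHIFT) & Ln_ADDR_MASK));
	printf("L3 index:    %3llu\n",
	    (unsigned long long)((va >> L3_SHIFT) & Ln_ADDR_MASK));
	printf("page offset: 0x%llx\n", (unsigned long long)(va & 0xfff));

	/* 4 levels x 9 bits + 12 offset bits = 48 bits => 256 TiB of VA. */
	return (0);
}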
Reviewed by: kib Obtained from: ABT Systems Ltd Relnotes: yes Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D5720 Modified: head/sys/arm64/arm64/genassym.c head/sys/arm64/arm64/locore.S head/sys/arm64/arm64/machdep.c head/sys/arm64/arm64/minidump_machdep.c head/sys/arm64/arm64/pmap.c head/sys/arm64/arm64/swtch.S head/sys/arm64/arm64/vm_machdep.c head/sys/arm64/include/machdep.h head/sys/arm64/include/pcb.h head/sys/arm64/include/pmap.h head/sys/arm64/include/pte.h head/sys/arm64/include/vmparam.h Modified: head/sys/arm64/arm64/genassym.c ============================================================================== --- head/sys/arm64/arm64/genassym.c Thu Mar 31 09:55:21 2016 (r297445) +++ head/sys/arm64/arm64/genassym.c Thu Mar 31 11:07:24 2016 (r297446) @@ -52,7 +52,7 @@ ASSYM(PCB_SIZE, roundup2(sizeof(struct p ASSYM(PCB_SINGLE_STEP_SHIFT, PCB_SINGLE_STEP_SHIFT); ASSYM(PCB_REGS, offsetof(struct pcb, pcb_x)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); -ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); +ASSYM(PCB_L0ADDR, offsetof(struct pcb, pcb_l0addr)); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_FLAGS, offsetof(struct pcb, pcb_flags)); Modified: head/sys/arm64/arm64/locore.S ============================================================================== --- head/sys/arm64/arm64/locore.S Thu Mar 31 09:55:21 2016 (r297445) +++ head/sys/arm64/arm64/locore.S Thu Mar 31 11:07:24 2016 (r297446) @@ -35,7 +35,7 @@ #include #include -#define VIRT_BITS 39 +#define VIRT_BITS 48 .globl kernbase .set kernbase, KERNBASE @@ -89,7 +89,8 @@ _start: /* * At this point: * x27 = TTBR0 table - * x26 = TTBR1 table + * x26 = Kernel L1 table + * x24 = TTBR1 table */ /* Enable the mmu */ @@ -100,16 +101,6 @@ _start: br x15 virtdone: - /* - * Now that we are in virtual address space, - * we don't need the identity mapping in TTBR0 and - * can set the TCR to a more useful value. - */ - ldr x2, tcr - mrs x3, id_aa64mmfr0_el1 - bfi x2, x3, #32, #3 - msr tcr_el1, x2 - /* Set up the stack */ adr x25, initstack_end mov sp, x25 @@ -128,6 +119,7 @@ virtdone: /* Make the page table base a virtual address */ sub x26, x26, x29 + sub x24, x24, x29 sub sp, sp, #(64 * 4) mov x0, sp @@ -139,6 +131,7 @@ virtdone: str x26, [x0, 8] /* kern_l1pt */ str x29, [x0, 16] /* kern_delta */ str x25, [x0, 24] /* kern_stack */ + str x24, [x0, 32] /* kern_l0pt */ /* trace back starts here */ mov fp, #0 @@ -175,7 +168,7 @@ ENTRY(mpentry) msr contextidr_el1, x1 /* Load the kernel page table */ - adr x26, pagetable_l1_ttbr1 + adr x24, pagetable_l0_ttbr1 /* Load the identity page table */ adr x27, pagetable_l0_ttbr0 @@ -187,16 +180,6 @@ ENTRY(mpentry) br x15 mp_virtdone: - /* - * Now that we are in virtual address space, - * we don't need the identity mapping in TTBR0 and - * can set the TCR to a more useful value. - */ - ldr x2, tcr - mrs x3, id_aa64mmfr0_el1 - bfi x2, x3, #32, #3 - msr tcr_el1, x2 - ldr x4, =secondary_stacks mov x5, #(PAGE_SIZE * KSTACK_PAGES) mul x5, x0, x5 @@ -388,11 +371,18 @@ create_pagetables: mov x6, x26 bl link_l1_pagetable + /* Move to the l0 table */ + add x24, x26, #PAGE_SIZE + + /* Link the l0 -> l1 table */ + mov x9, x6 + mov x6, x24 + bl link_l0_pagetable /* * Build the TTBR0 maps. 
*/ - add x27, x26, #PAGE_SIZE + add x27, x24, #PAGE_SIZE mov x6, x27 /* The initial page table */ #if defined(SOCDEV_PA) && defined(SOCDEV_VA) @@ -440,7 +430,7 @@ link_l0_pagetable: */ /* Find the table index */ lsr x11, x8, #L0_SHIFT - and x11, x11, #Ln_ADDR_MASK + and x11, x11, #L0_ADDR_MASK /* Build the L0 block entry */ mov x12, #L0_TABLE @@ -582,7 +572,7 @@ start_mmu: /* Load ttbr0 and ttbr1 */ msr ttbr0_el1, x27 - msr ttbr1_el1, x26 + msr ttbr1_el1, x24 isb /* Clear the Monitor Debug System control register */ @@ -596,11 +586,8 @@ start_mmu: /* * Setup TCR according to PARange bits from ID_AA64MMFR0_EL1. - * Some machines have physical memory mapped >512GiB, which can not - * be identity-mapped using the default 39 VA bits. Thus, use - * 48 VA bits for now and switch back to 39 after the VA jump. */ - ldr x2, tcr_early + ldr x2, tcr mrs x3, id_aa64mmfr0_el1 bfi x2, x3, #32, #3 msr tcr_el1, x2 @@ -623,9 +610,6 @@ mair: tcr: .quad (TCR_TxSZ(64 - VIRT_BITS) | TCR_ASID_16 | TCR_TG1_4K | \ TCR_CACHE_ATTRS | TCR_SMP_ATTRS) -tcr_early: - .quad (TCR_T1SZ(64 - VIRT_BITS) | TCR_T0SZ(64 - 48) | \ - TCR_ASID_16 | TCR_TG1_4K | TCR_CACHE_ATTRS | TCR_SMP_ATTRS) sctlr_set: /* Bits to set */ .quad (SCTLR_UCI | SCTLR_nTWE | SCTLR_nTWI | SCTLR_UCT | SCTLR_DZE | \ @@ -651,6 +635,8 @@ pagetable: .space PAGE_SIZE pagetable_l1_ttbr1: .space PAGE_SIZE +pagetable_l0_ttbr1: + .space PAGE_SIZE pagetable_l1_ttbr0: .space PAGE_SIZE pagetable_l0_ttbr0: Modified: head/sys/arm64/arm64/machdep.c ============================================================================== --- head/sys/arm64/arm64/machdep.c Thu Mar 31 09:55:21 2016 (r297445) +++ head/sys/arm64/arm64/machdep.c Thu Mar 31 11:07:24 2016 (r297446) @@ -896,8 +896,8 @@ initarm(struct arm64_bootparams *abp) cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ - pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta, - lastaddr - KERNBASE); + pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, + KERNBASE - abp->kern_delta, lastaddr - KERNBASE); arm_devmap_bootstrap(0, NULL); Modified: head/sys/arm64/arm64/minidump_machdep.c ============================================================================== --- head/sys/arm64/arm64/minidump_machdep.c Thu Mar 31 09:55:21 2016 (r297445) +++ head/sys/arm64/arm64/minidump_machdep.c Thu Mar 31 11:07:24 2016 (r297446) @@ -218,7 +218,7 @@ blk_write(struct dumperinfo *di, char *p int minidumpsys(struct dumperinfo *di) { - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; pt_entry_t *l3; uint32_t pmapsize; vm_offset_t va; @@ -236,7 +236,7 @@ minidumpsys(struct dumperinfo *di) pmapsize = 0; for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { pmapsize += PAGE_SIZE; - if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) continue; /* We should always be using the l2 table for kvm */ @@ -335,7 +335,7 @@ minidumpsys(struct dumperinfo *di) /* Dump kernel page directory pages */ bzero(&tmpbuffer, sizeof(tmpbuffer)); for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) { - if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3)) { + if (!pmap_get_tables(pmap_kernel(), va, &l0, &l1, &l2, &l3)) { /* We always write a page, even if it is zero */ error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE); if (error) Modified: head/sys/arm64/arm64/pmap.c ============================================================================== --- head/sys/arm64/arm64/pmap.c Thu Mar 31 09:55:21 2016 (r297445) +++ 
head/sys/arm64/arm64/pmap.c Thu Mar 31 11:07:24 2016 (r297446) @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. - * Copyright (c) 2014 The FreeBSD Foundation + * Copyright (c) 2014-2016 The FreeBSD Foundation * All rights reserved. * * This code is derived from software contributed to Berkeley by @@ -142,9 +142,14 @@ __FBSDID("$FreeBSD$"); #include #include -#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) -#define NUPDE (NPDEPG * NPDEPG) -#define NUSERPGTBLS (NUPDE + NPDEPG) +#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) +#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) + +#define NUL0E L0_ENTRIES +#define NUL1E (NUL0E * NL1PG) +#define NUL2E (NUL1E * NL2PG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ @@ -273,15 +278,37 @@ pagezero(void *p) bzero(p, PAGE_SIZE); } +#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) static __inline pd_entry_t * +pmap_l0(pmap_t pmap, vm_offset_t va) +{ + + return (&pmap->pm_l0[pmap_l0_index(va)]); +} + +static __inline pd_entry_t * +pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) +{ + pd_entry_t *l1; + + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + return (&l1[pmap_l1_index(va)]); +} + +static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { + pd_entry_t *l0; + + l0 = pmap_l0(pmap, va); + if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) + return (NULL); - return (&pmap->pm_l1[pmap_l1_index(va)]); + return (pmap_l0_to_l1(l0, va)); } static __inline pd_entry_t * @@ -314,28 +341,103 @@ pmap_l2_to_l3(pd_entry_t *l2, vm_offset_ return (&l3[pmap_l3_index(va)]); } +/* + * Returns the lowest valid pde for a given virtual address. + * The next level may or may not point to a valid page or block. + */ +static __inline pd_entry_t * +pmap_pde(pmap_t pmap, vm_offset_t va, int *level) +{ + pd_entry_t *l0, *l1, *l2, desc; + + l0 = pmap_l0(pmap, va); + desc = pmap_load(l0) & ATTR_DESCR_MASK; + if (desc != L0_TABLE) { + *level = -1; + return (NULL); + } + + l1 = pmap_l0_to_l1(l0, va); + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc != L1_TABLE) { + *level = 0; + return (l0); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc != L2_TABLE) { + *level = 1; + return (l1); + } + + *level = 2; + return (l2); +} + +/* + * Returns the lowest valid pte block or table entry for a given virtual + * address. If there are no valid entries return NULL and set the level to + * the first invalid level. 
+ */ static __inline pt_entry_t * -pmap_l3(pmap_t pmap, vm_offset_t va) +pmap_pte(pmap_t pmap, vm_offset_t va, int *level) { - pd_entry_t *l2; + pd_entry_t *l1, *l2, desc; + pt_entry_t *l3; - l2 = pmap_l2(pmap, va); - if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) + l1 = pmap_l1(pmap, va); + if (l1 == NULL) { + *level = 0; return (NULL); + } + desc = pmap_load(l1) & ATTR_DESCR_MASK; + if (desc == L1_BLOCK) { + *level = 1; + return (l1); + } - return (pmap_l2_to_l3(l2, va)); + if (desc != L1_TABLE) { + *level = 1; + return (NULL); + } + + l2 = pmap_l1_to_l2(l1, va); + desc = pmap_load(l2) & ATTR_DESCR_MASK; + if (desc == L2_BLOCK) { + *level = 2; + return (l2); + } + + if (desc != L2_TABLE) { + *level = 2; + return (NULL); + } + + *level = 3; + l3 = pmap_l2_to_l3(l2, va); + if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) + return (NULL); + + return (l3); } bool -pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2, - pt_entry_t **l3) +pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, + pd_entry_t **l2, pt_entry_t **l3) { - pd_entry_t *l1p, *l2p; + pd_entry_t *l0p, *l1p, *l2p; + + if (pmap->pm_l0 == NULL) + return (false); + + l0p = pmap_l0(pmap, va); + *l0 = l0p; - if (pmap->pm_l1 == NULL) + if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) return (false); - l1p = pmap_l1(pmap, va); + l1p = pmap_l0_to_l1(l0p, va); *l1 = l1p; if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { @@ -544,7 +646,8 @@ pmap_bootstrap_l3(vm_offset_t l1pt, vm_o * Bootstrap the system enough to run with virtual memory. */ void -pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) +pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, + vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; @@ -562,7 +665,7 @@ pmap_bootstrap(vm_offset_t l1pt, vm_padd printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ - kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; + kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; PMAP_LOCK_INIT(kernel_pmap); /* @@ -805,30 +908,40 @@ pmap_invalidate_all(pmap_t pmap) vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { - pd_entry_t *l2p, l2; - pt_entry_t *l3p, l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; + int lvl; pa = 0; PMAP_LOCK(pmap); /* - * Start with the l2 tabel. We are unable to allocate - * pages in the l1 table. + * Find the block or page map for this virtual address. pmap_pte + * will return either a valid block/page entry, or NULL. 
*/ - l2p = pmap_l2(pmap, va); - if (l2p != NULL) { - l2 = pmap_load(l2p); - if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) { - l3p = pmap_l2_to_l3(l2p, va); - if (l3p != NULL) { - l3 = pmap_load(l3p); - - if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) - pa = (l3 & ~ATTR_MASK) | - (va & L3_OFFSET); - } - } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) - pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET); + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + pa = tpte & ~ATTR_MASK; + switch(lvl) { + case 1: + KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_extract: Invalid L1 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L1_OFFSET); + break; + case 2: + KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_extract: Invalid L2 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L2_OFFSET); + break; + case 3: + KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, + ("pmap_extract: Invalid L3 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L3_OFFSET); + break; + } } PMAP_UNLOCK(pmap); return (pa); @@ -844,21 +957,31 @@ pmap_extract(pmap_t pmap, vm_offset_t va vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - pt_entry_t *l3p, l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; vm_page_t m; + int lvl; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: - l3p = pmap_l3(pmap, va); - if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { - if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || + pte = pmap_pte(pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + + KASSERT(lvl > 0 && lvl <= 3, + ("pmap_extract_and_hold: Invalid level %d", lvl)); + CTASSERT(L1_BLOCK == L2_BLOCK); + KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || + (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), + ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, + tpte & ATTR_DESCR_MASK)); + if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || ((prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa)) + if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa)) goto retry; - m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK); + m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); vm_page_hold(m); } } @@ -870,25 +993,39 @@ retry: vm_paddr_t pmap_kextract(vm_offset_t va) { - pd_entry_t *l2p, l2; - pt_entry_t *l3; + pt_entry_t *pte, tpte; vm_paddr_t pa; + int lvl; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { - l2p = pmap_l2(kernel_pmap, va); - if (l2p == NULL) - panic("pmap_kextract: No l2"); - l2 = pmap_load(l2p); - if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) - return ((l2 & ~ATTR_MASK) | - (va & L2_OFFSET)); - - l3 = pmap_l2_to_l3(l2p, va); - if (l3 == NULL) - panic("pmap_kextract: No l3..."); - pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK); + pa = 0; + pte = pmap_pte(kernel_pmap, va, &lvl); + if (pte != NULL) { + tpte = pmap_load(pte); + pa = tpte & ~ATTR_MASK; + switch(lvl) { + case 1: + KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, + ("pmap_kextract: Invalid L1 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L1_OFFSET); + break; + case 2: + KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, + ("pmap_kextract: Invalid L2 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L2_OFFSET); + break; + case 3: + KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, + ("pmap_kextract: Invalid L3 pte found: %lx", + tpte & ATTR_DESCR_MASK)); + pa |= (va & L3_OFFSET); + break; + } + } } return (pa); } @@ -900,8 +1037,10 @@ pmap_kextract(vm_offset_t va) void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t 
pa) { - pt_entry_t *l3; + pd_entry_t *pde; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); @@ -912,11 +1051,16 @@ pmap_kenter_device(vm_offset_t sva, vm_s va = sva; while (size != 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); - pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT | + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_kenter_device: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_kenter_device: Invalid level %d", lvl)); + + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT | ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); - PTE_SYNC(l3); + PTE_SYNC(pte); va += PAGE_SIZE; pa += PAGE_SIZE; @@ -927,28 +1071,30 @@ pmap_kenter_device(vm_offset_t sva, vm_s /* * Remove a page from the kernel pagetables. - * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { - pt_entry_t *l3; + pt_entry_t *pte; + int lvl; - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); + KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); - if (pmap_l3_valid_cacheable(pmap_load(l3))) + if (pmap_l3_valid_cacheable(pmap_load(pte))) cpu_dcache_wb_range(va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); + pmap_load_clear(pte); + PTE_SYNC(pte); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { - pt_entry_t *l3; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); @@ -957,10 +1103,12 @@ pmap_kremove_device(vm_offset_t sva, vm_ va = sva; while (size != 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); - pmap_load_clear(l3); - PTE_SYNC(l3); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + pmap_load_clear(pte); + PTE_SYNC(pte); va += PAGE_SIZE; size -= PAGE_SIZE; @@ -999,19 +1147,26 @@ pmap_map(vm_offset_t *virt, vm_paddr_t s void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { - pt_entry_t *l3, pa; + pd_entry_t *pde; + pt_entry_t *pte, pa; vm_offset_t va; vm_page_t m; - int i; + int i, lvl; va = sva; for (i = 0; i < count; i++) { + pde = pmap_pde(kernel_pmap, va, &lvl); + KASSERT(pde != NULL, + ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); + KASSERT(lvl == 2, + ("pmap_qenter: Invalid level %d", lvl)); + m = ma[i]; pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | ATTR_IDX(m->md.pv_memattr) | L3_PAGE; - l3 = pmap_l3(kernel_pmap, va); - pmap_load_store(l3, pa); - PTE_SYNC(l3); + pte = pmap_l2_to_l3(pde, va); + pmap_load_store(pte, pa); + PTE_SYNC(pte); va += L3_SIZE; } @@ -1021,25 +1176,27 @@ pmap_qenter(vm_offset_t sva, vm_page_t * /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. - * Note: SMP coherent. Uses a ranged shootdown IPI. 
*/ void pmap_qremove(vm_offset_t sva, int count) { - pt_entry_t *l3; + pt_entry_t *pte; vm_offset_t va; + int lvl; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { - l3 = pmap_l3(kernel_pmap, va); - KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); - - if (pmap_l3_valid_cacheable(pmap_load(l3))) - cpu_dcache_wb_range(va, L3_SIZE); - pmap_load_clear(l3); - PTE_SYNC(l3); + pte = pmap_pte(kernel_pmap, va, &lvl); + KASSERT(lvl == 3, + ("Invalid device pagetable level: %d != 3", lvl)); + if (pte != NULL) { + if (pmap_l3_valid_cacheable(pmap_load(pte))) + cpu_dcache_wb_range(va, L3_SIZE); + pmap_load_clear(pte); + PTE_SYNC(pte); + } va += PAGE_SIZE; } @@ -1104,26 +1261,47 @@ _pmap_unwire_l3(pmap_t pmap, vm_offset_t /* * unmap the page table page */ - if (m->pindex >= NUPDE) { - /* PD page */ + if (m->pindex >= (NUL2E + NUL1E)) { + /* l1 page */ + pd_entry_t *l0; + + l0 = pmap_l0(pmap, va); + pmap_load_clear(l0); + PTE_SYNC(l0); + } else if (m->pindex >= NUL2E) { + /* l2 page */ pd_entry_t *l1; + l1 = pmap_l1(pmap, va); pmap_load_clear(l1); PTE_SYNC(l1); } else { - /* PTE page */ + /* l3 page */ pd_entry_t *l2; + l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); - if (m->pindex < NUPDE) { - /* We just released a PT, unhold the matching PD */ - vm_page_t pdpg; + if (m->pindex < NUL2E) { + /* We just released an l3, unhold the matching l2 */ + pd_entry_t *l1, tl1; + vm_page_t l2pg; - pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK); - pmap_unwire_l3(pmap, va, pdpg, free); + l1 = pmap_l1(pmap, va); + tl1 = pmap_load(l1); + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l2pg, free); + } else if (m->pindex < (NUL2E + NUL1E)) { + /* We just released an l2, unhold the matching l1 */ + pd_entry_t *l0, tl0; + vm_page_t l1pg; + + l0 = pmap_l0(pmap, va); + tl0 = pmap_load(l0); + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + pmap_unwire_l3(pmap, va, l1pg, free); } pmap_invalidate_page(pmap, va); @@ -1164,27 +1342,27 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); - pmap->pm_l1 = kernel_pmap->pm_l1; + pmap->pm_l0 = kernel_pmap->pm_l0; } int pmap_pinit(pmap_t pmap) { - vm_paddr_t l1phys; - vm_page_t l1pt; + vm_paddr_t l0phys; + vm_page_t l0pt; /* - * allocate the l1 page + * allocate the l0 page */ - while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | + while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; - l1phys = VM_PAGE_TO_PHYS(l1pt); - pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); + l0phys = VM_PAGE_TO_PHYS(l0pt); + pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); - if ((l1pt->flags & PG_ZERO) == 0) - pagezero(pmap->pm_l1); + if ((l0pt->flags & PG_ZERO) == 0) + pagezero(pmap->pm_l0); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); @@ -1205,7 +1383,7 @@ pmap_pinit(pmap_t pmap) static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { - vm_page_t m, /*pdppg, */pdpg; + vm_page_t m, l1pg, l2pg; PMAP_LOCK_ASSERT(pmap, MA_OWNED); @@ -1237,33 +1415,84 @@ _pmap_alloc_l3(pmap_t pmap, vm_pindex_t * it isn't already there. 
*/ - if (ptepindex >= NUPDE) { - pd_entry_t *l1; - vm_pindex_t l1index; + if (ptepindex >= (NUL2E + NUL1E)) { + pd_entry_t *l0; + vm_pindex_t l0index; + + l0index = ptepindex - (NUL2E + NUL1E); + l0 = &pmap->pm_l0[l0index]; + pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); + PTE_SYNC(l0); + } else if (ptepindex >= NUL2E) { + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1; + pd_entry_t tl0; + + l1index = ptepindex - NUL2E; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, + lockp) == NULL) { + --m->wire_count; + /* XXX: release mem barrier? */ + atomic_subtract_int(&vm_cnt.v_wire_count, 1); + vm_page_free_zero(m); + return (NULL); + } + } else { + l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); + l1pg->wire_count++; + } - l1index = ptepindex - NUPDE; - l1 = &pmap->pm_l1[l1index]; + l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); + l1 = &l1[ptepindex & Ln_ADDR_MASK]; pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); PTE_SYNC(l1); - } else { - vm_pindex_t l1index; - pd_entry_t *l1, *l2; - - l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); - l1 = &pmap->pm_l1[l1index]; - if (pmap_load(l1) == 0) { + vm_pindex_t l0index, l1index; + pd_entry_t *l0, *l1, *l2; + pd_entry_t tl0, tl1; + + l1index = ptepindex >> Ln_ENTRIES_SHIFT; + l0index = l1index >> L0_ENTRIES_SHIFT; + + l0 = &pmap->pm_l0[l0index]; + tl0 = pmap_load(l0); + if (tl0 == 0) { /* recurse for allocating page dir */ - if (_pmap_alloc_l3(pmap, NUPDE + l1index, + if (_pmap_alloc_l3(pmap, NUL2E + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } + tl0 = pmap_load(l0); + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; } else { - pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK); - pdpg->wire_count++; + l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); + l1 = &l1[l1index & Ln_ADDR_MASK]; + tl1 = pmap_load(l1); + if (tl1 == 0) { + /* recurse for allocating page dir */ + if (_pmap_alloc_l3(pmap, NUL2E + l1index, + lockp) == NULL) { + --m->wire_count; + /* XXX: release mem barrier? */ + atomic_subtract_int( + &vm_cnt.v_wire_count, 1); + vm_page_free_zero(m); + return (NULL); + } + } else { + l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); + l2pg->wire_count++; + } } l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); @@ -1281,8 +1510,9 @@ static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; - pd_entry_t *l2; + pd_entry_t *pde, tpde; vm_page_t m; + int lvl; /* * Calculate pagetable page index @@ -1292,24 +1522,29 @@ retry: /* * Get the page directory entry */ - l2 = pmap_l2(pmap, va); + pde = pmap_pde(pmap, va, &lvl); /* - * If the page table page is mapped, we just increment the - * hold count, and activate it. + * If the page table page is mapped, we just increment the hold count, + * and activate it. If we get a level 2 pde it will point to a level 3 + * table. */ - if (l2 != NULL && pmap_load(l2) != 0) { - m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); - m->wire_count++; - } else { - /* - * Here if the pte page isn't mapped, or if it has been - * deallocated. 
- */ - m = _pmap_alloc_l3(pmap, ptepindex, lockp); - if (m == NULL && lockp != NULL) - goto retry; + if (lvl == 2) { + tpde = pmap_load(pde); + if (tpde != 0) { + m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); + m->wire_count++; + return (m); + } } + + /* + * Here if the pte page isn't mapped, or if it has been deallocated. + */ + m = _pmap_alloc_l3(pmap, ptepindex, lockp); + if (m == NULL && lockp != NULL) + goto retry; + return (m); } @@ -1332,7 +1567,7 @@ pmap_release(pmap_t pmap) ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); - m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); + m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); @@ -1369,7 +1604,7 @@ pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; - pd_entry_t *l1, *l2; + pd_entry_t *l0, *l1, *l2; mtx_assert(&kernel_map->system_mtx, MA_OWNED); @@ -1377,7 +1612,11 @@ pmap_growkernel(vm_offset_t addr) if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { - l1 = pmap_l1(kernel_pmap, kernel_vm_end); + l0 = pmap_l0(kernel_pmap, kernel_vm_end); + KASSERT(pmap_load(l0) != 0, + ("pmap_growkernel: No level 0 kernel entry")); + + l1 = pmap_l0_to_l1(l0, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
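To make the new lookup convention concrete: pmap_pte() in the hunks above
returns the lowest valid block or page entry together with the level it was
found at, and callers such as pmap_extract() and pmap_kextract() then add
back the virtual-address bits that the entry does not translate. The
following standalone sketch is an editor-written model of that switch, not
code from this commit; the ATTR_MASK and L*_OFFSET values are assumed for a
4 KiB granule rather than quoted from pte.h.

/*
 * Editor's illustration: recover a physical address from the PTE and
 * level reported by a pmap_pte()-style lookup.  The output-address bits
 * of the entry are combined with the untranslated low bits of the VA.
 */
#include <stdint.h>
#include <stdio.h>

#define ATTR_MASK	0xfff0000000000fffULL	/* upper+lower attributes (assumed) */
#define L1_OFFSET	((1ULL << 30) - 1)	/* 1 GiB block */
#define L2_OFFSET	((1ULL << 21) - 1)	/* 2 MiB block */
#define L3_OFFSET	((1ULL << 12) - 1)	/* 4 KiB page */

static uint64_t
pte_to_pa(uint64_t pte, int level, uint64_t va)
{
	uint64_t pa = pte & ~ATTR_MASK;		/* output address bits */

	switch (level) {
	case 1:	return (pa | (va & L1_OFFSET));	/* L1 block mapping */
	case 2:	return (pa | (va & L2_OFFSET));	/* L2 block mapping */
	case 3:	return (pa | (va & L3_OFFSET));	/* L3 page mapping */
	default: return (0);			/* no valid mapping */
	}
}

int
main(void)
{
	/* A made-up L2 block entry mapping PA 0x80000000 with attribute bits set. */
	uint64_t pte = 0x80000000ULL | 0x711;
	uint64_t va = 0xffff000000345678ULL;

	printf("pa = 0x%llx\n", (unsigned long long)pte_to_pa(pte, 2, va));
	return (0);
}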