From owner-svn-src-stable-7@FreeBSD.ORG Thu Sep 3 13:56:19 2009
Message-Id: <200909031356.n83DuJ7P034506@svn.freebsd.org>
From: John Baldwin <jhb@FreeBSD.org>
Date: Thu, 3 Sep 2009 13:56:19 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org
Subject: svn commit: r196781 - in stable/7/sys: . amd64/amd64 amd64/include contrib/pf i386/i386 i386/include vm

Author: jhb
Date: Thu Sep 3 13:56:18 2009
New Revision: 196781
URL: http://svn.freebsd.org/changeset/base/196781

Log:
  MFC 180430, 180483, 180485, 180601, 180845, 180871-180873, 181043, 181077,
  181151, 181284, 181356, 181456, 189454, 194209, 195416, 195836, 196318,
  196705, and 196707:

  Various fixes and enhancements to the amd64 and i386 pmaps to support PAT.
  - Extend pmap_demote_pde() to include the ability to instantiate a new page
    table page where none existed before.
  - Enhance pmap_change_attr() to fully support large (2/4MB) pages by
    demoting them to 4KB page mappings when needed.
  - Enhance pmap_change_attr() to avoid page demotions, cache mode changes,
    and cache and TLB invalidation when some or all of the specified range is
    already mapped with the specified cache mode.
  - Enhance pmap_change_attr() to adjust the direct map automatically when
    changing the cache mode of a kernel virtual address range.
  - Fix pmap_object_init_pt() to not assume that the pages of an OBJT_DEVICE
    object are always physically contiguous.
  - Correct a critical accounting error in pmap_demote_pde().
Reviewed by: alc Modified: stable/7/sys/ (props changed) stable/7/sys/amd64/amd64/pmap.c stable/7/sys/amd64/include/pmap.h stable/7/sys/contrib/pf/ (props changed) stable/7/sys/i386/i386/pmap.c stable/7/sys/i386/include/pmap.h stable/7/sys/vm/vm_object.c stable/7/sys/vm/vm_object.h Modified: stable/7/sys/amd64/amd64/pmap.c ============================================================================== --- stable/7/sys/amd64/amd64/pmap.c Thu Sep 3 13:54:58 2009 (r196780) +++ stable/7/sys/amd64/amd64/pmap.c Thu Sep 3 13:56:18 2009 (r196781) @@ -220,17 +220,22 @@ static void pmap_pvh_free(struct md_page static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); +static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); +static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); +static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); +static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); +static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, vm_page_t *free); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, @@ -461,11 +466,12 @@ create_pagetables(vm_paddr_t *firstaddr) ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U; } - /* Now set up the direct map space using 2MB pages */ + /* Preset PG_M and PG_A because demotion expects it */ for (i = 0; i < NPDEPG * ndmpdp; i++) { ((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT; - ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G; + ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G | + PG_M | PG_A; } /* And the direct map space's PDP */ @@ -1068,7 +1074,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t p pte_store(pte, pa | PG_RW | PG_V | PG_G); } -PMAP_INLINE void +static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; @@ -2192,58 +2198,98 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse } /* - * Tries to demote a 2MB page mapping. + * Fills a page table page with mappings to consecutive physical pages. + */ +static void +pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) +{ + pt_entry_t *pte; + + for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { + *pte = newpte; + newpte += PAGE_SIZE; + } +} + +/* + * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page + * mapping is invalidated. 
*/ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; - pt_entry_t *firstpte, newpte, *pte; + pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t free, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); + oldpde = *pde; + KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), + ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); mpte = pmap_lookup_pt_page(pmap, va); if (mpte != NULL) pmap_remove_pt_page(pmap, mpte); else { - KASSERT((*pde & PG_W) == 0, + KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); - free = NULL; - pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free); - pmap_invalidate_page(pmap, trunc_2mpage(va)); - pmap_free_zero_pages(free); - CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" - " in pmap %p", va, pmap); - return (FALSE); + + /* + * Invalidate the 2MB page mapping and return "failure" if the + * mapping was never accessed or the allocation of the new + * page table page fails. If the 2MB page mapping belongs to + * the direct map region of the kernel's address space, then + * the page allocation request specifies the highest possible + * priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is + * normal. Page table pages are preallocated for every other + * part of the kernel address space, so the direct map region + * is the only part of the kernel address space that must be + * handled here. + */ + if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, + pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va < + DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | + VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { + free = NULL; + pmap_remove_pde(pmap, pde, trunc_2mpage(va), &free); + pmap_invalidate_page(pmap, trunc_2mpage(va)); + pmap_free_zero_pages(free); + CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" + " in pmap %p", va, pmap); + return (FALSE); + } + if (va < VM_MAXUSER_ADDRESS) + pmap->pm_stats.resident_count++; } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); - oldpde = *pde; newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; - KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V), - ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V")); + KASSERT((oldpde & PG_A) != 0, + ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); - KASSERT((oldpde & PG_PS) != 0, - ("pmap_demote_pde: oldpde is missing PG_PS")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* - * If the mapping has changed attributes, update the page table - * entries. + * If the page table page is new, initialize it. */ + if (mpte->wire_count == 1) { + mpte->wire_count = NPTEPG; + pmap_fill_ptp(firstpte, newpte); + } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); + + /* + * If the mapping has changed attributes, update the page table + * entries. + */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) - for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { - *pte = newpte; - newpte += PAGE_SIZE; - } + pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. 
The old PDE has @@ -3310,78 +3356,74 @@ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { - vm_offset_t va; + pd_entry_t *pde; + vm_paddr_t pa, ptepa; vm_page_t p, pdpg; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_DEVICE, ("pmap_object_init_pt: non-device object")); - if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { - vm_page_t m[1]; - pd_entry_t ptepa, *pde; - - PMAP_LOCK(pmap); - pde = pmap_pde(pmap, addr); - if (pde != 0 && (*pde & PG_V) != 0) - goto out; - PMAP_UNLOCK(pmap); -retry: + if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { + if (!vm_object_populate(object, pindex, pindex + atop(size))) + return; p = vm_page_lookup(object, pindex); - if (p != NULL) { - if (vm_page_sleep_if_busy(p, FALSE, "init4p")) - goto retry; - } else { - p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); - if (p == NULL) - return; - m[0] = p; - - if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { - vm_page_lock_queues(); - vm_page_free(p); - vm_page_unlock_queues(); - return; - } - - p = vm_page_lookup(object, pindex); - vm_page_wakeup(p); - } + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + /* + * Abort the mapping if the first page is not physically + * aligned to a 2MB page boundary. + */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; - p->valid = VM_PAGE_BITS_ALL; + /* + * Skip the first page. Abort the mapping if the rest of + * the pages are not physically contiguous. + */ + p = TAILQ_NEXT(p, listq); + for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; + pa += PAGE_SIZE) { + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + if (pa != VM_PAGE_TO_PHYS(p)) + return; + p = TAILQ_NEXT(p, listq); + } + /* Map using 2MB pages. */ PMAP_LOCK(pmap); - for (va = addr; va < addr + size; va += NBPDR) { - while ((pdpg = - pmap_allocpde(pmap, va, M_NOWAIT)) == NULL) { - PMAP_UNLOCK(pmap); - vm_page_busy(p); - VM_OBJECT_UNLOCK(object); - VM_WAIT; - VM_OBJECT_LOCK(object); - vm_page_wakeup(p); - PMAP_LOCK(pmap); + for (pa = ptepa; pa < ptepa + size; pa += NBPDR) { + pdpg = pmap_allocpde(pmap, addr, M_NOWAIT); + if (pdpg == NULL) { + /* + * The creation of mappings below is only an + * optimization. If a page directory page + * cannot be allocated without blocking, + * continue on to the next mapping rather than + * blocking. + */ + addr += NBPDR; + continue; } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); - pde = &pde[pmap_pde_index(va)]; + pde = &pde[pmap_pde_index(addr)]; if ((*pde & PG_V) == 0) { - pde_store(pde, ptepa | PG_PS | PG_M | PG_A | + pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); - pmap->pm_stats.resident_count += - NBPDR / PAGE_SIZE; + pmap->pm_stats.resident_count += NBPDR / + PAGE_SIZE; + pmap_pde_mappings++; } else { + /* Continue on if the PDE is already valid. 
*/ pdpg->wire_count--; KASSERT(pdpg->wire_count > 0, ("pmap_object_init_pt: missing reference " - "to page directory page, va: 0x%lx", va)); + "to page directory page, va: 0x%lx", addr)); } - ptepa += NBPDR; + addr += NBPDR; } - pmap_invalidate_all(pmap); -out: PMAP_UNLOCK(pmap); } } @@ -3739,7 +3781,9 @@ pmap_remove_pages(pmap_t pmap) if ((tpte & PG_PS) != 0) pte = pde; else { - pte = vtopte(pv->pv_va); + pte = (pt_entry_t *)PHYS_TO_DMAP(tpte & + PG_FRAME); + pte = &pte[pmap_pte_index(pv->pv_va)]; tpte = *pte & ~PG_PTE_PAT; } @@ -4160,41 +4204,35 @@ pmap_clear_reference(vm_page_t m) /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void -pmap_pte_attr(vm_offset_t va, int mode) +pmap_pte_attr(pt_entry_t *pte, int cache_bits) { - pt_entry_t *pte; u_int opte, npte; - pte = vtopte(va); - /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; - npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT); - npte |= pmap_cache_bits(mode, 0); + npte = opte & ~PG_PTE_CACHE; + npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2MB page mapped via a PDE. */ static __inline void -pmap_pde_attr(vm_offset_t va, int mode) +pmap_pde_attr(pd_entry_t *pde, int cache_bits) { - pd_entry_t *pde; u_int opde, npde; - pde = pmap_pde(kernel_pmap, va); - /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. */ do { opde = *(u_int *)pde; - npde = opde & ~(PG_PDE_PAT | PG_NC_PCD | PG_NC_PWT); - npde |= pmap_cache_bits(mode, 1); + npde = opde & ~PG_PDE_CACHE; + npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } @@ -4210,11 +4248,14 @@ pmap_mapdev_attr(vm_paddr_t pa, vm_size_ vm_offset_t va, tmpva, offset; /* - * If this fits within the direct map window and use WB caching - * mode, use the direct map. + * If the specified range of physical addresses fits within the direct + * map window, use the direct map. */ - if (pa < dmaplimit && (pa + size) < dmaplimit && mode == PAT_WRITE_BACK) - return ((void *)PHYS_TO_DMAP(pa)); + if (pa < dmaplimit && pa + size < dmaplimit) { + va = PHYS_TO_DMAP(pa); + if (!pmap_change_attr(va, size, mode)) + return ((void *)va); + } offset = pa & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); va = kmem_alloc_nofault(kernel_map, size); @@ -4263,68 +4304,174 @@ pmap_unmapdev(vm_offset_t va, vm_size_t kmem_free(kernel_map, base, size); } +/* + * Changes the specified virtual address range's memory type to that given by + * the parameter "mode". The specified virtual address range must be + * completely contained within either the direct map or the kernel map. If + * the virtual address range is contained within the kernel map, then the + * memory type for each of the corresponding ranges of the direct map is also + * changed. (The corresponding ranges of the direct map are those ranges that + * map the same physical pages as the specified virtual address range.) These + * changes to the direct map are necessary because Intel describes the + * behavior of their processors as "undefined" if two or more mappings to the + * same physical page have different memory types. + * + * Returns zero if the change completed successfully, and either EINVAL or + * ENOMEM if the change failed. 
Specifically, EINVAL is returned if some part + * of the virtual address range was not mapped, and ENOMEM is returned if + * there was insufficient memory available to complete the change. In the + * latter case, the memory type may have been changed on some part of the + * virtual address range or the direct map. + */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { + int error; + + PMAP_LOCK(kernel_pmap); + error = pmap_change_attr_locked(va, size, mode); + PMAP_UNLOCK(kernel_pmap); + return (error); +} + +static int +pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) +{ vm_offset_t base, offset, tmpva; + vm_paddr_t pa_start, pa_end; pd_entry_t *pde; pt_entry_t *pte; + int cache_bits_pte, cache_bits_pde, error; + boolean_t changed; + PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); - /* Only supported on kernel virtual addresses. */ - if (base <= VM_MAXUSER_ADDRESS) + /* + * Only supported on kernel virtual addresses, including the direct + * map but excluding the recursive map. + */ + if (base < DMAP_MIN_ADDRESS) return (EINVAL); + cache_bits_pde = pmap_cache_bits(mode, 1); + cache_bits_pte = pmap_cache_bits(mode, 0); + changed = FALSE; + /* - * XXX: We have to support tearing 2MB pages down into 4k pages if - * needed here. + * Pages that aren't mapped aren't supported. Also break down 2MB pages + * into 4KB pages if required. */ - /* Pages that aren't mapped aren't supported. */ - for (tmpva = base; tmpva < (base + size); ) { + for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde == 0) return (EINVAL); if (*pde & PG_PS) { - /* Handle 2MB pages that are completely contained. */ - if (size >= NBPDR) { + /* + * If the current 2MB page already has the required + * memory type, then we need not demote this page. Just + * increment tmpva to the next 2MB page frame. + */ + if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { + tmpva = trunc_2mpage(tmpva) + NBPDR; + continue; + } + + /* + * If the current offset aligns with a 2MB page frame + * and there is at least 2MB left within the range, then + * we need not break down this page into 4KB pages. + */ + if ((tmpva & PDRMASK) == 0 && + tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } - return (EINVAL); + if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) + return (ENOMEM); } - pte = vtopte(va); + pte = pmap_pde_to_pte(pde, tmpva); if (*pte == 0) return (EINVAL); tmpva += PAGE_SIZE; } + error = 0; /* * Ok, all the pages exist, so run through them updating their - * cache mode. + * cache mode if required. */ - for (tmpva = base; size > 0; ) { + pa_start = pa_end = 0; + for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); if (*pde & PG_PS) { - pmap_pde_attr(tmpva, mode); - tmpva += NBPDR; - size -= NBPDR; + if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { + pmap_pde_attr(pde, cache_bits_pde); + changed = TRUE; + } + if (tmpva >= VM_MIN_KERNEL_ADDRESS) { + if (pa_start == pa_end) { + /* Start physical address run. */ + pa_start = *pde & PG_PS_FRAME; + pa_end = pa_start + NBPDR; + } else if (pa_end == (*pde & PG_PS_FRAME)) + pa_end += NBPDR; + else { + /* Run ended, update direct map. */ + error = pmap_change_attr_locked( + PHYS_TO_DMAP(pa_start), + pa_end - pa_start, mode); + if (error != 0) + break; + /* Start physical address run. 
*/ + pa_start = *pde & PG_PS_FRAME; + pa_end = pa_start + NBPDR; + } + } + tmpva = trunc_2mpage(tmpva) + NBPDR; } else { - pmap_pte_attr(tmpva, mode); + pte = pmap_pde_to_pte(pde, tmpva); + if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { + pmap_pte_attr(pte, cache_bits_pte); + changed = TRUE; + } + if (tmpva >= VM_MIN_KERNEL_ADDRESS) { + if (pa_start == pa_end) { + /* Start physical address run. */ + pa_start = *pte & PG_FRAME; + pa_end = pa_start + PAGE_SIZE; + } else if (pa_end == (*pte & PG_FRAME)) + pa_end += PAGE_SIZE; + else { + /* Run ended, update direct map. */ + error = pmap_change_attr_locked( + PHYS_TO_DMAP(pa_start), + pa_end - pa_start, mode); + if (error != 0) + break; + /* Start physical address run. */ + pa_start = *pte & PG_FRAME; + pa_end = pa_start + PAGE_SIZE; + } + } tmpva += PAGE_SIZE; - size -= PAGE_SIZE; } } + if (error == 0 && pa_start != pa_end) + error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start), + pa_end - pa_start, mode); /* - * Flush CPU caches to make sure any data isn't cached that shouldn't - * be, etc. - */ - pmap_invalidate_range(kernel_pmap, base, tmpva); - pmap_invalidate_cache(); - return (0); + * Flush CPU caches if required to make sure any data isn't cached that + * shouldn't be, etc. + */ + if (changed) { + pmap_invalidate_range(kernel_pmap, base, tmpva); + pmap_invalidate_cache(); + } + return (error); } /* Modified: stable/7/sys/amd64/include/pmap.h ============================================================================== --- stable/7/sys/amd64/include/pmap.h Thu Sep 3 13:54:58 2009 (r196780) +++ stable/7/sys/amd64/include/pmap.h Thu Sep 3 13:56:18 2009 (r196781) @@ -75,6 +75,10 @@ #define PG_PROT (PG_RW|PG_U) /* all protection bits . */ #define PG_N (PG_NC_PWT|PG_NC_PCD) /* Non-cacheable */ +/* Page level cache control fields used to determine the PAT type */ +#define PG_PDE_CACHE (PG_PDE_PAT | PG_NC_PWT | PG_NC_PCD) +#define PG_PTE_CACHE (PG_PTE_PAT | PG_NC_PWT | PG_NC_PCD) + /* * Promotion to a 2MB (PDE) page mapping requires that the corresponding 4KB * (PTE) page mappings have identical settings for the following fields: @@ -308,7 +312,6 @@ void pmap_bootstrap(vm_paddr_t *); int pmap_change_attr(vm_offset_t, vm_size_t, int); void pmap_init_pat(void); void pmap_kenter(vm_offset_t va, vm_paddr_t pa); -void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); void *pmap_kenter_temporary(vm_paddr_t pa, int i); vm_paddr_t pmap_kextract(vm_offset_t); void pmap_kremove(vm_offset_t); Modified: stable/7/sys/i386/i386/pmap.c ============================================================================== --- stable/7/sys/i386/i386/pmap.c Thu Sep 3 13:54:58 2009 (r196780) +++ stable/7/sys/i386/i386/pmap.c Thu Sep 3 13:56:18 2009 (r196781) @@ -284,11 +284,15 @@ static boolean_t pmap_enter_pde(pmap_t p static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte); static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); +static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static boolean_t pmap_is_modified_pvh(struct md_page *pvh); +static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); +static void pmap_pde_attr(pd_entry_t *pde, int cache_bits); static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); +static void pmap_pte_attr(pt_entry_t *pte, int cache_bits); static 
void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, vm_page_t *free); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, @@ -1132,7 +1136,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t p pte_store(pte, pa | PG_RW | PG_V | pgeflag); } -PMAP_INLINE void +static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; @@ -2245,32 +2249,62 @@ pmap_pv_insert_pde(pmap_t pmap, vm_offse } /* - * Tries to demote a 2- or 4MB page mapping. + * Fills a page table page with mappings to consecutive physical pages. + */ +static void +pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) +{ + pt_entry_t *pte; + + for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { + *pte = newpte; + newpte += PAGE_SIZE; + } +} + +/* + * Tries to demote a 2- or 4MB page mapping. If demotion fails, the + * 2- or 4MB page mapping is invalidated. */ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde, oldpde; pmap_t allpmaps_entry; - pt_entry_t *firstpte, newpte, *pte; + pt_entry_t *firstpte, newpte; vm_paddr_t mptepa; vm_page_t free, mpte; PMAP_LOCK_ASSERT(pmap, MA_OWNED); + oldpde = *pde; + KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), + ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); mpte = pmap_lookup_pt_page(pmap, va); if (mpte != NULL) pmap_remove_pt_page(pmap, mpte); else { - KASSERT((*pde & PG_W) == 0, + KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); - free = NULL; - pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); - pmap_invalidate_page(pmap, trunc_4mpage(va)); - pmap_free_zero_pages(free); - CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" - " in pmap %p", va, pmap); - return (FALSE); + + /* + * Invalidate the 2- or 4MB page mapping and return + * "failure" if the mapping was never accessed or the + * allocation of the new page table page fails. + */ + if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, + va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | + VM_ALLOC_WIRED)) == NULL) { + free = NULL; + pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); + pmap_invalidate_page(pmap, trunc_4mpage(va)); + pmap_free_zero_pages(free); + CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" + " in pmap %p", va, pmap); + return (FALSE); + } + if (va < VM_MAXUSER_ADDRESS) + pmap->pm_stats.resident_count++; } mptepa = VM_PAGE_TO_PHYS(mpte); @@ -2304,30 +2338,32 @@ pmap_demote_pde(pmap_t pmap, pd_entry_t } firstpte = PADDR2; } - oldpde = *pde; newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; - KASSERT((oldpde & (PG_A | PG_V)) == (PG_A | PG_V), - ("pmap_demote_pde: oldpde is missing PG_A and/or PG_V")); + KASSERT((oldpde & PG_A) != 0, + ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); - KASSERT((oldpde & PG_PS) != 0, - ("pmap_demote_pde: oldpde is missing PG_PS")); newpte = oldpde & ~PG_PS; if ((newpte & PG_PDE_PAT) != 0) newpte ^= PG_PDE_PAT | PG_PTE_PAT; /* - * If the mapping has changed attributes, update the page table - * entries. - */ + * If the page table page is new, initialize it. + */ + if (mpte->wire_count == 1) { + mpte->wire_count = NPTEPG; + pmap_fill_ptp(firstpte, newpte); + } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); + + /* + * If the mapping has changed attributes, update the page table + * entries. 
+ */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) - for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { - *pte = newpte; - newpte += PAGE_SIZE; - } + pmap_fill_ptp(firstpte, newpte); /* * Demote the mapping. This pmap is locked. The old PDE has @@ -3432,62 +3468,57 @@ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { + pd_entry_t *pde; + vm_paddr_t pa, ptepa; vm_page_t p; VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); KASSERT(object->type == OBJT_DEVICE, ("pmap_object_init_pt: non-device object")); if (pseflag && - ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { - int i; - vm_page_t m[1]; - unsigned int ptepindex; - int npdes; - pd_entry_t ptepa; - - PMAP_LOCK(pmap); - if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) - goto out; - PMAP_UNLOCK(pmap); -retry: + (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { + if (!vm_object_populate(object, pindex, pindex + atop(size))) + return; p = vm_page_lookup(object, pindex); - if (p != NULL) { - if (vm_page_sleep_if_busy(p, FALSE, "init4p")) - goto retry; - } else { - p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); - if (p == NULL) - return; - m[0] = p; - - if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { - vm_page_lock_queues(); - vm_page_free(p); - vm_page_unlock_queues(); - return; - } - - p = vm_page_lookup(object, pindex); - vm_page_wakeup(p); - } + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + /* + * Abort the mapping if the first page is not physically + * aligned to a 2/4MB page boundary. + */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; - p->valid = VM_PAGE_BITS_ALL; + /* + * Skip the first page. Abort the mapping if the rest of + * the pages are not physically contiguous. + */ + p = TAILQ_NEXT(p, listq); + for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; + pa += PAGE_SIZE) { + KASSERT(p->valid == VM_PAGE_BITS_ALL, + ("pmap_object_init_pt: invalid page %p", p)); + if (pa != VM_PAGE_TO_PHYS(p)) + return; + p = TAILQ_NEXT(p, listq); + } + /* Map using 2/4MB pages. */ PMAP_LOCK(pmap); - pmap->pm_stats.resident_count += size >> PAGE_SHIFT; - npdes = size >> PDRSHIFT; - for(i = 0; i < npdes; i++) { - pde_store(&pmap->pm_pdir[ptepindex], - ptepa | PG_U | PG_RW | PG_V | PG_PS); - ptepa += NBPDR; - ptepindex += 1; + for (pa = ptepa; pa < ptepa + size; pa += NBPDR) { + pde = pmap_pde(pmap, addr); + if (*pde == 0) { + pde_store(pde, pa | PG_PS | PG_M | PG_A | + PG_U | PG_RW | PG_V); + pmap->pm_stats.resident_count += NBPDR / + PAGE_SIZE; + pmap_pde_mappings++; + } + /* Else continue on if the PDE is already valid. */ + addr += NBPDR; } - pmap_invalidate_all(pmap); -out: PMAP_UNLOCK(pmap); } } @@ -4326,6 +4357,40 @@ pmap_clear_reference(vm_page_t m) * Miscellaneous support routines follow */ +/* Adjust the cache mode for a 4KB page mapped via a PTE. */ +static __inline void +pmap_pte_attr(pt_entry_t *pte, int cache_bits) +{ + u_int opte, npte; + + /* + * The cache mode bits are all in the low 32-bits of the + * PTE, so we can just spin on updating the low 32-bits. + */ + do { + opte = *(u_int *)pte; + npte = opte & ~PG_PTE_CACHE; + npte |= cache_bits; + } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); +} + +/* Adjust the cache mode for a 2/4MB page mapped via a PDE. 
*/ +static __inline void +pmap_pde_attr(pd_entry_t *pde, int cache_bits) +{ + u_int opde, npde; + + /* + * The cache mode bits are all in the low 32-bits of the + * PDE, so we can just spin on updating the low 32-bits. + */ + do { + opde = *(u_int *)pde; + npde = opde & ~PG_PDE_CACHE; + npde |= cache_bits; + } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); +} + /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This @@ -4389,61 +4454,117 @@ pmap_unmapdev(vm_offset_t va, vm_size_t kmem_free(kernel_map, base, size); } +/* + * Changes the specified virtual address range's memory type to that given by + * the parameter "mode". The specified virtual address range must be + * completely contained within either the kernel map. + * + * Returns zero if the change completed successfully, and either EINVAL or + * ENOMEM if the change failed. Specifically, EINVAL is returned if some part + * of the virtual address range was not mapped, and ENOMEM is returned if + * there was insufficient memory available to complete the change. + */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; - pt_entry_t *pte; - u_int opte, npte; pd_entry_t *pde; + pt_entry_t *pte; + int cache_bits_pte, cache_bits_pde; + boolean_t changed; base = trunc_page(va); offset = va & PAGE_MASK; size = roundup(offset + size, PAGE_SIZE); - /* Only supported on kernel virtual addresses. */ - if (base <= VM_MAXUSER_ADDRESS) + /* + * Only supported on kernel virtual addresses above the recursive map. + */ + if (base < VM_MIN_KERNEL_ADDRESS) return (EINVAL); - /* 4MB pages and pages that aren't mapped aren't supported. */ - for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { + cache_bits_pde = pmap_cache_bits(mode, 1); + cache_bits_pte = pmap_cache_bits(mode, 0); + changed = FALSE; + + /* + * Pages that aren't mapped aren't supported. Also break down + * 2/4MB pages into 4KB pages if required. + */ + PMAP_LOCK(kernel_pmap); + for (tmpva = base; tmpva < base + size; ) { pde = pmap_pde(kernel_pmap, tmpva); - if (*pde & PG_PS) - return (EINVAL); - if (*pde == 0) + if (*pde == 0) { + PMAP_UNLOCK(kernel_pmap); return (EINVAL); - pte = vtopte(va); - if (*pte == 0) + } + if (*pde & PG_PS) { + /* + * If the current 2/4MB page already has + * the required memory type, then we need not + * demote this page. Just increment tmpva to + * the next 2/4MB page frame. + */ + if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { + tmpva = trunc_4mpage(tmpva) + NBPDR; + continue; + } + + /* + * If the current offset aligns with a 2/4MB + * page frame and there is at least 2/4MB left + * within the range, then we need not break + * down this page into 4KB pages. + */ + if ((tmpva & PDRMASK) == 0 && + tmpva + PDRMASK < base + size) { + tmpva += NBPDR; + continue; + } + if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { + PMAP_UNLOCK(kernel_pmap); + return (ENOMEM); + } + } + pte = vtopte(tmpva); + if (*pte == 0) { + PMAP_UNLOCK(kernel_pmap); return (EINVAL); + } + tmpva += PAGE_SIZE; } + PMAP_UNLOCK(kernel_pmap); /* - * Ok, all the pages exist and are 4k, so run through them updating - * their cache mode. + * Ok, all the pages exist, so run through them updating their + * cache mode if required. */ - for (tmpva = base; size > 0; ) { - pte = vtopte(tmpva); - - /* - * The cache mode bits are all in the low 32-bits of the - * PTE, so we can just spin on updating the low 32-bits. 
- */ - do { - opte = *(u_int *)pte; - npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT); - npte |= pmap_cache_bits(mode, 0); - } while (npte != opte && - !atomic_cmpset_int((u_int *)pte, opte, npte)); - tmpva += PAGE_SIZE; - size -= PAGE_SIZE; + for (tmpva = base; tmpva < base + size; ) { + pde = pmap_pde(kernel_pmap, tmpva); + if (*pde & PG_PS) { + if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { + pmap_pde_attr(pde, cache_bits_pde); + changed = TRUE; + } + tmpva = trunc_4mpage(tmpva) + NBPDR; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
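For illustration, the reworked pmap_mapdev_attr() and pmap_change_attr() above let a kernel caller request a non-write-back memory type even when the target range is covered by the direct map or by 2/4MB kernel pages, since large pages are now demoted and the direct-map alias kept consistent. A minimal sketch of such a caller follows; the frame-buffer device, its mapping variable, and the helper names are hypothetical and are not part of this commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/specialreg.h>	/* PAT_WRITE_COMBINING, PAT_UNCACHEABLE */

static void *fb_va;		/* hypothetical frame-buffer mapping */

/* Map device memory write-combined at attach time. */
static void
fb_map(vm_paddr_t pa, vm_size_t size)
{
	/*
	 * With this change, a range that falls inside the direct map is
	 * handed back after pmap_change_attr() adjusts its cache mode,
	 * instead of the old behavior of only honoring PAT_WRITE_BACK.
	 */
	fb_va = pmap_mapdev_attr(pa, size, PAT_WRITE_COMBINING);
}

/* Later, switch the existing kernel mapping to uncacheable. */
static int
fb_set_uncacheable(vm_size_t size)
{
	/* Returns 0 on success, or EINVAL/ENOMEM as documented above. */
	return (pmap_change_attr((vm_offset_t)fb_va, size, PAT_UNCACHEABLE));
}

static void
fb_unmap(vm_size_t size)
{
	pmap_unmapdev((vm_offset_t)fb_va, size);
}

Because pmap_change_attr() also updates the corresponding direct-map pages on amd64, the requested attribute cannot be silently defeated by a conflicting write-back alias of the same physical pages.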
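The demotion path in pmap_demote_pde() clears PG_PS and relocates the PAT bit, which sits at bit 12 of a 2MB PDE (PG_PDE_PAT) but at bit 7 of a 4KB PTE (PG_PTE_PAT); bit 7 of a PDE is PG_PS itself. The standalone program below, using constants that mirror the amd64 definitions, walks through that transformation on a made-up PDE value. It only illustrates the "newpte ^= PG_PDE_PAT | PG_PTE_PAT" step and is not code from the commit.

#include <stdint.h>
#include <stdio.h>

#define	PG_V		0x001ULL
#define	PG_RW		0x002ULL
#define	PG_A		0x020ULL
#define	PG_M		0x040ULL
#define	PG_PS		0x080ULL	/* 2MB page (PDE only) */
#define	PG_PTE_PAT	0x080ULL	/* PAT bit of a 4KB PTE */
#define	PG_PDE_PAT	0x1000ULL	/* PAT bit of a 2MB PDE */

int
main(void)
{
	/* A made-up 2MB PDE with the large-page PAT bit set. */
	uint64_t oldpde = 0x200000ULL | PG_PS | PG_PDE_PAT |
	    PG_M | PG_A | PG_RW | PG_V;
	uint64_t newpte;

	newpte = oldpde & ~PG_PS;		/* no longer a large page */
	if ((newpte & PG_PDE_PAT) != 0)		/* move PAT from bit 12... */
		newpte ^= PG_PDE_PAT | PG_PTE_PAT;	/* ...to bit 7 */
	printf("oldpde %#llx -> first newpte %#llx\n",
	    (unsigned long long)oldpde, (unsigned long long)newpte);
	return (0);
}

pmap_fill_ptp() then replicates that newpte across the NPTEPG entries of the new page table page, adding PAGE_SIZE each time, so the demoted mapping covers the same 2MB range with identical attributes.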
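pmap_object_init_pt() now creates superpage mappings only when the first page of the OBJT_DEVICE object sits on a 2/4MB physical boundary and every subsequent page is physically adjacent to its predecessor, rather than assuming contiguity from the first page alone. The hypothetical helper below restates that precondition in isolation; the function name is illustrative and the real check is inlined in the diff above.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_page.h>

/* Would the run of pages starting at "first" support 2/4MB mappings? */
static boolean_t
run_is_superpage_ready(vm_page_t first, vm_size_t size)
{
	vm_page_t p;
	vm_paddr_t pa, start;

	start = VM_PAGE_TO_PHYS(first);
	if ((start & (NBPDR - 1)) != 0)
		return (FALSE);		/* not 2/4MB aligned */
	p = TAILQ_NEXT(first, listq);
	for (pa = start + PAGE_SIZE; pa < start + size; pa += PAGE_SIZE) {
		if (p == NULL || VM_PAGE_TO_PHYS(p) != pa)
			return (FALSE);	/* hole or non-contiguous page */
		p = TAILQ_NEXT(p, listq);
	}
	return (TRUE);
}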