From: Alan Cox
Date: Fri, 18 May 2012 05:36:05 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r235598 - in head/sys/amd64: amd64 include

Author: alc
Date: Fri May 18 05:36:04 2012
New Revision: 235598
URL: http://svn.freebsd.org/changeset/base/235598

Log:
  Rename pmap_collect() to pmap_pv_reclaim() and rewrite it such that it no
  longer uses the active and inactive paging queues.  Instead, the pmap now
  maintains an LRU-ordered list of pv entry pages, and pmap_pv_reclaim() uses
  this list to select pv entries for reclamation.

  Note: The old pmap_collect() tried to avoid reclaiming mappings for pages
  that have either a hold_count or a busy field that is non-zero.  However,
  this isn't necessary for correctness, and the locking in pmap_collect()
  was insufficient to guarantee that such mappings weren't reclaimed.  The
  new pmap_pv_reclaim() doesn't even try.

  Reviewed by:  kib
  MFC after:    6 weeks

Modified:
  head/sys/amd64/amd64/pmap.c
  head/sys/amd64/include/pmap.h

Modified: head/sys/amd64/amd64/pmap.c
==============================================================================
--- head/sys/amd64/amd64/pmap.c  Fri May 18 03:30:50 2012  (r235597)
+++ head/sys/amd64/amd64/pmap.c  Fri May 18 05:36:04 2012  (r235598)
@@ -201,6 +201,7 @@ static u_int64_t DMPDPphys; /* phys addr
 /*
  * Data for the pv entry allocation mechanism
  */
+static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
 static long pv_entry_count;
 static struct md_page *pv_table;
 
@@ -2028,71 +2029,136 @@ SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry
     "Current number of pv entry allocs");
 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
     "Current number of spare pv entries");
-
-static int pmap_collect_inactive, pmap_collect_active;
-
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
-    "Current number times pmap_collect called on inactive queue");
-SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
-    "Current number times pmap_collect called on active queue");
 #endif
 
 /*
  * We are in a serious low memory condition.  Resort to
  * drastic measures to free some pages so we can allocate
- * another pv entry chunk.  This is normally called to
- * unmap inactive pages, and if necessary, active pages.
+ * another pv entry chunk.
  *
  * We do not, however, unmap 2mpages because subsequent accesses will
  * allocate per-page pv entries until repromotion occurs, thereby
  * exacerbating the shortage of free pv entries.
  */
-static void
-pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
+static vm_page_t
+pmap_pv_reclaim(pmap_t locked_pmap)
 {
+    struct pch newtail;
+    struct pv_chunk *pc;
+    struct md_page *pvh;
     pd_entry_t *pde;
     pmap_t pmap;
     pt_entry_t *pte, tpte;
-    pv_entry_t next_pv, pv;
+    pv_entry_t pv;
     vm_offset_t va;
-    vm_page_t m, free;
-
-    TAILQ_FOREACH(m, &vpq->pl, pageq) {
-        if ((m->flags & PG_MARKER) != 0 || m->hold_count || m->busy)
-            continue;
-        TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
-            va = pv->pv_va;
-            pmap = PV_PMAP(pv);
+    vm_page_t free, m;
+    uint64_t inuse, freemask;
+    int bit, field, freed;
+
+    mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+    PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
+    pmap = NULL;
+    free = m = NULL;
+    TAILQ_INIT(&newtail);
+    while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && free == NULL) {
+        TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+        if (pmap != pc->pc_pmap) {
+            if (pmap != NULL) {
+                pmap_invalidate_all(pmap);
+                if (pmap != locked_pmap)
+                    PMAP_UNLOCK(pmap);
+            }
+            pmap = pc->pc_pmap;
             /* Avoid deadlock and lock recursion. */
             if (pmap > locked_pmap)
                 PMAP_LOCK(pmap);
-            else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
+            else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
+                pmap = NULL;
+                TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
                 continue;
-            pmap_resident_count_dec(pmap, 1);
-            pde = pmap_pde(pmap, va);
-            KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found"
-                " a 2mpage in page %p's pv list", m));
-            pte = pmap_pde_to_pte(pde, va);
-            tpte = pte_load_clear(pte);
-            KASSERT((tpte & PG_W) == 0,
-                ("pmap_collect: wired pte %#lx", tpte));
-            if (tpte & PG_A)
-                vm_page_aflag_set(m, PGA_REFERENCED);
-            if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
-                vm_page_dirty(m);
-            free = NULL;
-            pmap_unuse_pt(pmap, va, *pde, &free);
-            pmap_invalidate_page(pmap, va);
-            pmap_free_zero_pages(free);
-            TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
-            free_pv_entry(pmap, pv);
-            if (pmap != locked_pmap)
-                PMAP_UNLOCK(pmap);
+            }
         }
-        if (TAILQ_EMPTY(&m->md.pv_list) &&
-            TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list))
-            vm_page_aflag_clear(m, PGA_WRITEABLE);
+
+        /*
+         * Destroy every non-wired, 4 KB page mapping in the chunk.
+         */
+        freed = 0;
+        for (field = 0; field < _NPCM; field++) {
+            freemask = 0;
+            for (inuse = ~pc->pc_map[field] & pc_freemask[field];
+                inuse != 0; inuse &= ~(1UL << bit)) {
+                bit = bsfq(inuse);
+                pv = &pc->pc_pventry[field * 64 + bit];
+                va = pv->pv_va;
+                pde = pmap_pde(pmap, va);
+                if ((*pde & PG_PS) != 0)
+                    continue;
+                pte = pmap_pde_to_pte(pde, va);
+                if ((*pte & PG_W) != 0)
+                    continue;
+                tpte = pte_load_clear(pte);
+                if ((tpte & PG_G) != 0)
+                    pmap_invalidate_page(pmap, va);
+                m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
+                if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
+                    vm_page_dirty(m);
+                if ((tpte & PG_A) != 0)
+                    vm_page_aflag_set(m, PGA_REFERENCED);
+                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+                if (TAILQ_EMPTY(&m->md.pv_list) &&
+                    (m->flags & PG_FICTITIOUS) == 0) {
+                    pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
+                    if (TAILQ_EMPTY(&pvh->pv_list)) {
+                        vm_page_aflag_clear(m,
+                            PGA_WRITEABLE);
+                    }
+                }
+                pmap_unuse_pt(pmap, va, *pde, &free);
+                freemask |= 1UL << bit;
+                freed++;
+            }
+            pc->pc_map[field] |= freemask;
+        }
+        if (freed == 0) {
+            TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+            continue;
+        }
+        pmap_resident_count_dec(pmap, freed);
+        PV_STAT(pv_entry_frees += freed);
+        PV_STAT(pv_entry_spare += freed);
+        pv_entry_count -= freed;
+        TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+        if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
+            pc->pc_map[2] == PC_FREE2) {
+            PV_STAT(pv_entry_spare -= _NPCPV);
+            PV_STAT(pc_chunk_count--);
+            PV_STAT(pc_chunk_frees++);
+            /* Entire chunk is free; return it. */
+            m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
+            dump_drop_page(m->phys_addr);
+            break;
+        }
+        TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
+        TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
+        /* One freed pv entry in locked_pmap is sufficient. */
+        if (pmap == locked_pmap)
+            break;
+    }
+    TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
+    if (pmap != NULL) {
+        pmap_invalidate_all(pmap);
+        if (pmap != locked_pmap)
+            PMAP_UNLOCK(pmap);
+    }
+    if (m == NULL && free != NULL) {
+        m = free;
+        free = m->right;
+        /* Recycle a freed page table page. */
+        m->wire_count = 1;
+        atomic_add_int(&cnt.v_wire_count, 1);
     }
+    pmap_free_zero_pages(free);
+    return (m);
 }
 
 /*
@@ -2125,6 +2191,7 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
         return;
     }
     TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+    TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
     PV_STAT(pv_entry_spare -= _NPCPV);
     PV_STAT(pc_chunk_count--);
     PV_STAT(pc_chunk_frees++);
@@ -2142,7 +2209,6 @@ free_pv_entry(pmap_t pmap, pv_entry_t pv
 static pv_entry_t
 get_pv_entry(pmap_t pmap, boolean_t try)
 {
-    struct vpgqueues *pq;
     int bit, field;
     pv_entry_t pv;
     struct pv_chunk *pc;
@@ -2151,7 +2217,6 @@ get_pv_entry(pmap_t pmap, boolean_t try)
     PMAP_LOCK_ASSERT(pmap, MA_OWNED);
     mtx_assert(&vm_page_queue_mtx, MA_OWNED);
     PV_STAT(pv_entry_allocs++);
-    pq = NULL;
 retry:
     pc = TAILQ_FIRST(&pmap->pm_pvchunk);
     if (pc != NULL) {
@@ -2171,35 +2236,26 @@ retry:
             TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
         }
+        if (pc != TAILQ_LAST(&pv_chunks, pch)) {
+            TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
+            TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
+        }
         pv_entry_count++;
         PV_STAT(pv_entry_spare--);
         return (pv);
     }
     }
     /* No free items, allocate another chunk */
-    m = vm_page_alloc(NULL, 0, (pq == &vm_page_queues[PQ_ACTIVE] ?
-        VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ |
+    m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
         VM_ALLOC_WIRED);
     if (m == NULL) {
         if (try) {
             PV_STAT(pc_chunk_tryfail++);
             return (NULL);
         }
-        /*
-         * Reclaim pv entries: At first, destroy mappings to inactive
-         * pages.  After that, if a pv chunk entry is still needed,
-         * destroy mappings to active pages.
-         */
-        if (pq == NULL) {
-            PV_STAT(pmap_collect_inactive++);
-            pq = &vm_page_queues[PQ_INACTIVE];
-        } else if (pq == &vm_page_queues[PQ_INACTIVE]) {
-            PV_STAT(pmap_collect_active++);
-            pq = &vm_page_queues[PQ_ACTIVE];
-        } else
-            panic("get_pv_entry: allocation failed");
-        pmap_collect(pmap, pq);
-        goto retry;
+        m = pmap_pv_reclaim(pmap);
+        if (m == NULL)
+            goto retry;
     }
     PV_STAT(pc_chunk_count++);
     PV_STAT(pc_chunk_allocs++);
@@ -2209,6 +2265,7 @@ retry:
     pc->pc_map[0] = PC_FREE0 & ~1ul;    /* preallocated bit 0 */
     pc->pc_map[1] = PC_FREE1;
     pc->pc_map[2] = PC_FREE2;
+    TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
     pv = &pc->pc_pventry[0];
     TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
     pv_entry_count++;
@@ -4151,6 +4208,7 @@ pmap_remove_pages(pmap_t pmap)
         PV_STAT(pc_chunk_count--);
         PV_STAT(pc_chunk_frees++);
         TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
+        TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
         m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
         dump_drop_page(m->phys_addr);
         vm_page_unwire(m, 0);

Modified: head/sys/amd64/include/pmap.h
==============================================================================
--- head/sys/amd64/include/pmap.h  Fri May 18 03:30:50 2012  (r235597)
+++ head/sys/amd64/include/pmap.h  Fri May 18 05:36:04 2012  (r235598)
@@ -295,7 +295,7 @@ struct pv_chunk {
     pmap_t                 pc_pmap;
     TAILQ_ENTRY(pv_chunk)  pc_list;
     uint64_t               pc_map[_NPCM];  /* bitmap; 1 = free */
-    uint64_t               pc_spare[2];
+    TAILQ_ENTRY(pv_chunk)  pc_lru;
     struct pv_entry        pc_pventry[_NPCPV];
 };
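
For readers who want a feel for the LRU discipline the log message describes, the short
userspace sketch below is a simplified model only, not part of the commit: the names
chunk, touch_chunk, and reclaim_first are invented here, and all locking is omitted.
It shows the two operations the diff builds around pc_lru: a chunk that satisfies an
allocation is moved to the tail of the list, and reclamation takes its victim from the
head, i.e. the least recently used chunk.

/*
 * Minimal model of an LRU-ordered chunk list using queue(3) macros.
 * Illustrative only; the real code in pmap.c keys the list by pv chunk
 * pages and runs under the pmap and page queue locks.
 */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct chunk {
    int                 id;
    TAILQ_ENTRY(chunk)  lru;    /* analogous to pc_lru */
};

static TAILQ_HEAD(, chunk) chunks = TAILQ_HEAD_INITIALIZER(chunks);

/* A chunk that satisfies an allocation moves to the tail (most recent). */
static void
touch_chunk(struct chunk *c)
{
    TAILQ_REMOVE(&chunks, c, lru);
    TAILQ_INSERT_TAIL(&chunks, c, lru);
}

/* Reclamation scans from the head, the least recently used chunk. */
static struct chunk *
reclaim_first(void)
{
    struct chunk *c;

    c = TAILQ_FIRST(&chunks);
    if (c != NULL)
        TAILQ_REMOVE(&chunks, c, lru);
    return (c);
}

int
main(void)
{
    struct chunk *c;
    int i;

    for (i = 0; i < 3; i++) {
        c = malloc(sizeof(*c));
        if (c == NULL)
            abort();
        c->id = i;
        TAILQ_INSERT_TAIL(&chunks, c, lru);
    }
    /* Chunk 0 is used again, so it becomes the most recently used. */
    touch_chunk(TAILQ_FIRST(&chunks));
    /* The reclaim victim is now chunk 1, the least recently used. */
    c = reclaim_first();
    printf("victim: chunk %d\n", c->id);
    return (0);
}

The same idea appears in the diff as the TAILQ_INSERT_TAIL() of pc onto pv_chunks in
get_pv_entry() and the TAILQ_FIRST(&pv_chunks) scan in pmap_pv_reclaim().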