From owner-svn-src-all@FreeBSD.ORG Sun Sep 8 21:30:53 2013 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [8.8.178.115]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTP id BBB07E54; Sun, 8 Sep 2013 21:30:53 +0000 (UTC) (envelope-from alc@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id 99CE922D1; Sun, 8 Sep 2013 21:30:53 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id r88LUrTP076721; Sun, 8 Sep 2013 21:30:53 GMT (envelope-from alc@svn.freebsd.org) Received: (from alc@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id r88LUrnf076720; Sun, 8 Sep 2013 21:30:53 GMT (envelope-from alc@svn.freebsd.org) Message-Id: <201309082130.r88LUrnf076720@svn.freebsd.org> From: Alan Cox Date: Sun, 8 Sep 2013 21:30:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r255409 - head/sys/amd64/amd64 X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 08 Sep 2013 21:30:53 -0000 Author: alc Date: Sun Sep 8 21:30:53 2013 New Revision: 255409 URL: http://svnweb.freebsd.org/changeset/base/255409 Log: Prior to r254304, we only began scanning the active page queue when the amount of free memory was close to the point at which we would begin reclaiming pages. Now, we continuously scan the active page queue, regardless of the amount of free memory. Consequently, we are continuously calling pmap_ts_referenced() on active pages. Prior to this change, pmap_ts_referenced() would always demote superpage mappings in order to obtain finer-grained reference information. This made sense because we were coming under memory pressure and would soon have to begin reclaiming pages. Now, however, with continuous scanning of the active page queue, these demotions are taking a toll on performance. For example, on one of my test machines, the running time for the HPCC Random Access benchmark (also known as GUPS) has increased by 54%. To address this problem, I have replaced the demotion with a heuristic for periodically clearing the reference flag on superpage mappings. Reviewed by: kib Approved by: re (glebius) Sponsored by: EMC / Isilon Storage Division Modified: head/sys/amd64/amd64/pmap.c Modified: head/sys/amd64/amd64/pmap.c ============================================================================== --- head/sys/amd64/amd64/pmap.c Sun Sep 8 21:10:48 2013 (r255408) +++ head/sys/amd64/amd64/pmap.c Sun Sep 8 21:30:53 2013 (r255409) @@ -2900,8 +2900,8 @@ pmap_demote_pde_locked(pmap_t pmap, pd_e oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); - mpte = pmap_lookup_pt_page(pmap, va); - if (mpte != NULL) + if ((oldpde & PG_A) != 0 && (mpte = pmap_lookup_pt_page(pmap, va)) != + NULL) pmap_remove_pt_page(pmap, mpte); else { KASSERT((oldpde & PG_W) == 0, @@ -5046,6 +5046,8 @@ retry: rw_runlock(&pvh_global_lock); } +#define PMAP_TS_REFERENCED_MAX 5 + /* * pmap_ts_referenced: * @@ -5062,25 +5064,29 @@ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; - pv_entry_t pv, pvf, pvn; + pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; - pd_entry_t oldpde, *pde; + pd_entry_t *pde; pt_entry_t *pte; - vm_offset_t va; - int rtval, pvh_gen, md_gen; + vm_paddr_t pa; + int cleared, md_gen, not_cleared, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); + cleared = 0; + pa = VM_PAGE_TO_PHYS(m); + lock = PHYS_TO_PV_LIST_LOCK(pa); + pvh = pa_to_pvh(pa); rw_rlock(&pvh_global_lock); - lock = VM_PAGE_TO_PV_LIST_LOCK(m); - pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); - rtval = 0; rw_wlock(lock); retry: - if ((m->flags & PG_FICTITIOUS) != 0) + not_cleared = 0; + if ((m->flags & PG_FICTITIOUS) != 0 || + (pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; - TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, pvn) { + pv = pvf; + do { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; @@ -5092,78 +5098,85 @@ retry: goto retry; } } - va = pv->pv_va; - pde = pmap_pde(pmap, va); - oldpde = *pde; - if ((oldpde & PG_A) != 0) { - if (pmap_demote_pde_locked(pmap, pde, va, &lock)) { - if ((oldpde & PG_W) == 0) { - /* - * Remove the mapping to a single page - * so that a subsequent access may - * repromote. Since the underlying - * page table page is fully populated, - * this removal never frees a page - * table page. - */ - va += VM_PAGE_TO_PHYS(m) - (oldpde & - PG_PS_FRAME); - pte = pmap_pde_to_pte(pde, va); - pmap_remove_pte(pmap, pte, va, *pde, - NULL, &lock); - pmap_invalidate_page(pmap, va); - rtval++; - if (rtval > 4) { - PMAP_UNLOCK(pmap); - goto out; - } - } - } - KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), - ("inconsistent pv lock %p %p for page %p", - lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); + pde = pmap_pde(pmap, pv->pv_va); + if ((*pde & PG_A) != 0) { + /* + * Since this reference bit is shared by 512 4KB + * pages, it should not be cleared every time it is + * tested. Apply a simple "hash" function on the + * physical page number, the virtual superpage number, + * and the pmap address to select one 4KB page out of + * the 512 on which testing the reference bit will + * result in clearing that reference bit. This + * function is designed to avoid the selection of the + * same 4KB page for every 2MB page mapping. + * + * On demotion, a mapping that hasn't been referenced + * is simply destroyed. To avoid the possibility of a + * subsequent page fault on a demoted wired mapping, + * always leave its reference bit set. Moreover, + * since the superpage is wired, the current state of + * its reference bit won't affect page replacement. + */ + if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ + (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && + (*pde & PG_W) == 0) { + atomic_clear_long(pde, PG_A); + pmap_invalidate_page(pmap, pv->pv_va); + cleared++; + } else + not_cleared++; } PMAP_UNLOCK(pmap); - } + /* Rotate the PV list if it has more than one entry. */ + if (TAILQ_NEXT(pv, pv_next) != NULL) { + TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); + TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); + pvh->pv_gen++; + } + if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) + goto out; + } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: - if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { - pvf = pv; - do { - pvn = TAILQ_NEXT(pv, pv_next); + if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) + goto out; + pv = pvf; + do { + pmap = PV_PMAP(pv); + if (!PMAP_TRYLOCK(pmap)) { + pvh_gen = pvh->pv_gen; + md_gen = m->md.pv_gen; + rw_wunlock(lock); + PMAP_LOCK(pmap); + rw_wlock(lock); + if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { + PMAP_UNLOCK(pmap); + goto retry; + } + } + pde = pmap_pde(pmap, pv->pv_va); + KASSERT((*pde & PG_PS) == 0, + ("pmap_ts_referenced: found a 2mpage in page %p's pv list", + m)); + pte = pmap_pde_to_pte(pde, pv->pv_va); + if ((*pte & PG_A) != 0) { + atomic_clear_long(pte, PG_A); + pmap_invalidate_page(pmap, pv->pv_va); + cleared++; + } + PMAP_UNLOCK(pmap); + /* Rotate the PV list if it has more than one entry. */ + if (TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; - pmap = PV_PMAP(pv); - if (!PMAP_TRYLOCK(pmap)) { - pvh_gen = pvh->pv_gen; - md_gen = m->md.pv_gen; - rw_wunlock(lock); - PMAP_LOCK(pmap); - rw_wlock(lock); - if (pvh_gen != pvh->pv_gen || - md_gen != m->md.pv_gen) { - PMAP_UNLOCK(pmap); - goto retry; - } - } - pde = pmap_pde(pmap, pv->pv_va); - KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:" - " found a 2mpage in page %p's pv list", m)); - pte = pmap_pde_to_pte(pde, pv->pv_va); - if ((*pte & PG_A) != 0) { - atomic_clear_long(pte, PG_A); - pmap_invalidate_page(pmap, pv->pv_va); - rtval++; - if (rtval > 4) - pvn = NULL; - } - PMAP_UNLOCK(pmap); - } while ((pv = pvn) != NULL && pv != pvf); - } + } + } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); - return (rtval); + return (cleared + not_cleared); } /*