From: Alan Cox <alc@FreeBSD.org>
Date: Fri, 15 Oct 2010 03:23:53 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Message-Id: <201010150323.o9F3Nrlf047288@svn.freebsd.org>
Subject: svn commit: r213887 - in stable/8/sys: amd64/amd64 i386/i386

Author: alc
Date: Fri Oct 15 03:23:53 2010
New Revision: 213887
URL: http://svn.freebsd.org/changeset/base/213887

Log:
  MFC r209789
    Correctly maintain the per-cpu field "curpmap" on amd64 just like we
    do on i386.  The consequences of not doing so on amd64 became
    apparent with the introduction of the COUNT_IPIS and
    COUNT_XINVLTLB_HITS options.  Specifically, single-threaded
    applications were generating unnecessary IPIs to shoot down the TLB
    on other processors.

  MFC r209887
    Reduce the number of global TLB shootdowns generated by
    pmap_qenter().  Specifically, teach pmap_qenter() to recognize the
    case when it is being asked to replace a mapping with the very same
    mapping and not generate a shootdown.
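The pmap_qenter() change in the diff below is easiest to read as: compare
the frame and cache bits of the resident PTE against the mapping being
installed, and skip both the store and the shootdown bookkeeping when they
already match.  The following stand-alone, user-space C sketch models that
logic; the PG_* values, the qenter_sketch() name, and the boolean
"shootdown needed" result are illustrative stand-ins, not the kernel's
real definitions:

/*
 * Minimal user-space sketch of the pmap_qenter() change, not the kernel
 * code itself: PTEs are plain uint64_t words and the TLB shootdown is
 * modeled as a return flag.  The PG_* constants are illustrative.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PG_V		0x001ULL		/* valid */
#define PG_RW		0x002ULL		/* writable */
#define PG_CACHE	0x018ULL		/* stand-in for PG_PTE_CACHE */
#define PG_G		0x100ULL		/* global */
#define PG_FRAME	0xfffffffff000ULL	/* physical frame mask */

/*
 * Rewrite count entries starting at pte; return true iff some entry that
 * was already valid was overwritten, i.e. a TLB shootdown is needed.
 */
static bool
qenter_sketch(uint64_t *pte, const uint64_t *pa, int count)
{
	uint64_t oldpte = 0;
	int i;

	for (i = 0; i < count; i++) {
		/* Skip the store when frame and cache bits already match. */
		if ((pte[i] & (PG_FRAME | PG_CACHE)) !=
		    (pa[i] & (PG_FRAME | PG_CACHE))) {
			oldpte |= pte[i];
			pte[i] = (pa[i] & (PG_FRAME | PG_CACHE)) |
			    PG_G | PG_RW | PG_V;
		}
	}
	return ((oldpte & PG_V) != 0);
}

int
main(void)
{
	uint64_t pte[2] = { 0x1000 | PG_G | PG_RW | PG_V, 0 };
	uint64_t pa[2] = { 0x1000, 0x2000 };

	/* Entry 0 is unchanged, entry 1 was invalid: no shootdown. */
	printf("shootdown needed: %d\n", qenter_sketch(pte, pa, 2));
	return (0);
}

Because oldpte only accumulates entries that were actually overwritten,
remapping a buffer to the very same pages (or filling previously invalid
slots) now exits without calling pmap_invalidate_range().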
Modified:
  stable/8/sys/amd64/amd64/cpu_switch.S
  stable/8/sys/amd64/amd64/pmap.c
  stable/8/sys/i386/i386/pmap.c

Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/sys/amd64/amd64/cpu_switch.S
==============================================================================
--- stable/8/sys/amd64/amd64/cpu_switch.S	Fri Oct 15 02:58:49 2010	(r213886)
+++ stable/8/sys/amd64/amd64/cpu_switch.S	Fri Oct 15 03:23:53 2010	(r213887)
@@ -69,16 +69,13 @@
  * %rsi = newtd
  */
 ENTRY(cpu_throw)
+	movl	PCPU(CPUID),%eax
 	testq	%rdi,%rdi
-	jnz	1f
-	movq	PCPU(IDLETHREAD),%rdi
-1:
-	movq	TD_PCB(%rdi),%r8		/* Old pcb */
-	movl	PCPU(CPUID), %eax
+	jz	1f
 	/* release bit from old pm_active */
-	movq	TD_PROC(%rdi), %rdx		/* oldtd->td_proc */
-	movq	P_VMSPACE(%rdx), %rdx		/* proc->p_vmspace */
-	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* clear old */
+	movq	PCPU(CURPMAP),%rdx
+	LK btrl	%eax,PM_ACTIVE(%rdx)		/* clear old */
+1:
 	movq	TD_PCB(%rsi),%r8		/* newtd->td_proc */
 	movq	PCB_CR3(%r8),%rdx
 	movq	%rdx,%cr3			/* new address space */
@@ -140,15 +137,16 @@ swinact:
 	movq	%rcx,%cr3			/* new address space */
 	movl	PCPU(CPUID), %eax
 	/* Release bit from old pmap->pm_active */
-	movq	TD_PROC(%rdi), %rcx		/* oldproc */
-	movq	P_VMSPACE(%rcx), %rcx
-	LK btrl	%eax, VM_PMAP+PM_ACTIVE(%rcx)	/* clear old */
+	movq	PCPU(CURPMAP),%rcx
+	LK btrl	%eax,PM_ACTIVE(%rcx)		/* clear old */
 	SETLK	%rdx, TD_LOCK(%rdi)		/* Release the old thread */
 swact:
 	/* Set bit in new pmap->pm_active */
 	movq	TD_PROC(%rsi),%rdx		/* newproc */
 	movq	P_VMSPACE(%rdx), %rdx
-	LK btsl	%eax, VM_PMAP+PM_ACTIVE(%rdx)	/* set new */
+	addq	$VM_PMAP,%rdx
+	LK btsl	%eax,PM_ACTIVE(%rdx)		/* set new */
+	movq	%rdx,PCPU(CURPMAP)

 sw1:
 #if defined(SCHED_ULE) && defined(SMP)

Modified: stable/8/sys/amd64/amd64/pmap.c
==============================================================================
--- stable/8/sys/amd64/amd64/pmap.c	Fri Oct 15 02:58:49 2010	(r213886)
+++ stable/8/sys/amd64/amd64/pmap.c	Fri Oct 15 03:23:53 2010	(r213887)
@@ -1258,19 +1258,22 @@ pmap_map(vm_offset_t *virt, vm_paddr_t s
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
-	pt_entry_t *endpte, oldpte, *pte;
+	pt_entry_t *endpte, oldpte, pa, *pte;
+	vm_page_t m;

 	oldpte = 0;
 	pte = vtopte(sva);
 	endpte = pte + count;
 	while (pte < endpte) {
-		oldpte |= *pte;
-		pte_store(pte, VM_PAGE_TO_PHYS(*ma) | PG_G |
-		    pmap_cache_bits((*ma)->md.pat_mode, 0) | PG_RW | PG_V);
+		m = *ma++;
+		pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
+		if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
+			oldpte |= *pte;
+			pte_store(pte, pa | PG_G | PG_RW | PG_V);
+		}
 		pte++;
-		ma++;
 	}
-	if ((oldpte & PG_V) != 0)
+	if (__predict_false((oldpte & PG_V) != 0))
 		pmap_invalidate_range(kernel_pmap, sva, sva + count *
 		    PAGE_SIZE);
 }
@@ -1500,6 +1503,7 @@ pmap_pinit0(pmap_t pmap)
 	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	pmap->pm_root = NULL;
 	pmap->pm_active = 0;
+	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 }
@@ -4873,6 +4877,7 @@ if (oldpmap)	/* XXX FIXME */
 	cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
 	td->td_pcb->pcb_cr3 = cr3;
 	load_cr3(cr3);
+	PCPU_SET(curpmap, pmap);
 	critical_exit();
 }
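The cpu_switch.S hunks above, together with the PCPU_SET(curpmap, pmap)
calls in pmap_pinit0() and pmap_activate(), keep the per-CPU curpmap
field pointing at whatever pmap is live on each CPU, so the switch path
can clear the old pm_active bit without dereferencing
oldtd->td_proc->p_vmspace.  What follows is a rough C model of that
bookkeeping, with hypothetical names and plain (non-atomic) bit
operations standing in for the kernel's per-CPU storage and locked
btrl/btsl instructions:

/*
 * Rough, single-CPU-at-a-time model of the curpmap bookkeeping.
 * struct pmap_sketch, switch_pmap_sketch(), and the plain bit ops are
 * hypothetical stand-ins for the kernel's real structures.
 */
#include <stdint.h>

struct pmap_sketch {
	uint32_t	pm_active;	/* one bit per CPU running this pmap */
};

struct thread_sketch {
	struct pmap_sketch *td_pmap;	/* td_proc->p_vmspace's pmap */
};

static struct pmap_sketch *curpmap;	/* models PCPU(CURPMAP) */

static void
switch_pmap_sketch(struct thread_sketch *newtd, unsigned cpuid)
{
	/*
	 * Clear our bit in the *cached* old pmap; the point of r209789
	 * is that this no longer chases oldtd->td_proc->p_vmspace.
	 */
	if (curpmap != NULL)
		curpmap->pm_active &= ~(1u << cpuid);

	/* Set our bit in the new pmap and cache it as curpmap. */
	newtd->td_pmap->pm_active |= 1u << cpuid;
	curpmap = newtd->td_pmap;
}

int
main(void)
{
	struct pmap_sketch pmap_a = { 0 }, pmap_b = { 0 };
	struct thread_sketch td_a = { &pmap_a }, td_b = { &pmap_b };

	switch_pmap_sketch(&td_a, 0);	/* CPU 0 enters pmap_a */
	switch_pmap_sketch(&td_b, 0);	/* bit migrates to pmap_b */
	return (pmap_a.pm_active != 0);	/* exits 0: stale bit was cleared */
}

Accurate pm_active bits are what let the pmap_invalidate_*() routines
skip IPIs to CPUs that are no longer running a given pmap, which is the
waste the r209789 log message describes for single-threaded applications.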
Modified: stable/8/sys/i386/i386/pmap.c
==============================================================================
--- stable/8/sys/i386/i386/pmap.c	Fri Oct 15 02:58:49 2010	(r213886)
+++ stable/8/sys/i386/i386/pmap.c	Fri Oct 15 03:23:53 2010	(r213887)
@@ -1440,19 +1440,22 @@ pmap_map(vm_offset_t *virt, vm_paddr_t s
 void
 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
 {
-	pt_entry_t *endpte, oldpte, *pte;
+	pt_entry_t *endpte, oldpte, pa, *pte;
+	vm_page_t m;

 	oldpte = 0;
 	pte = vtopte(sva);
 	endpte = pte + count;
 	while (pte < endpte) {
-		oldpte |= *pte;
-		pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag |
-		    pmap_cache_bits((*ma)->md.pat_mode, 0) | PG_RW | PG_V);
+		m = *ma++;
+		pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0);
+		if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) {
+			oldpte |= *pte;
+			pte_store(pte, pa | pgeflag | PG_RW | PG_V);
+		}
 		pte++;
-		ma++;
 	}
-	if ((oldpte & PG_V) != 0)
+	if (__predict_false((oldpte & PG_V) != 0))
 		pmap_invalidate_range(kernel_pmap, sva, sva + count *
 		    PAGE_SIZE);
 }