From owner-freebsd-mips@FreeBSD.ORG Fri Nov 18 07:45:15 2011 Return-Path: Delivered-To: freebsd-mips@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id CD7FA1065670 for ; Fri, 18 Nov 2011 07:45:15 +0000 (UTC) (envelope-from c.jayachandran@gmail.com) Received: from mail-ey0-f182.google.com (mail-ey0-f182.google.com [209.85.215.182]) by mx1.freebsd.org (Postfix) with ESMTP id 4D6EE8FC13 for ; Fri, 18 Nov 2011 07:45:15 +0000 (UTC) Received: by eyd10 with SMTP id 10so4264260eyd.13 for ; Thu, 17 Nov 2011 23:45:14 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=mime-version:sender:date:x-google-sender-auth:message-id:subject :from:to:content-type:content-transfer-encoding; bh=T7+XBDiRdKhnWlyVS6XQyUT2hCiOfTJoZ67KO4pA+LA=; b=LovdviYiEP2cQazBVmBwOKN5WSIZijzsu32VX9qDbqhdlFVlR51L0e4EKwd73O5Ix3 /13nv7zAQBjzpl/aCMUzvyTjD+edtnaGMIW1nd2Ykz3f+gH6C2afD29oXI0lm7XO9DW3 E92tpSCyOoGwwWgQ7nayBJDrcWT87oAPkmypY= MIME-Version: 1.0 Received: by 10.180.105.102 with SMTP id gl6mr1817147wib.46.1321602312587; Thu, 17 Nov 2011 23:45:12 -0800 (PST) Sender: c.jayachandran@gmail.com Received: by 10.216.18.9 with HTTP; Thu, 17 Nov 2011 23:45:12 -0800 (PST) Date: Fri, 18 Nov 2011 13:15:12 +0530 X-Google-Sender-Auth: jbjiLLK7Qagxku4anqLNhqvpSLc Message-ID: From: "Jayachandran C." To: freebsd-mips@freebsd.org Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: quoted-printable Subject: pmap changes for mips SMP X-BeenThere: freebsd-mips@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: Porting FreeBSD to MIPS List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 18 Nov 2011 07:45:15 -0000 I've committed this change to MIPS pmap.c; this optimization gives a significant improvement in fork/exec performance on SMP systems with a large number of CPUs. 
I have stress tested it on XLR/XLP, but let me know if this causes regressions on any other platforms. Thanks, JC. ---------- Forwarded message ---------- From: Jayachandran C. Date: Thu, Nov 17, 2011 at 6:44 PM Subject: svn commit: r227623 - head/sys/mips/mips Author: jchandra Date: Thu Nov 17 13:14:59 2011 New Revision: 227623 URL: http://svn.freebsd.org/changeset/base/227623 Log: =A0Do pmap update only on active CPUs. =A0The pmap update_page/invalidate_page/invalidate_all operations have to be =A0done only on active cpus. In the simplest case, if the process is not =A0active on any other CPUs, we can just do the operation on the current CP= U. =A0This change replaces the call to smp_rendezvous() for these operations w= ith =A0smp_rendezvous_cpus() in case there is more than one active CPU, or with a = direct =A0function call if there is just one active CPU. =A0This change gives a significant performance increase in fork/exec benchmark= s =A0on XLR/XLS/XLP with 32 cpus. =A0Reviewed by: =A0alc Modified: =A0head/sys/mips/mips/pmap.c Modified: head/sys/mips/mips/pmap.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D --- head/sys/mips/mips/pmap.c =A0 Thu Nov 17 13:14:07 2011 =A0 =A0 =A0 =A0(= r227622) +++ head/sys/mips/mips/pmap.c =A0 Thu Nov 17 13:14:59 2011 =A0 =A0 =A0 =A0(= r227623) @@ -181,7 +181,6 @@ static pt_entry_t init_pte_prot(vm_offse =A0#ifdef SMP =A0static void pmap_invalidate_page_action(void *arg); -static void pmap_invalidate_all_action(void *arg); =A0static void pmap_update_page_action(void *arg); =A0#endif @@ -622,119 +621,94 @@ pmap_init(void) =A0* Low level helper routines..... 
=A0***************************************************/ +#ifdef SMP =A0static __inline void -pmap_invalidate_all_local(pmap_t pmap) +pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) =A0{ - =A0 =A0 =A0 u_int cpuid; + =A0 =A0 =A0 int =A0 =A0 cpuid, cpu, self; + =A0 =A0 =A0 cpuset_t active_cpus; + =A0 =A0 =A0 sched_pin(); + =A0 =A0 =A0 if (is_kernel_pmap(pmap)) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 smp_rendezvous(NULL, fn, NULL, arg); + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out; + =A0 =A0 =A0 } + =A0 =A0 =A0 /* Force ASID update on inactive CPUs */ + =A0 =A0 =A0 CPU_FOREACH(cpu) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (!CPU_ISSET(cpu, &pmap->pm_active)) + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap->pm_asid[cpu].gen =3D 0; + =A0 =A0 =A0 } =A0 =A0 =A0 =A0cpuid =3D PCPU_GET(cpuid); + =A0 =A0 =A0 /* + =A0 =A0 =A0 =A0* XXX: barrier/locking for active? + =A0 =A0 =A0 =A0* + =A0 =A0 =A0 =A0* Take a snapshot of active here, any further changes are = ignored. + =A0 =A0 =A0 =A0* tlb update/invalidate should be harmless on inactive CPU= s + =A0 =A0 =A0 =A0*/ + =A0 =A0 =A0 active_cpus =3D pmap->pm_active; + =A0 =A0 =A0 self =3D CPU_ISSET(cpuid, &active_cpus); + =A0 =A0 =A0 CPU_CLR(cpuid, &active_cpus); + =A0 =A0 =A0 /* Optimize for the case where this cpu is the only active on= e */ + =A0 =A0 =A0 if (CPU_EMPTY(&active_cpus)) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (self) + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 fn(arg); + =A0 =A0 =A0 } else { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (self) + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 CPU_SET(cpuid, &active_cpus); + =A0 =A0 =A0 =A0 =A0 =A0 =A0 smp_rendezvous_cpus(active_cpus, NULL, fn, NU= LL, arg); + =A0 =A0 =A0 } +out: + =A0 =A0 =A0 sched_unpin(); +} +#else /* !SMP */ +static __inline void +pmap_call_on_active_cpus(pmap_t pmap, void (*fn)(void *), void *arg) +{ + =A0 =A0 =A0 int =A0 =A0 cpuid; - =A0 =A0 =A0 if (pmap =3D=3D kernel_pmap) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_invalidate_all(); + =A0 =A0 =A0 if 
(is_kernel_pmap(pmap)) { + =A0 =A0 =A0 =A0 =A0 =A0 =A0 fn(arg); =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return; =A0 =A0 =A0 =A0} - =A0 =A0 =A0 if (CPU_ISSET(cpuid, &pmap->pm_active)) - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_invalidate_all_user(pmap); - =A0 =A0 =A0 else + =A0 =A0 =A0 cpuid =3D PCPU_GET(cpuid); + =A0 =A0 =A0 if (!CPU_ISSET(cpuid, &pmap->pm_active)) =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0pmap->pm_asid[cpuid].gen =3D 0; + =A0 =A0 =A0 else + =A0 =A0 =A0 =A0 =A0 =A0 =A0 fn(arg); =A0} +#endif /* SMP */ -#ifdef SMP =A0static void =A0pmap_invalidate_all(pmap_t pmap) =A0{ - =A0 =A0 =A0 smp_rendezvous(0, pmap_invalidate_all_action, 0, pmap); + =A0 =A0 =A0 pmap_call_on_active_cpus(pmap, + =A0 =A0 =A0 =A0 =A0 (void (*)(void *))tlb_invalidate_all_user, pmap); =A0} -static void -pmap_invalidate_all_action(void *arg) -{ - - =A0 =A0 =A0 pmap_invalidate_all_local((pmap_t)arg); -} -#else -static void -pmap_invalidate_all(pmap_t pmap) -{ - - =A0 =A0 =A0 pmap_invalidate_all_local(pmap); -} -#endif - -static __inline void -pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va) -{ - =A0 =A0 =A0 u_int cpuid; - - =A0 =A0 =A0 cpuid =3D PCPU_GET(cpuid); - - =A0 =A0 =A0 if (is_kernel_pmap(pmap)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_invalidate_address(pmap, va); - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 if (pmap->pm_asid[cpuid].gen !=3D PCPU_GET(asid_generation)) - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 else if (!CPU_ISSET(cpuid, &pmap->pm_active)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap->pm_asid[cpuid].gen =3D 0; - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 tlb_invalidate_address(pmap, va); -} - -#ifdef SMP =A0struct pmap_invalidate_page_arg { =A0 =A0 =A0 =A0pmap_t pmap; =A0 =A0 =A0 =A0vm_offset_t va; =A0}; =A0static void -pmap_invalidate_page(pmap_t pmap, vm_offset_t va) -{ - =A0 =A0 =A0 struct pmap_invalidate_page_arg arg; - - =A0 =A0 =A0 arg.pmap =3D pmap; - =A0 =A0 =A0 arg.va =3D va; - =A0 =A0 =A0 smp_rendezvous(0, 
pmap_invalidate_page_action, 0, &arg); -} - -static void =A0pmap_invalidate_page_action(void *arg) =A0{ =A0 =A0 =A0 =A0struct pmap_invalidate_page_arg *p =3D arg; - =A0 =A0 =A0 pmap_invalidate_page_local(p->pmap, p->va); + =A0 =A0 =A0 tlb_invalidate_address(p->pmap, p->va); =A0} -#else + =A0static void =A0pmap_invalidate_page(pmap_t pmap, vm_offset_t va) =A0{ + =A0 =A0 =A0 struct pmap_invalidate_page_arg arg; - =A0 =A0 =A0 pmap_invalidate_page_local(pmap, va); -} -#endif - -static __inline void -pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte) -{ - =A0 =A0 =A0 u_int cpuid; - - =A0 =A0 =A0 cpuid =3D PCPU_GET(cpuid); - - =A0 =A0 =A0 if (is_kernel_pmap(pmap)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_update(pmap, va, pte); - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 if (pmap->pm_asid[cpuid].gen !=3D PCPU_GET(asid_generation)) - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 else if (!CPU_ISSET(cpuid, &pmap->pm_active)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap->pm_asid[cpuid].gen =3D 0; - =A0 =A0 =A0 =A0 =A0 =A0 =A0 return; - =A0 =A0 =A0 } - =A0 =A0 =A0 tlb_update(pmap, va, pte); + =A0 =A0 =A0 arg.pmap =3D pmap; + =A0 =A0 =A0 arg.va =3D va; + =A0 =A0 =A0 pmap_call_on_active_cpus(pmap, pmap_invalidate_page_action, &= arg); =A0} -#ifdef SMP =A0struct pmap_update_page_arg { =A0 =A0 =A0 =A0pmap_t pmap; =A0 =A0 =A0 =A0vm_offset_t va; @@ -742,31 +716,23 @@ struct pmap_update_page_arg { =A0}; =A0static void -pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) -{ - =A0 =A0 =A0 struct pmap_update_page_arg arg; - - =A0 =A0 =A0 arg.pmap =3D pmap; - =A0 =A0 =A0 arg.va =3D va; - =A0 =A0 =A0 arg.pte =3D pte; - =A0 =A0 =A0 smp_rendezvous(0, pmap_update_page_action, 0, &arg); -} - -static void =A0pmap_update_page_action(void *arg) =A0{ =A0 =A0 =A0 =A0struct pmap_update_page_arg *p =3D arg; - =A0 =A0 =A0 pmap_update_page_local(p->pmap, p->va, p->pte); + =A0 =A0 =A0 tlb_update(p->pmap, p->va, p->pte); =A0} -#else + =A0static void 
=A0pmap_update_page(pmap_t pmap, vm_offset_t va, pt_entry_t pte) =A0{ + =A0 =A0 =A0 struct pmap_update_page_arg arg; - =A0 =A0 =A0 pmap_update_page_local(pmap, va, pte); + =A0 =A0 =A0 arg.pmap =3D pmap; + =A0 =A0 =A0 arg.va =3D va; + =A0 =A0 =A0 arg.pte =3D pte; + =A0 =A0 =A0 pmap_call_on_active_cpus(pmap, pmap_update_page_action, &arg)= ; =A0} -#endif =A0/* =A0* =A0 =A0 Routine: =A0 =A0 =A0 =A0pmap_extract @@ -3213,7 +3179,7 @@ pmap_emulate_modified(pmap_t pmap, vm_of =A0#ifdef SMP =A0 =A0 =A0 =A0/* It is possible that some other CPU changed m-bit */ =A0 =A0 =A0 =A0if (!pte_test(pte, PTE_V) || pte_test(pte, PTE_D)) { - =A0 =A0 =A0 =A0 =A0 =A0 =A0 pmap_update_page_local(pmap, va, *pte); + =A0 =A0 =A0 =A0 =A0 =A0 =A0 tlb_update(pmap, va, *pte); =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0PMAP_UNLOCK(pmap); =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (0); =A0 =A0 =A0 =A0} @@ -3227,7 +3193,7 @@ pmap_emulate_modified(pmap_t pmap, vm_of =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return (1); =A0 =A0 =A0 =A0} =A0 =A0 =A0 =A0pte_set(pte, PTE_D); - =A0 =A0 =A0 pmap_update_page_local(pmap, va, *pte); + =A0 =A0 =A0 tlb_update(pmap, va, *pte); =A0 =A0 =A0 =A0pa =3D TLBLO_PTE_TO_PA(*pte); =A0 =A0 =A0 =A0if (!page_is_managed(pa)) =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0panic("pmap_emulate_modified: unmanaged page= ");