From owner-svn-src-stable-7@FreeBSD.ORG Wed Sep 22 20:04:04 2010 Return-Path: Delivered-To: svn-src-stable-7@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 5D45F1065769; Wed, 22 Sep 2010 20:04:04 +0000 (UTC) (envelope-from marius@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 4C5B08FC12; Wed, 22 Sep 2010 20:04:04 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id o8MK44dA041744; Wed, 22 Sep 2010 20:04:04 GMT (envelope-from marius@svn.freebsd.org) Received: (from marius@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id o8MK444o041739; Wed, 22 Sep 2010 20:04:04 GMT (envelope-from marius@svn.freebsd.org) Message-Id: <201009222004.o8MK444o041739@svn.freebsd.org> From: Marius Strobl Date: Wed, 22 Sep 2010 20:04:04 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-7@freebsd.org X-SVN-Group: stable-7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r213021 - in stable/7/sys/sparc64: include sparc64 X-BeenThere: svn-src-stable-7@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: SVN commit messages for only the 7-stable src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 22 Sep 2010 20:04:04 -0000 Author: marius Date: Wed Sep 22 20:04:03 2010 New Revision: 213021 URL: http://svn.freebsd.org/changeset/base/213021 Log: MFC: rr212709, r212730 Add a VIS-based block copy function for SPARC64 V and later, which additionally takes advantage of the prefetch cache of these CPUs. Unlike the uncommitted US-III version, which provide no measurable speedup or even resulted in a slight slowdown on certain CPUs models compared to using the US-I version with these, the SPARC64 version actually results in a slight improvement. Modified: stable/7/sys/sparc64/include/md_var.h stable/7/sys/sparc64/sparc64/machdep.c stable/7/sys/sparc64/sparc64/support.S Directory Properties: stable/7/sys/ (props changed) stable/7/sys/cddl/contrib/opensolaris/ (props changed) stable/7/sys/contrib/dev/acpica/ (props changed) stable/7/sys/contrib/pf/ (props changed) Modified: stable/7/sys/sparc64/include/md_var.h ============================================================================== --- stable/7/sys/sparc64/include/md_var.h Wed Sep 22 20:03:59 2010 (r213020) +++ stable/7/sys/sparc64/include/md_var.h Wed Sep 22 20:04:03 2010 (r213021) @@ -58,6 +58,8 @@ struct md_utrap *utrap_hold(struct md_ut cpu_block_copy_t spitfire_block_copy; cpu_block_zero_t spitfire_block_zero; +cpu_block_copy_t zeus_block_copy; +cpu_block_zero_t zeus_block_zero; extern cpu_block_copy_t *cpu_block_copy; extern cpu_block_zero_t *cpu_block_zero; Modified: stable/7/sys/sparc64/sparc64/machdep.c ============================================================================== --- stable/7/sys/sparc64/sparc64/machdep.c Wed Sep 22 20:03:59 2010 (r213020) +++ stable/7/sys/sparc64/sparc64/machdep.c Wed Sep 22 20:04:03 2010 (r213021) @@ -486,7 +486,6 @@ sparc64_init(caddr_t mdp, u_long o1, u_l if (cpu_use_vis) { switch (cpu_impl) { case CPU_IMPL_SPARC64: - case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: @@ -500,6 +499,10 @@ sparc64_init(caddr_t mdp, u_long o1, u_l cpu_block_copy = spitfire_block_copy; cpu_block_zero = spitfire_block_zero; break; + case CPU_IMPL_SPARC64V: + cpu_block_copy = zeus_block_copy; + cpu_block_zero = zeus_block_zero; + break; } } Modified: stable/7/sys/sparc64/sparc64/support.S ============================================================================== --- stable/7/sys/sparc64/sparc64/support.S Wed Sep 22 20:03:59 2010 (r213020) +++ stable/7/sys/sparc64/sparc64/support.S Wed Sep 22 20:04:03 2010 (r213021) @@ -661,8 +661,121 @@ ENTRY(spitfire_block_copy) END(spitfire_block_copy) /* + * void zeus_block_copy(void *src, void *dst, size_t len) + */ +ENTRY(zeus_block_copy) + prefetch [%o0 + (0 * 64)], 0 + + rdpr %pil, %o3 + wrpr %g0, PIL_TICK, %pil + + wr %g0, ASI_BLK_S, %asi + wr %g0, FPRS_FEF, %fprs + + sub PCB_REG, TF_SIZEOF, %o4 + ldx [%o4 + TF_FPRS], %o5 + andcc %o5, FPRS_FEF, %g0 + bz,a,pt %xcc, 1f + nop + stda %f0, [PCB_REG + PCB_UFP + (0 * 64)] %asi + stda %f16, [PCB_REG + PCB_UFP + (1 * 64)] %asi + stda %f32, [PCB_REG + PCB_UFP + (2 * 64)] %asi + stda %f48, [PCB_REG + PCB_UFP + (3 * 64)] %asi + membar #Sync + + andn %o5, FPRS_FEF, %o5 + stx %o5, [%o4 + TF_FPRS] + ldx [PCB_REG + PCB_FLAGS], %o4 + or %o4, PCB_FEF, %o4 + stx %o4, [PCB_REG + PCB_FLAGS] + +1: wrpr %o3, 0, %pil + + ldd [%o0 + (0 * 8)], %f0 + prefetch [%o0 + (1 * 64)], 0 + ldd [%o0 + (1 * 8)], %f2 + prefetch [%o0 + (2 * 64)], 0 + fmovd %f0, %f32 + ldd [%o0 + (2 * 8)], %f4 + prefetch [%o0 + (3 * 64)], 0 + fmovd %f2, %f34 + ldd [%o0 + (3 * 8)], %f6 + prefetch [%o0 + (4 * 64)], 1 + fmovd %f4, %f36 + ldd [%o0 + (4 * 8)], %f8 + prefetch [%o0 + (8 * 64)], 1 + fmovd %f6, %f38 + ldd [%o0 + (5 * 8)], %f10 + prefetch [%o0 + (12 * 64)], 1 + fmovd %f8, %f40 + ldd [%o0 + (6 * 8)], %f12 + prefetch [%o0 + (16 * 64)], 1 + fmovd %f10, %f42 + ldd [%o0 + (7 * 8)], %f14 + ldd [%o0 + (8 * 8)], %f0 + sub %o2, 64, %o2 + add %o0, 64, %o0 + prefetch [%o0 + (19 * 64)], 1 + ba,pt %xcc, 2f + prefetch [%o0 + (23 * 64)], 1 + .align 32 + +2: ldd [%o0 + (1 * 8)], %f2 + fmovd %f12, %f44 + ldd [%o0 + (2 * 8)], %f4 + fmovd %f14, %f46 + stda %f32, [%o1] %asi + ldd [%o0 + (3 * 8)], %f6 + fmovd %f0, %f32 + ldd [%o0 + (4 * 8)], %f8 + fmovd %f2, %f34 + ldd [%o0 + (5 * 8)], %f10 + fmovd %f4, %f36 + ldd [%o0 + (6 * 8)], %f12 + fmovd %f6, %f38 + ldd [%o0 + (7 * 8)], %f14 + fmovd %f8, %f40 + ldd [%o0 + (8 * 8)], %f0 + fmovd %f10, %f42 + sub %o2, 64, %o2 + prefetch [%o0 + (3 * 64)], 0 + add %o1, 64, %o1 + prefetch [%o0 + (24 * 64)], 1 + add %o0, 64, %o0 + cmp %o2, 64 + 8 + bgu,pt %xcc, 2b + prefetch [%o0 + (12 * 64)], 1 + ldd [%o0 + (1 * 8)], %f2 + fsrc1 %f12, %f44 + ldd [%o0 + (2 * 8)], %f4 + fsrc1 %f14, %f46 + stda %f32, [%o1] %asi + ldd [%o0 + (3 * 8)], %f6 + fsrc1 %f0, %f32 + ldd [%o0 + (4 * 8)], %f8 + fsrc1 %f2, %f34 + ldd [%o0 + (5 * 8)], %f10 + fsrc1 %f4, %f36 + ldd [%o0 + (6 * 8)], %f12 + fsrc1 %f6, %f38 + ldd [%o0 + (7 * 8)], %f14 + fsrc1 %f8, %f40 + add %o1, 64, %o1 + fsrc1 %f10, %f42 + fsrc1 %f12, %f44 + fsrc1 %f14, %f46 + stda %f32, [%o1] %asi + membar #Sync + + retl + wr %g0, 0, %fprs +END(zeus_block_copy) + +/* * void spitfire_block_zero(void *dst, size_t len) + * void zeus_block_zero(void *dst, size_t len) */ +ALTENTRY(zeus_block_zero) ENTRY(spitfire_block_zero) rdpr %pil, %o3 wrpr %g0, PIL_TICK, %pil