Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 25 Oct 2016 14:04:35 +0000 (UTC)
From:      Andrew Turner <andrew@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r307909 - in head/sys: arm64/arm64 conf
Message-ID:  <201610251404.u9PE4ZgB002315@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: andrew
Date: Tue Oct 25 14:04:35 2016
New Revision: 307909
URL: https://svnweb.freebsd.org/changeset/base/307909

Log:
  Import the Cortex Strings memcpy and memmove into the kernel. On ThunderX
  these show a 9-10% reduction in user and system time for a buildworld -j48.
  
  Obtained from:	ABT Systems Ltd
  MFC after:	1 week
  Sponsored by:	The FreeBSD Foundation

Added:
  head/sys/arm64/arm64/memcpy.S
     - copied, changed from r307901, head/contrib/cortex-strings/src/aarch64/memcpy.S
  head/sys/arm64/arm64/memmove.S
     - copied, changed from r307901, head/contrib/cortex-strings/src/aarch64/memmove.S
Deleted:
  head/sys/arm64/arm64/bcopy.c
Modified:
  head/sys/conf/files.arm64

Copied and modified: head/sys/arm64/arm64/memcpy.S (from r307901, head/contrib/cortex-strings/src/aarch64/memcpy.S)
==============================================================================
--- head/contrib/cortex-strings/src/aarch64/memcpy.S	Tue Oct 25 05:45:47 2016	(r307901, copy source)
+++ head/sys/arm64/arm64/memcpy.S	Tue Oct 25 14:04:35 2016	(r307909)
@@ -52,6 +52,9 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64, unaligned accesses.
@@ -83,14 +86,6 @@
 
 #define L(l) .L ## l
 
-	.macro def_fn f p2align=0
-	.text
-	.p2align \p2align
-	.global \f
-	.type \f, %function
-\f:
-	.endm
-
 /* Copies are split into 3 main cases: small copies of up to 16 bytes,
    medium copies of 17..96 bytes which are fully unrolled. Large copies
    of more than 96 bytes align the destination and use an unrolled loop
@@ -100,7 +95,7 @@
    well as non-overlapping copies.
 */
 
-def_fn memcpy p2align=6
+ENTRY(memcpy)
 	prfm	PLDL1KEEP, [src]
 	add	srcend, src, count
 	add	dstend, dstin, count
@@ -221,5 +216,4 @@ L(copy_long):
 	stp	B_l, B_h, [dstend, -32]
 	stp	C_l, C_h, [dstend, -16]
 	ret
-
-	.size	memcpy, . - memcpy
+END(memcpy)

Copied and modified: head/sys/arm64/arm64/memmove.S (from r307901, head/contrib/cortex-strings/src/aarch64/memmove.S)
==============================================================================
--- head/contrib/cortex-strings/src/aarch64/memmove.S	Tue Oct 25 05:45:47 2016	(r307901, copy source)
+++ head/sys/arm64/arm64/memmove.S	Tue Oct 25 14:04:35 2016	(r307909)
@@ -52,19 +52,14 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
 /* Assumptions:
  *
  * ARMv8-a, AArch64, unaligned accesses
  */
 
-	.macro def_fn f p2align=0
-	.text
-	.p2align \p2align
-	.global \f
-	.type \f, %function
-\f:
-	.endm
-
 /* Parameters and result.  */
 #define dstin	x0
 #define src	x1
@@ -89,7 +84,12 @@
    unrolled loop processes 64 bytes per iteration.
 */
 
-def_fn memmove, 6
+ENTRY(bcopy)
+	/* Switch the input pointers when called as bcopy */
+	mov	x3, x1
+	mov	x1, x0
+	mov	x0, x3
+EENTRY(memmove)
 	sub	tmp1, dstin, src
 	cmp	count, 96
 	ccmp	tmp1, count, 2, hi
@@ -146,5 +146,5 @@ def_fn memmove, 6
 	stp	B_l, B_h, [dstin, 16]
 	stp	C_l, C_h, [dstin]
 3:	ret
-
-	.size	memmove, . - memmove
+EEND(memmove)
+END(bcopy)

Modified: head/sys/conf/files.arm64
==============================================================================
--- head/sys/conf/files.arm64	Tue Oct 25 14:01:13 2016	(r307908)
+++ head/sys/conf/files.arm64	Tue Oct 25 14:04:35 2016	(r307909)
@@ -72,7 +72,6 @@ arm64/acpica/OsdEnvironment.c	optional	a
 arm64/acpica/acpi_wakeup.c	optional	acpi
 arm64/acpica/pci_cfgreg.c	optional	acpi	pci
 arm64/arm64/autoconf.c		standard
-arm64/arm64/bcopy.c		standard
 arm64/arm64/bus_machdep.c	standard
 arm64/arm64/bus_space_asm.S	standard
 arm64/arm64/busdma_bounce.c	standard
@@ -98,6 +97,8 @@ arm64/arm64/in_cksum.c		optional	inet | 
 arm64/arm64/locore.S		standard	no-obj
 arm64/arm64/machdep.c		standard
 arm64/arm64/mem.c		standard
+arm64/arm64/memcpy.S		standard
+arm64/arm64/memmove.S		standard
 arm64/arm64/minidump_machdep.c	standard
 arm64/arm64/mp_machdep.c	optional	smp
 arm64/arm64/nexus.c		standard
@@ -178,7 +179,6 @@ libkern/ffsll.c			standard
 libkern/fls.c			standard
 libkern/flsl.c			standard
 libkern/flsll.c			standard
-libkern/memmove.c		standard
 libkern/memset.c		standard
 cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S	optional zfs | dtrace compile-with "${CDDL_C}"
 cddl/dev/dtrace/aarch64/dtrace_asm.S			optional dtrace compile-with "${DTRACE_S}"



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201610251404.u9PE4ZgB002315>