Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 27 Sep 2018 14:05:45 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r338963 - in head/sys: amd64/amd64 conf
Message-ID:  <201809271405.w8RE5jxZ035293@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Thu Sep 27 14:05:44 2018
New Revision: 338963
URL: https://svnweb.freebsd.org/changeset/base/338963

Log:
  amd64: implement memcmp in assembly
  
  Both the in-kernel C variant and libc asm variant have very poor performance.
  The former compiles to a single byte comparison loop, which breaks down even
  for small sizes. The latter uses rep cmpsq/b which turn out to have very poor
  throughput and are slower than a hand-coded 32-byte comparison loop.
  
  Depending on size this is about 3-4 times faster than the current routines.
  
  Reviewed by:	kib
  Approved by:	re (gjb)
  Differential Revision:	https://reviews.freebsd.org/D17328

Modified:
  head/sys/amd64/amd64/support.S
  head/sys/conf/files
  head/sys/conf/files.arm
  head/sys/conf/files.arm64
  head/sys/conf/files.i386
  head/sys/conf/files.mips
  head/sys/conf/files.powerpc
  head/sys/conf/files.riscv
  head/sys/conf/files.sparc64

Modified: head/sys/amd64/amd64/support.S
==============================================================================
--- head/sys/amd64/amd64/support.S	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/amd64/amd64/support.S	Thu Sep 27 14:05:44 2018	(r338963)
@@ -101,6 +101,100 @@ ENTRY(sse2_pagezero)
 END(sse2_pagezero)
 
 /*
+ * memcmp(b1, b2, len): %eax = 0 if equal, else b1[i] - b2[i] at the first difference
+ *	   rdi,rsi,rdx
+ */
+ENTRY(memcmp)
+	PUSH_FRAME_POINTER
+	cmpq	$16,%rdx
+	jae	5f			/* len >= 16: try the word-wise loops first */
+1:					/* byte-wise compare of the rdx bytes at rdi/rsi */
+	testq	%rdx,%rdx
+	je	3f			/* nothing (left) to compare: equal */
+	xorl	%ecx,%ecx		/* rcx = byte index */
+2:					/* byte loop, unrolled four times */
+	movzbl	(%rdi,%rcx,1),%eax
+	movzbl	(%rsi,%rcx,1),%r8d
+	cmpb	%r8b,%al
+	jne	4f
+	addq	$1,%rcx
+	cmpq	%rcx,%rdx
+	jz	3f
+	movzbl	(%rdi,%rcx,1),%eax
+	movzbl	(%rsi,%rcx,1),%r8d
+	cmpb	%r8b,%al
+	jne	4f
+	addq	$1,%rcx
+	cmpq	%rcx,%rdx
+	jz	3f
+	movzbl	(%rdi,%rcx,1),%eax
+	movzbl	(%rsi,%rcx,1),%r8d
+	cmpb	%r8b,%al
+	jne	4f
+	addq	$1,%rcx
+	cmpq	%rcx,%rdx
+	jz	3f
+	movzbl	(%rdi,%rcx,1),%eax
+	movzbl	(%rsi,%rcx,1),%r8d
+	cmpb	%r8b,%al
+	jne	4f
+	addq	$1,%rcx
+	cmpq	%rcx,%rdx
+	jne	2b
+3:					/* all bytes matched */
+	xorl	%eax,%eax
+	POP_FRAME_POINTER
+	ret
+4:					/* eax/r8d = zero-extended differing bytes of b1/b2 */
+	subl	%r8d,%eax
+	POP_FRAME_POINTER
+	ret
+5:
+	cmpq	$32,%rdx
+	jae	7f
+6:
+	/*
+	 * 8 bytes per iteration; rdx >= 8 on every entry
+	 */
+	movq	(%rdi),%r8
+	movq	(%rsi),%r9
+	cmpq	%r8,%r9
+	jne	1b			/* mismatch in this word: byte loop locates it */
+	leaq	8(%rdi),%rdi
+	leaq	8(%rsi),%rsi
+	subq	$8,%rdx
+	cmpq	$8,%rdx
+	jae	6b
+	/* rdx is now 0..7; the byte-wise path at 1 also handles rdx == 0 */
+	jmp	1b
+7:
+	/*
+	 * 32 bytes per iteration; rdx >= 32 on every entry
+	 */
+	movq	(%rsi),%r8
+	movq	8(%rsi),%r9
+	subq	(%rdi),%r8
+	subq	8(%rdi),%r9
+	or	%r8,%r9			/* non-zero iff either word pair differs */
+	jnz	1b			/* mismatch in this chunk: byte loop locates it */
+
+	movq	16(%rsi),%r8
+	movq	24(%rsi),%r9
+	subq	16(%rdi),%r8
+	subq	24(%rdi),%r9
+	or	%r8,%r9
+	jnz	1b
+
+	leaq	32(%rdi),%rdi
+	leaq	32(%rsi),%rsi
+	subq	$32,%rdx
+	cmpq	$32,%rdx
+	jae	7b
+	/* rdx is now 0..31; the byte-wise path at 1 also handles rdx == 0 */
+	jmp	1b
+END(memcmp)
+
+/*
  * memmove(dst, src, cnt)
  *         rdi, rsi, rdx
  * Adapted from bcopy written by:

Modified: head/sys/conf/files
==============================================================================
--- head/sys/conf/files	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files	Thu Sep 27 14:05:44 2018	(r338963)
@@ -4041,7 +4041,6 @@ libkern/murmur3_32.c		standard
 libkern/mcount.c		optional profiling-routine
 libkern/memcchr.c		standard
 libkern/memchr.c		standard
-libkern/memcmp.c		standard
 libkern/memmem.c		optional gdb
 libkern/qsort.c			standard
 libkern/qsort_r.c		standard

Modified: head/sys/conf/files.arm
==============================================================================
--- head/sys/conf/files.arm	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files.arm	Thu Sep 27 14:05:44 2018	(r338963)
@@ -163,6 +163,7 @@ libkern/fls.c			standard
 libkern/flsl.c			standard
 libkern/flsll.c			standard
 libkern/lshrdi3.c		standard
+libkern/memcmp.c		standard
 libkern/moddi3.c		standard
 libkern/qdivrem.c		standard
 libkern/ucmpdi2.c		standard

Modified: head/sys/conf/files.arm64
==============================================================================
--- head/sys/conf/files.arm64	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files.arm64	Thu Sep 27 14:05:44 2018	(r338963)
@@ -244,6 +244,7 @@ libkern/ffsll.c			standard
 libkern/fls.c			standard
 libkern/flsl.c			standard
 libkern/flsll.c			standard
+libkern/memcmp.c		standard
 libkern/memset.c		standard
 libkern/arm64/crc32c_armv8.S	standard
 cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S	optional zfs | dtrace compile-with "${CDDL_C}"

Modified: head/sys/conf/files.i386
==============================================================================
--- head/sys/conf/files.i386	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files.i386	Thu Sep 27 14:05:44 2018	(r338963)
@@ -548,6 +548,7 @@ kern/subr_sfbuf.c		standard
 libkern/divdi3.c		standard
 libkern/ffsll.c			standard
 libkern/flsll.c			standard
+libkern/memcmp.c		standard
 libkern/memset.c		standard
 libkern/moddi3.c		standard
 libkern/qdivrem.c		standard

Modified: head/sys/conf/files.mips
==============================================================================
--- head/sys/conf/files.mips	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files.mips	Thu Sep 27 14:05:44 2018	(r338963)
@@ -65,6 +65,7 @@ libkern/cmpdi2.c			optional	mips | mipshf | mipsel | m
 libkern/ucmpdi2.c			optional	mips | mipshf | mipsel | mipselhf
 libkern/ashldi3.c			standard
 libkern/ashrdi3.c			standard
+libkern/memcmp.c			standard
 
 # cfe support
 dev/cfe/cfe_api.c			optional	cfe

Modified: head/sys/conf/files.powerpc
==============================================================================
--- head/sys/conf/files.powerpc	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files.powerpc	Thu Sep 27 14:05:44 2018	(r338963)
@@ -98,6 +98,7 @@ libkern/fls.c			standard
 libkern/flsl.c			standard
 libkern/flsll.c			standard
 libkern/lshrdi3.c		optional	powerpc | powerpcspe
+libkern/memcmp.c		standard
 libkern/memset.c		standard
 libkern/moddi3.c		optional	powerpc | powerpcspe
 libkern/qdivrem.c		optional	powerpc | powerpcspe

Modified: head/sys/conf/files.riscv
==============================================================================
--- head/sys/conf/files.riscv	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files.riscv	Thu Sep 27 14:05:44 2018	(r338963)
@@ -22,6 +22,7 @@ libkern/ffsll.c			standard
 libkern/fls.c			standard
 libkern/flsl.c			standard
 libkern/flsll.c			standard
+libkern/memcmp.c		standard
 libkern/memset.c		standard
 riscv/riscv/autoconf.c		standard
 riscv/riscv/bus_machdep.c	standard

Modified: head/sys/conf/files.sparc64
==============================================================================
--- head/sys/conf/files.sparc64	Thu Sep 27 13:54:09 2018	(r338962)
+++ head/sys/conf/files.sparc64	Thu Sep 27 14:05:44 2018	(r338963)
@@ -71,6 +71,7 @@ libkern/ffsll.c			standard
 libkern/fls.c			standard
 libkern/flsl.c			standard
 libkern/flsll.c			standard
+libkern/memcmp.c		standard
 sparc64/central/central.c	optional	central
 sparc64/ebus/ebus.c		optional	ebus
 sparc64/ebus/epic.c		optional	epic ebus



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201809271405.w8RE5jxZ035293>