Date: Thu, 27 Sep 2018 14:05:45 +0000 (UTC) From: Mateusz Guzik <mjg@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r338963 - in head/sys: amd64/amd64 conf Message-ID: <201809271405.w8RE5jxZ035293@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mjg Date: Thu Sep 27 14:05:44 2018 New Revision: 338963 URL: https://svnweb.freebsd.org/changeset/base/338963 Log: amd64: implement memcmp in assembly Both the in-kernel C variant and libc asm variant have very poor performance. The former compiles to a single byte comparison loop, which breaks down even for small sizes. The latter uses rep cmpsq/b which turn out to have very poor throughput and are slower than a hand-coded 32-byte comparison loop. Depending on size this is about 3-4 times faster than the current routines. Reviewed by: kib Approved by: re (gjb) Differential Revision: https://reviews.freebsd.org/D17328 Modified: head/sys/amd64/amd64/support.S head/sys/conf/files head/sys/conf/files.arm head/sys/conf/files.arm64 head/sys/conf/files.i386 head/sys/conf/files.mips head/sys/conf/files.powerpc head/sys/conf/files.riscv head/sys/conf/files.sparc64 Modified: head/sys/amd64/amd64/support.S ============================================================================== --- head/sys/amd64/amd64/support.S Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/amd64/amd64/support.S Thu Sep 27 14:05:44 2018 (r338963) @@ -101,6 +101,100 @@ ENTRY(sse2_pagezero) END(sse2_pagezero) /* + * memcmpy(b1, b2, len) + * rdi,rsi,len + */ +ENTRY(memcmp) + PUSH_FRAME_POINTER + cmpq $16,%rdx + jae 5f +1: + testq %rdx,%rdx + je 3f + xorl %ecx,%ecx +2: + movzbl (%rdi,%rcx,1),%eax + movzbl (%rsi,%rcx,1),%r8d + cmpb %r8b,%al + jne 4f + addq $1,%rcx + cmpq %rcx,%rdx + jz 3f + movzbl (%rdi,%rcx,1),%eax + movzbl (%rsi,%rcx,1),%r8d + cmpb %r8b,%al + jne 4f + addq $1,%rcx + cmpq %rcx,%rdx + jz 3f + movzbl (%rdi,%rcx,1),%eax + movzbl (%rsi,%rcx,1),%r8d + cmpb %r8b,%al + jne 4f + addq $1,%rcx + cmpq %rcx,%rdx + jz 3f + movzbl (%rdi,%rcx,1),%eax + movzbl (%rsi,%rcx,1),%r8d + cmpb %r8b,%al + jne 4f + addq $1,%rcx + cmpq %rcx,%rdx + jne 2b +3: + xorl %eax,%eax + POP_FRAME_POINTER + ret +4: + subl %r8d,%eax + POP_FRAME_POINTER + ret +5: + cmpq $32,%rdx + jae 7f +6: + /* + * 8 bytes + */ + movq (%rdi),%r8 + movq (%rsi),%r9 + cmpq %r8,%r9 + jne 1b + leaq 8(%rdi),%rdi + leaq 8(%rsi),%rsi + subq $8,%rdx + cmpq $8,%rdx + jae 6b + jl 1b + jmp 3b +7: + /* + * 32 bytes + */ + movq (%rsi),%r8 + movq 8(%rsi),%r9 + subq (%rdi),%r8 + subq 8(%rdi),%r9 + or %r8,%r9 + jnz 1b + + movq 16(%rsi),%r8 + movq 24(%rsi),%r9 + subq 16(%rdi),%r8 + subq 24(%rdi),%r9 + or %r8,%r9 + jnz 1b + + leaq 32(%rdi),%rdi + leaq 32(%rsi),%rsi + subq $32,%rdx + cmpq $32,%rdx + jae 7b + jnz 1b + jmp 3b +END(memcmp) + +/* * memmove(dst, src, cnt) * rdi, rsi, rdx * Adapted from bcopy written by: Modified: head/sys/conf/files ============================================================================== --- head/sys/conf/files Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files Thu Sep 27 14:05:44 2018 (r338963) @@ -4041,7 +4041,6 @@ libkern/murmur3_32.c standard libkern/mcount.c optional profiling-routine libkern/memcchr.c standard libkern/memchr.c standard -libkern/memcmp.c standard libkern/memmem.c optional gdb libkern/qsort.c standard libkern/qsort_r.c standard Modified: head/sys/conf/files.arm ============================================================================== --- head/sys/conf/files.arm Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files.arm Thu Sep 27 14:05:44 2018 (r338963) @@ -163,6 +163,7 @@ libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c standard +libkern/memcmp.c standard libkern/moddi3.c standard libkern/qdivrem.c standard libkern/ucmpdi2.c standard Modified: head/sys/conf/files.arm64 ============================================================================== --- head/sys/conf/files.arm64 Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files.arm64 Thu Sep 27 14:05:44 2018 (r338963) @@ -244,6 +244,7 @@ libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard +libkern/memcmp.c standard libkern/memset.c standard libkern/arm64/crc32c_armv8.S standard cddl/contrib/opensolaris/common/atomic/aarch64/opensolaris_atomic.S optional zfs | dtrace compile-with "${CDDL_C}" Modified: head/sys/conf/files.i386 ============================================================================== --- head/sys/conf/files.i386 Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files.i386 Thu Sep 27 14:05:44 2018 (r338963) @@ -548,6 +548,7 @@ kern/subr_sfbuf.c standard libkern/divdi3.c standard libkern/ffsll.c standard libkern/flsll.c standard +libkern/memcmp.c standard libkern/memset.c standard libkern/moddi3.c standard libkern/qdivrem.c standard Modified: head/sys/conf/files.mips ============================================================================== --- head/sys/conf/files.mips Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files.mips Thu Sep 27 14:05:44 2018 (r338963) @@ -65,6 +65,7 @@ libkern/cmpdi2.c optional mips | mipshf | mipsel | m libkern/ucmpdi2.c optional mips | mipshf | mipsel | mipselhf libkern/ashldi3.c standard libkern/ashrdi3.c standard +libkern/memcmp.c standard # cfe support dev/cfe/cfe_api.c optional cfe Modified: head/sys/conf/files.powerpc ============================================================================== --- head/sys/conf/files.powerpc Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files.powerpc Thu Sep 27 14:05:44 2018 (r338963) @@ -98,6 +98,7 @@ libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard libkern/lshrdi3.c optional powerpc | powerpcspe +libkern/memcmp.c standard libkern/memset.c standard libkern/moddi3.c optional powerpc | powerpcspe libkern/qdivrem.c optional powerpc | powerpcspe Modified: head/sys/conf/files.riscv ============================================================================== --- head/sys/conf/files.riscv Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files.riscv Thu Sep 27 14:05:44 2018 (r338963) @@ -22,6 +22,7 @@ libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard +libkern/memcmp.c standard libkern/memset.c standard riscv/riscv/autoconf.c standard riscv/riscv/bus_machdep.c standard Modified: head/sys/conf/files.sparc64 ============================================================================== --- head/sys/conf/files.sparc64 Thu Sep 27 13:54:09 2018 (r338962) +++ head/sys/conf/files.sparc64 Thu Sep 27 14:05:44 2018 (r338963) @@ -71,6 +71,7 @@ libkern/ffsll.c standard libkern/fls.c standard libkern/flsl.c standard libkern/flsll.c standard +libkern/memcmp.c standard sparc64/central/central.c optional central sparc64/ebus/ebus.c optional ebus sparc64/ebus/epic.c optional epic ebus
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201809271405.w8RE5jxZ035293>