Date: Sat, 26 Jun 2021 16:28:25 GMT
From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject: git: a3503647f776 - stable/12 - amd64: move memcmp checks upfront
Message-ID: <202106261628.15QGSPg1035235@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/12 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=a3503647f776a96ae8e65b6225cc4f29ad573bf9

commit a3503647f776a96ae8e65b6225cc4f29ad573bf9
Author: Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2021-01-31 15:46:18 +0000
Commit: Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2021-06-26 15:54:58 +0000

    amd64: move memcmp checks upfront

    This is a tradeoff which saves jumps for smaller sizes while making
    the 8-16 range slower (roughly in line with the other cases).

    Tested with glibc test suite.

    For example size 3 (most common with vfs namecache) (ops/s):
    before: 407086026
    after: 461391995

    The regressed range of 8-16 (with 8 as example):
    before: 540850489
    after: 461671032

    (cherry picked from commit f1be262ec11c1c35e6485f432415b5b52adb505d)
---
 lib/libc/amd64/string/memcmp.S | 50 ++++++++++++++++++++++------------------
 sys/amd64/amd64/support.S | 52 +++++++++++++++++++++++------------------
 2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S index 231ab2175804..04c32bebe439 100644 --- a/lib/libc/amd64/string/memcmp.S +++ b/lib/libc/amd64/string/memcmp.S @@ -39,9 +39,25 @@ ENTRY(memcmp) cmpq $16,%rdx ja 101632f -100816: cmpb $8,%dl - jl 100408f + jg 100816f + + cmpb $4,%dl + jg 100408f + + cmpb $2,%dl + jge 100204f + + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + subl %r8d,%eax +100000: + ret + + ALIGN_TEXT +100816: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 @@ -51,9 +67,8 @@ ENTRY(memcmp) cmpq %r8,%r9 jne 10081608f ret + ALIGN_TEXT 100408: - cmpb $4,%dl - jl 100204f movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d @@ -63,9 +78,8 @@ ENTRY(memcmp) cmpl %r8d,%r9d jne 10040804f ret + ALIGN_TEXT 100204: - cmpb $2,%dl - jl 100001f movzwl (%rdi),%r8d movzwl (%rsi),%r9d cmpl %r8d,%r9d @@ -75,15 +89,7 @@ ENTRY(memcmp) cmpl %r8d,%r9d jne 1f ret -100001: - cmpb $1,%dl - jl 100000f - movzbl (%rdi),%eax - movzbl (%rsi),%r8d - subl %r8d,%eax
-100000: - ret -ALIGN_TEXT + ALIGN_TEXT 101632: cmpq $32,%rdx ja 103200f @@ -104,7 +110,7 @@ ALIGN_TEXT cmpq %r8,%r9 jne 10163224f ret -ALIGN_TEXT + ALIGN_TEXT 103200: movq (%rdi),%r8 movq 8(%rdi),%r9 @@ -134,7 +140,7 @@ ALIGN_TEXT * * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). */ -ALIGN_TEXT + ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi @@ -146,29 +152,29 @@ ALIGN_TEXT leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163208: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10040804: leaq -4(%rdi,%rdx),%rdi leaq -4(%rsi,%rdx),%rsi jmp 1f -ALIGN_TEXT + ALIGN_TEXT 80: movl (%rdi),%r8d movl (%rsi),%r9d diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 93d2d17150cc..1a08315c2c46 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -117,9 +117,26 @@ ENTRY(memcmp) cmpq $16,%rdx ja 101632f -100816: cmpb $8,%dl - jl 100408f + jg 100816f + + cmpb $4,%dl + jg 100408f + + cmpb $2,%dl + jge 100204f + + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + subl %r8d,%eax +100000: + POP_FRAME_POINTER + ret + + ALIGN_TEXT +100816: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 @@ -130,9 +147,8 @@ ENTRY(memcmp) jne 10081608f POP_FRAME_POINTER ret + ALIGN_TEXT 100408: - cmpb $4,%dl - jl 100204f movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d @@ -143,9 +159,8 @@ ENTRY(memcmp) jne 10040804f POP_FRAME_POINTER ret + ALIGN_TEXT 100204: - cmpb $2,%dl - jl 100001f movzwl (%rdi),%r8d movzwl (%rsi),%r9d cmpl %r8d,%r9d @@ -156,16 +171,7 @@ ENTRY(memcmp) jne 1f POP_FRAME_POINTER ret -100001: - cmpb $1,%dl - jl 100000f - movzbl (%rdi),%eax - movzbl (%rsi),%r8d - subl %r8d,%eax -100000: - POP_FRAME_POINTER - ret -ALIGN_TEXT + ALIGN_TEXT 101632: cmpq $32,%rdx ja 
103200f @@ -187,7 +193,7 @@ ALIGN_TEXT jne 10163224f POP_FRAME_POINTER ret -ALIGN_TEXT + ALIGN_TEXT 103200: movq (%rdi),%r8 movq 8(%rdi),%r9 @@ -218,7 +224,7 @@ ALIGN_TEXT * * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). */ -ALIGN_TEXT + ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi @@ -230,29 +236,29 @@ ALIGN_TEXT leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163208: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10040804: leaq -4(%rdi,%rdx),%rdi leaq -4(%rsi,%rdx),%rsi jmp 1f -ALIGN_TEXT + ALIGN_TEXT 80: movl (%rdi),%r8d movl (%rsi),%r9d
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202106261628.15QGSPg1035235>