From owner-dev-commits-src-all@freebsd.org Sat Jun 26 16:28:25 2021 Return-Path: Delivered-To: dev-commits-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 8A0DC655C71; Sat, 26 Jun 2021 16:28:25 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4GBznj3Nbbz3Ch2; Sat, 26 Jun 2021 16:28:25 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 5B95522180; Sat, 26 Jun 2021 16:28:25 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 15QGSPDb035236; Sat, 26 Jun 2021 16:28:25 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 15QGSPg1035235; Sat, 26 Jun 2021 16:28:25 GMT (envelope-from git) Date: Sat, 26 Jun 2021 16:28:25 GMT Message-Id: <202106261628.15QGSPg1035235@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org From: Mateusz Guzik Subject: git: a3503647f776 - stable/12 - amd64: move memcmp checks upfront MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: mjg X-Git-Repository: src X-Git-Refname: refs/heads/stable/12 X-Git-Reftype: branch X-Git-Commit: a3503647f776a96ae8e65b6225cc4f29ad573bf9 Auto-Submitted: auto-generated X-BeenThere: dev-commits-src-all@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Commit messages for all branches of the src repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 26 Jun 2021 16:28:25 -0000 The branch stable/12 has been updated by mjg: URL: https://cgit.FreeBSD.org/src/commit/?id=a3503647f776a96ae8e65b6225cc4f29ad573bf9 commit a3503647f776a96ae8e65b6225cc4f29ad573bf9 Author: Mateusz Guzik AuthorDate: 2021-01-31 15:46:18 +0000 Commit: Mateusz Guzik CommitDate: 2021-06-26 15:54:58 +0000 amd64: move memcmp checks upfront This is a tradeoff which saves jumps for smaller sizes while making the 8-16 range slower (roughly in line with the other cases). Tested with glibc test suite. For example size 3 (most common with vfs namecache) (ops/s): before: 407086026 after: 461391995 The regressed range of 8-16 (with 8 as example): before: 540850489 after: 461671032 (cherry picked from commit f1be262ec11c1c35e6485f432415b5b52adb505d) --- lib/libc/amd64/string/memcmp.S | 50 ++++++++++++++++++++++------------------ sys/amd64/amd64/support.S | 52 +++++++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S index 231ab2175804..04c32bebe439 100644 --- a/lib/libc/amd64/string/memcmp.S +++ b/lib/libc/amd64/string/memcmp.S @@ -39,9 +39,25 @@ ENTRY(memcmp) cmpq $16,%rdx ja 101632f -100816: cmpb $8,%dl - jl 100408f + jg 100816f + + cmpb $4,%dl + jg 100408f + + cmpb $2,%dl + jge 100204f + + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + subl %r8d,%eax +100000: + ret + + ALIGN_TEXT +100816: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 @@ -51,9 +67,8 @@ ENTRY(memcmp) cmpq %r8,%r9 jne 10081608f ret + ALIGN_TEXT 100408: - cmpb $4,%dl - jl 100204f movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d @@ -63,9 +78,8 @@ ENTRY(memcmp) cmpl %r8d,%r9d jne 10040804f ret + ALIGN_TEXT 100204: - cmpb $2,%dl - jl 100001f movzwl (%rdi),%r8d movzwl (%rsi),%r9d cmpl %r8d,%r9d @@ -75,15 +89,7 @@ ENTRY(memcmp) cmpl %r8d,%r9d jne 1f ret -100001: - cmpb $1,%dl - jl 100000f - movzbl (%rdi),%eax - movzbl (%rsi),%r8d - subl %r8d,%eax -100000: - ret -ALIGN_TEXT + ALIGN_TEXT 101632: cmpq $32,%rdx ja 103200f @@ -104,7 +110,7 @@ ALIGN_TEXT cmpq %r8,%r9 jne 10163224f ret -ALIGN_TEXT + ALIGN_TEXT 103200: movq (%rdi),%r8 movq 8(%rdi),%r9 @@ -134,7 +140,7 @@ ALIGN_TEXT * * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). */ -ALIGN_TEXT + ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi @@ -146,29 +152,29 @@ ALIGN_TEXT leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163208: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10040804: leaq -4(%rdi,%rdx),%rdi leaq -4(%rsi,%rdx),%rsi jmp 1f -ALIGN_TEXT + ALIGN_TEXT 80: movl (%rdi),%r8d movl (%rsi),%r9d diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 93d2d17150cc..1a08315c2c46 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -117,9 +117,26 @@ ENTRY(memcmp) cmpq $16,%rdx ja 101632f -100816: cmpb $8,%dl - jl 100408f + jg 100816f + + cmpb $4,%dl + jg 100408f + + cmpb $2,%dl + jge 100204f + + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + subl %r8d,%eax +100000: + POP_FRAME_POINTER + ret + + ALIGN_TEXT +100816: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 @@ -130,9 +147,8 @@ ENTRY(memcmp) jne 10081608f POP_FRAME_POINTER ret + ALIGN_TEXT 100408: - cmpb $4,%dl - jl 100204f movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d @@ -143,9 +159,8 @@ ENTRY(memcmp) jne 10040804f POP_FRAME_POINTER ret + ALIGN_TEXT 100204: - cmpb $2,%dl - jl 100001f movzwl (%rdi),%r8d movzwl (%rsi),%r9d cmpl %r8d,%r9d @@ -156,16 +171,7 @@ ENTRY(memcmp) jne 1f POP_FRAME_POINTER ret -100001: - cmpb $1,%dl - jl 100000f - movzbl (%rdi),%eax - movzbl (%rsi),%r8d - subl %r8d,%eax -100000: - POP_FRAME_POINTER - ret -ALIGN_TEXT + ALIGN_TEXT 101632: cmpq $32,%rdx ja 103200f @@ -187,7 +193,7 @@ ALIGN_TEXT jne 10163224f POP_FRAME_POINTER ret -ALIGN_TEXT + ALIGN_TEXT 103200: movq (%rdi),%r8 movq 8(%rdi),%r9 @@ -218,7 +224,7 @@ ALIGN_TEXT * * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). */ -ALIGN_TEXT + ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi @@ -230,29 +236,29 @@ ALIGN_TEXT leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163208: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10040804: leaq -4(%rdi,%rdx),%rdi leaq -4(%rsi,%rdx),%rsi jmp 1f -ALIGN_TEXT + ALIGN_TEXT 80: movl (%rdi),%r8d movl (%rsi),%r9d