From owner-dev-commits-src-all@freebsd.org Sun Jan 31 16:07:31 2021 Return-Path: Delivered-To: dev-commits-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 80E404FB3E7; Sun, 31 Jan 2021 16:07:31 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4DTGDz2l99z3m8D; Sun, 31 Jan 2021 16:07:31 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4E61231A7; Sun, 31 Jan 2021 16:07:31 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 10VG7VDa081382; Sun, 31 Jan 2021 16:07:31 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 10VG7VZ8081381; Sun, 31 Jan 2021 16:07:31 GMT (envelope-from git) Date: Sun, 31 Jan 2021 16:07:31 GMT Message-Id: <202101311607.10VG7VZ8081381@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org From: Mateusz Guzik Subject: git: f1be262ec11c - main - amd64: move memcmp checks upfront MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: mjg X-Git-Repository: src 
X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: f1be262ec11c1c35e6485f432415b5b52adb505d Auto-Submitted: auto-generated X-BeenThere: dev-commits-src-all@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: Commit messages for all branches of the src repository List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 31 Jan 2021 16:07:31 -0000 The branch main has been updated by mjg: URL: https://cgit.FreeBSD.org/src/commit/?id=f1be262ec11c1c35e6485f432415b5b52adb505d commit f1be262ec11c1c35e6485f432415b5b52adb505d Author: Mateusz Guzik AuthorDate: 2021-01-31 15:46:18 +0000 Commit: Mateusz Guzik CommitDate: 2021-01-31 16:07:20 +0000 amd64: move memcmp checks upfront This is a tradeoff which saves jumps for smaller sizes while making the 8-16 range slower (roughly in line with the other cases). Tested with the glibc test suite. For example, size 3 (the most common size with the vfs namecache) (ops/s): before: 407086026 after: 461391995 The regressed range of 8-16 (with size 8 as an example): before: 540850489 after: 461671032 --- lib/libc/amd64/string/memcmp.S | 50 ++++++++++++++++++++++------------------ sys/amd64/amd64/support.S | 52 +++++++++++++++++++++++------------------- 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S index 67c7df280679..0c8121f9d885 100644 --- a/lib/libc/amd64/string/memcmp.S +++ b/lib/libc/amd64/string/memcmp.S @@ -45,9 +45,25 @@ ENTRY(memcmp) cmpq $16,%rdx ja 101632f -100816: cmpb $8,%dl - jl 100408f + jg 100816f + + cmpb $4,%dl + jg 100408f + + cmpb $2,%dl + jge 100204f + + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + subl %r8d,%eax +100000: + ret + + ALIGN_TEXT +100816: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 @@ -57,9 +73,8 @@ ENTRY(memcmp) cmpq %r8,%r9 jne 10081608f ret + ALIGN_TEXT 100408: - cmpb $4,%dl - jl 100204f movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d @@ -69,9 +84,8 @@ 
ENTRY(memcmp) cmpl %r8d,%r9d jne 10040804f ret + ALIGN_TEXT 100204: - cmpb $2,%dl - jl 100001f movzwl (%rdi),%r8d movzwl (%rsi),%r9d cmpl %r8d,%r9d @@ -81,15 +95,7 @@ ENTRY(memcmp) cmpl %r8d,%r9d jne 1f ret -100001: - cmpb $1,%dl - jl 100000f - movzbl (%rdi),%eax - movzbl (%rsi),%r8d - subl %r8d,%eax -100000: - ret -ALIGN_TEXT + ALIGN_TEXT 101632: cmpq $32,%rdx ja 103200f @@ -110,7 +116,7 @@ ALIGN_TEXT cmpq %r8,%r9 jne 10163224f ret -ALIGN_TEXT + ALIGN_TEXT 103200: movq (%rdi),%r8 movq 8(%rdi),%r9 @@ -140,7 +146,7 @@ ALIGN_TEXT * * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). */ -ALIGN_TEXT + ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi @@ -152,29 +158,29 @@ ALIGN_TEXT leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163208: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10040804: leaq -4(%rdi,%rdx),%rdi leaq -4(%rsi,%rdx),%rsi jmp 1f -ALIGN_TEXT + ALIGN_TEXT 80: movl (%rdi),%r8d movl (%rsi),%r9d diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S index 49baa50ac294..b623fba277db 100644 --- a/sys/amd64/amd64/support.S +++ b/sys/amd64/amd64/support.S @@ -93,9 +93,26 @@ ENTRY(memcmp) cmpq $16,%rdx ja 101632f -100816: cmpb $8,%dl - jl 100408f + jg 100816f + + cmpb $4,%dl + jg 100408f + + cmpb $2,%dl + jge 100204f + + cmpb $1,%dl + jl 100000f + movzbl (%rdi),%eax + movzbl (%rsi),%r8d + subl %r8d,%eax +100000: + POP_FRAME_POINTER + ret + + ALIGN_TEXT +100816: movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 @@ -106,9 +123,8 @@ ENTRY(memcmp) jne 10081608f POP_FRAME_POINTER ret + ALIGN_TEXT 100408: - cmpb $4,%dl - jl 100204f movl (%rdi),%r8d movl (%rsi),%r9d cmpl %r8d,%r9d @@ -119,9 +135,8 @@ ENTRY(memcmp) jne 10040804f POP_FRAME_POINTER ret + ALIGN_TEXT 100204: - cmpb $2,%dl - jl 100001f 
movzwl (%rdi),%r8d movzwl (%rsi),%r9d cmpl %r8d,%r9d @@ -132,16 +147,7 @@ ENTRY(memcmp) jne 1f POP_FRAME_POINTER ret -100001: - cmpb $1,%dl - jl 100000f - movzbl (%rdi),%eax - movzbl (%rsi),%r8d - subl %r8d,%eax -100000: - POP_FRAME_POINTER - ret -ALIGN_TEXT + ALIGN_TEXT 101632: cmpq $32,%rdx ja 103200f @@ -163,7 +169,7 @@ ALIGN_TEXT jne 10163224f POP_FRAME_POINTER ret -ALIGN_TEXT + ALIGN_TEXT 103200: movq (%rdi),%r8 movq 8(%rdi),%r9 @@ -194,7 +200,7 @@ ALIGN_TEXT * * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes). */ -ALIGN_TEXT + ALIGN_TEXT 10320016: leaq 16(%rdi),%rdi leaq 16(%rsi),%rsi @@ -206,29 +212,29 @@ ALIGN_TEXT leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10081608: 10163224: leaq -8(%rdi,%rdx),%rdi leaq -8(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163216: leaq -16(%rdi,%rdx),%rdi leaq -16(%rsi,%rdx),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10163208: leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi jmp 80f -ALIGN_TEXT + ALIGN_TEXT 10040804: leaq -4(%rdi,%rdx),%rdi leaq -4(%rsi,%rdx),%rsi jmp 1f -ALIGN_TEXT + ALIGN_TEXT 80: movl (%rdi),%r8d movl (%rsi),%r9d