From owner-dev-commits-src-all@freebsd.org  Sat Jun 26 16:28:25 2021
Return-Path: <owner-dev-commits-src-all@freebsd.org>
Delivered-To: dev-commits-src-all@mailman.nyi.freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1])
 by mailman.nyi.freebsd.org (Postfix) with ESMTP id 8A0DC655C71;
 Sat, 26 Jun 2021 16:28:25 +0000 (UTC) (envelope-from git@FreeBSD.org)
Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org
 [IPv6:2610:1c1:1:606c::19:3])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256
 client-signature RSA-PSS (4096 bits) client-digest SHA256)
 (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK))
 by mx1.freebsd.org (Postfix) with ESMTPS id 4GBznj3Nbbz3Ch2;
 Sat, 26 Jun 2021 16:28:25 +0000 (UTC) (envelope-from git@FreeBSD.org)
Received: from gitrepo.freebsd.org (gitrepo.freebsd.org
 [IPv6:2610:1c1:1:6068::e6a:5])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256)
 (Client did not present a certificate)
 by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 5B95522180;
 Sat, 26 Jun 2021 16:28:25 +0000 (UTC) (envelope-from git@FreeBSD.org)
Received: from gitrepo.freebsd.org ([127.0.1.44])
 by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 15QGSPDb035236;
 Sat, 26 Jun 2021 16:28:25 GMT (envelope-from git@gitrepo.freebsd.org)
Received: (from git@localhost)
 by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 15QGSPg1035235;
 Sat, 26 Jun 2021 16:28:25 GMT (envelope-from git)
Date: Sat, 26 Jun 2021 16:28:25 GMT
Message-Id: <202106261628.15QGSPg1035235@gitrepo.freebsd.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org,
 dev-commits-src-branches@FreeBSD.org
From: Mateusz Guzik <mjg@FreeBSD.org>
Subject: git: a3503647f776 - stable/12 - amd64: move memcmp checks upfront
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
X-Git-Committer: mjg
X-Git-Repository: src
X-Git-Refname: refs/heads/stable/12
X-Git-Reftype: branch
X-Git-Commit: a3503647f776a96ae8e65b6225cc4f29ad573bf9
Auto-Submitted: auto-generated
X-BeenThere: dev-commits-src-all@freebsd.org
X-Mailman-Version: 2.1.34
Precedence: list
List-Id: Commit messages for all branches of the src repository
 <dev-commits-src-all.freebsd.org>
List-Unsubscribe: <https://lists.freebsd.org/mailman/options/dev-commits-src-all>, 
 <mailto:dev-commits-src-all-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/dev-commits-src-all/>
List-Post: <mailto:dev-commits-src-all@freebsd.org>
List-Help: <mailto:dev-commits-src-all-request@freebsd.org?subject=help>
List-Subscribe: <https://lists.freebsd.org/mailman/listinfo/dev-commits-src-all>, 
 <mailto:dev-commits-src-all-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Sat, 26 Jun 2021 16:28:25 -0000

The branch stable/12 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=a3503647f776a96ae8e65b6225cc4f29ad573bf9

commit a3503647f776a96ae8e65b6225cc4f29ad573bf9
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2021-01-31 15:46:18 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2021-06-26 15:54:58 +0000

    amd64: move memcmp checks upfront
    
    This is a tradeoff which saves jumps for smaller sizes while making
    the 8-16 range slower (roughly in line with the other cases).
    
    Tested with the glibc test suite.
    
    For example, size 3 (the most common size with the vfs namecache) (ops/s):
    before: 407086026
    after:  461391995
    
    The regressed range of 8-16 (with size 8 as an example):
    before: 540850489
    after:  461671032
    
    (cherry picked from commit f1be262ec11c1c35e6485f432415b5b52adb505d)
---
 lib/libc/amd64/string/memcmp.S | 50 ++++++++++++++++++++++------------------
 sys/amd64/amd64/support.S      | 52 +++++++++++++++++++++++-------------------
 2 files changed, 57 insertions(+), 45 deletions(-)

diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
index 231ab2175804..04c32bebe439 100644
--- a/lib/libc/amd64/string/memcmp.S
+++ b/lib/libc/amd64/string/memcmp.S
@@ -39,9 +39,25 @@ ENTRY(memcmp)
 	cmpq	$16,%rdx
 	ja	101632f
 
-100816:
 	cmpb	$8,%dl
-	jl	100408f
+	jg	100816f
+
+	cmpb	$4,%dl
+	jg	100408f
+
+	cmpb	$2,%dl
+	jge	100204f
+
+	cmpb	$1,%dl
+	jl	100000f
+	movzbl	(%rdi),%eax
+	movzbl	(%rsi),%r8d
+	subl	%r8d,%eax
+100000:
+	ret
+
+	ALIGN_TEXT
+100816:
 	movq	(%rdi),%r8
 	movq	(%rsi),%r9
 	cmpq	%r8,%r9
@@ -51,9 +67,8 @@ ENTRY(memcmp)
 	cmpq	%r8,%r9
 	jne	10081608f
 	ret
+	ALIGN_TEXT
 100408:
-	cmpb	$4,%dl
-	jl	100204f
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -63,9 +78,8 @@ ENTRY(memcmp)
 	cmpl	%r8d,%r9d
 	jne	10040804f
 	ret
+	ALIGN_TEXT
 100204:
-	cmpb	$2,%dl
-	jl	100001f
 	movzwl	(%rdi),%r8d
 	movzwl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -75,15 +89,7 @@ ENTRY(memcmp)
 	cmpl	%r8d,%r9d
 	jne	1f
 	ret
-100001:
-	cmpb	$1,%dl
-	jl	100000f
-	movzbl	(%rdi),%eax
-	movzbl	(%rsi),%r8d
-	subl	%r8d,%eax
-100000:
-	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 101632:
 	cmpq	$32,%rdx
 	ja	103200f
@@ -104,7 +110,7 @@ ALIGN_TEXT
 	cmpq	%r8,%r9
 	jne	10163224f
 	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 103200:
 	movq	(%rdi),%r8
 	movq	8(%rdi),%r9
@@ -134,7 +140,7 @@ ALIGN_TEXT
  *
  * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
  */
-ALIGN_TEXT
+	ALIGN_TEXT
 10320016:
 	leaq	16(%rdi),%rdi
 	leaq	16(%rsi),%rsi
@@ -146,29 +152,29 @@ ALIGN_TEXT
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10081608:
 10163224:
 	leaq	-8(%rdi,%rdx),%rdi
 	leaq	-8(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163216:
 	leaq	-16(%rdi,%rdx),%rdi
 	leaq	-16(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163208:
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10040804:
 	leaq	-4(%rdi,%rdx),%rdi
 	leaq	-4(%rsi,%rdx),%rsi
 	jmp	1f
 
-ALIGN_TEXT
+	ALIGN_TEXT
 80:
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 93d2d17150cc..1a08315c2c46 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -117,9 +117,26 @@ ENTRY(memcmp)
 	cmpq	$16,%rdx
 	ja	101632f
 
-100816:
 	cmpb	$8,%dl
-	jl	100408f
+	jg	100816f
+
+	cmpb	$4,%dl
+	jg	100408f
+
+	cmpb	$2,%dl
+	jge	100204f
+
+	cmpb	$1,%dl
+	jl	100000f
+	movzbl	(%rdi),%eax
+	movzbl	(%rsi),%r8d
+	subl	%r8d,%eax
+100000:
+	POP_FRAME_POINTER
+	ret
+
+	ALIGN_TEXT
+100816:
 	movq	(%rdi),%r8
 	movq	(%rsi),%r9
 	cmpq	%r8,%r9
@@ -130,9 +147,8 @@ ENTRY(memcmp)
 	jne	10081608f
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
 100408:
-	cmpb	$4,%dl
-	jl	100204f
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -143,9 +159,8 @@ ENTRY(memcmp)
 	jne	10040804f
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
 100204:
-	cmpb	$2,%dl
-	jl	100001f
 	movzwl	(%rdi),%r8d
 	movzwl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -156,16 +171,7 @@ ENTRY(memcmp)
 	jne	1f
 	POP_FRAME_POINTER
 	ret
-100001:
-	cmpb	$1,%dl
-	jl	100000f
-	movzbl	(%rdi),%eax
-	movzbl	(%rsi),%r8d
-	subl	%r8d,%eax
-100000:
-	POP_FRAME_POINTER
-	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 101632:
 	cmpq	$32,%rdx
 	ja	103200f
@@ -187,7 +193,7 @@ ALIGN_TEXT
 	jne	10163224f
 	POP_FRAME_POINTER
 	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 103200:
 	movq	(%rdi),%r8
 	movq	8(%rdi),%r9
@@ -218,7 +224,7 @@ ALIGN_TEXT
  *
  * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
  */
-ALIGN_TEXT
+	ALIGN_TEXT
 10320016:
 	leaq	16(%rdi),%rdi
 	leaq	16(%rsi),%rsi
@@ -230,29 +236,29 @@ ALIGN_TEXT
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10081608:
 10163224:
 	leaq	-8(%rdi,%rdx),%rdi
 	leaq	-8(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163216:
 	leaq	-16(%rdi,%rdx),%rdi
 	leaq	-16(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163208:
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10040804:
 	leaq	-4(%rdi,%rdx),%rdi
 	leaq	-4(%rsi,%rdx),%rsi
 	jmp	1f
 
-ALIGN_TEXT
+	ALIGN_TEXT
 80:
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d