From nobody Thu Sep  8 13:32:25 2022
X-Original-To: dev-commits-src-main@mlmmj.nyi.freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1])
	by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 4MNg615TySz4c795;
	Thu,  8 Sep 2022 13:32:25 +0000 (UTC)
	(envelope-from git@FreeBSD.org)
Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3])
	(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
	 key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256
	 client-signature RSA-PSS (4096 bits) client-digest SHA256)
	(Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK))
	by mx1.freebsd.org (Postfix) with ESMTPS id 4MNg6135qJz3FR5;
	Thu,  8 Sep 2022 13:32:25 +0000 (UTC)
	(envelope-from git@FreeBSD.org)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim;
	t=1662643945;
	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
	 to:to:cc:mime-version:mime-version:content-type:content-type:
	 content-transfer-encoding:content-transfer-encoding;
	bh=sTaumo8zZrTDBLWTdfZHPzB08rVgg8g5JPM0AxMwQX8=;
	b=DzQZ5hHkvYTjKYlX8R6pyT8atz/kE/at0+ocKyP6x5C2rCSMAsLBlEJgwnpB52ARuv1U8w
	t5DKPeXGX/NyJoHYBP3Q1OYxFukkh98WZdO9MaIvFHtJw708WntR6mkyxC3h3RJy402VK/
	NfgCR8d87lYn8ojSEi9DaBz0HyNAhs5xVUYuN3gz1uOovCCQMJt/AW/yJNZJLBAdA4tnhd
	KLOYEl6Crsd13hoE3jRSliXpEFm9przfdGpmjLOJPGcpVvWxWghd7MZgtjGvX60kDytbT4
	TK80pf9PQUAz8wpqY5vl+eVJhOFuzNo3M3OmcO/Zi9uV7NPoa7Q70ASWu9FFfw==
Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5])
	(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
	 key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256)
	(Client did not present a certificate)
	by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4MNg612D6HztPQ;
	Thu,  8 Sep 2022 13:32:25 +0000 (UTC)
	(envelope-from git@FreeBSD.org)
Received: from gitrepo.freebsd.org ([127.0.1.44])
	by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 288DWP3h072238;
	Thu, 8 Sep 2022 13:32:25 GMT
	(envelope-from git@gitrepo.freebsd.org)
Received: (from git@localhost)
	by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 288DWPYX072237;
	Thu, 8 Sep 2022 13:32:25 GMT
	(envelope-from git)
Date: Thu, 8 Sep 2022 13:32:25 GMT
Message-Id: <202209081332.288DWPYX072237@gitrepo.freebsd.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org,
        dev-commits-src-main@FreeBSD.org
From: Andrew Turner <andrew@FreeBSD.org>
Subject: git: 51a1bf7ba7eb - main - Import an optimized arm64 memcmp into the kernel
List-Id: Commit messages for the main branch of the src repository <dev-commits-src-main.freebsd.org>
List-Archive: https://lists.freebsd.org/archives/dev-commits-src-main
List-Help: <mailto:dev-commits-src-main+help@freebsd.org>
List-Post: <mailto:dev-commits-src-main@freebsd.org>
List-Subscribe: <mailto:dev-commits-src-main+subscribe@freebsd.org>
List-Unsubscribe: <mailto:dev-commits-src-main+unsubscribe@freebsd.org>
Sender: owner-dev-commits-src-main@freebsd.org
X-BeenThere: dev-commits-src-main@freebsd.org
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
X-Git-Committer: andrew
X-Git-Repository: src
X-Git-Refname: refs/heads/main
X-Git-Reftype: branch
X-Git-Commit: 51a1bf7ba7eb79c760161a2054c113978dce38cb
Auto-Submitted: auto-generated
ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org;
	s=dkim; t=1662643945;
	h=from:from:reply-to:subject:subject:date:date:message-id:message-id:
	 to:to:cc:mime-version:mime-version:content-type:content-type:
	 content-transfer-encoding:content-transfer-encoding;
	bh=sTaumo8zZrTDBLWTdfZHPzB08rVgg8g5JPM0AxMwQX8=;
	b=sX8QMmcU/MNhBA1CINrz6seDhT9dzx+Z9R3msMr60YbffgVHYiQt2UtupqS4p/iwqP7ki3
	ubNPkNfDjFGOa0OZfNNdNUxmzZw7rtTXtZxtNXzbvIKCj/0u5yKu0pglgwQ8ucUtPcyNvR
	dadDgKNCkIunVKN7o/8kyncS7aah766zwq/VwN4mUg54ffgYEnzqmu+hKO5+uShNXxfTB0
	cAl/1EuGTd5qLzZj5fdN/GEnoMSa8sQvBeeT3xZjrXjFOGKAVGRPEA1+ui78BLboxTf/9e
	JISQEV9q7Q87hSxKJd99iPZE1UH7ys422Z9363JUB4ydQAKjGK6DygDZ99QNQg==
ARC-Seal: i=1; s=dkim; d=freebsd.org; t=1662643945; a=rsa-sha256; cv=none;
	b=gvKeV1ArGUhczH46g2wO+Pzzf2qfscc4p69CNZtU/OYZp9ER5tJZcdkH8FtaKfG+/RMapP
	ZjZ9AoPWZfqUnineCKj5YgytU2Q30txcu61uC3hdndVtiXPz3bbFTmcGkQaq6qbXZgoRO9
	UAG+yrGlLOD3V9r8OpLH+qkW6bl7sNHxKOjA5gJwb/OyjaxxoXiLovjoqCcdq89nYSWcsX
	xPg0YI+Wfphojl7Ek3svzZj0apPify1mbqcS/wsyfjga2bYOmTBw4yPt5bYSRqmB54/cCF
	R0hpoWPDSRl/hDUj91WyW2jCsYIt9WM/n9Ww+WyL7YbBtSrQhCScpBvLw54aTg==
ARC-Authentication-Results: i=1;
	mx1.freebsd.org;
	none
X-ThisMailContainsUnwantedMimeParts: N

The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=51a1bf7ba7eb79c760161a2054c113978dce38cb

commit 51a1bf7ba7eb79c760161a2054c113978dce38cb
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2022-09-07 11:12:30 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2022-09-08 13:29:37 +0000

    Import an optimized arm64 memcmp into the kernel
    
    Bring in a version of the Arm Optimized Routines memcmp from before
    the VFP registers were used.
    
    Imported with modification from:
    https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S
    
    Sponsored by:   The FreeBSD Foundation
---
 sys/arm64/arm64/memcmp.S | 136 +++++++++++++++++++++++++++++++++++++++++++++++
 sys/conf/files.arm64     |   3 +-
 2 files changed, 137 insertions(+), 2 deletions(-)

diff --git a/sys/arm64/arm64/memcmp.S b/sys/arm64/arm64/memcmp.S
new file mode 100644
index 000000000000..8517a181f3f3
--- /dev/null
+++ b/sys/arm64/arm64/memcmp.S
@@ -0,0 +1,136 @@
+/* memcmp - compare memory
+ *
+ * Copyright (c) 2013-2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ */
+
+#include <machine/asm.h>
+
+#define L(l) .L ## l		/* Paste a .L prefix onto l to form a local label.  */
+
+/* Parameters and result.  */
+#define src1		x0	/* First buffer; advanced as bytes are consumed.  */
+#define src2		x1	/* Second buffer; advanced in step with src1.  */
+#define limit		x2	/* Byte count on entry, then a biased remaining count.  */
+#define result		w0	/* Return value; same register as src1.  */
+
+/* Internal variables.  */
+#define data1		x3	/* Word loaded from src1.  */
+#define data1w		w3	/* 32-bit view of data1.  */
+#define data1h		x4	/* Second word of a 16-byte pair from src1.  */
+#define data2		x5	/* Word loaded from src2.  */
+#define data2w		w5	/* 32-bit view of data2.  */
+#define data2h		x6	/* Second word of a 16-byte pair from src2.  */
+#define tmp1		x7	/* Offset of src1 within a 16-byte unit.  */
+#define tmp2		x8	/* Not referenced by the memcmp code in this file.  */
+
+ENTRY (memcmp)			/* src1/src2 = buffers, limit = byte count n; returns result.  */
+	subs	limit, limit, 8		/* limit = n - 8; borrows (lo) when n < 8.  */
+	b.lo	L(less8)
+
+	ldr	data1, [src1], 8	/* Compare bytes 0-7; post-index advances by 8.  */
+	ldr	data2, [src2], 8
+	cmp	data1, data2
+	b.ne	L(return)
+
+	subs	limit, limit, 8		/* limit = n - 16.  */
+	b.gt	L(more16)		/* Signed test: taken when limit > 0.  */
+
+	ldr	data1, [src1, limit]	/* limit is [-8..0]: last 8 bytes of the buffer,  */
+	ldr	data2, [src2, limit]	/* possibly overlapping the 8 already compared.  */
+	b	L(return)
+
+L(more16):
+	ldr	data1, [src1], 8	/* Compare bytes 8-15.  */
+	ldr	data2, [src2], 8
+	cmp	data1, data2
+	bne	L(return)
+
+	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
+	   strings.  */
+	subs	limit, limit, 16	/* limit = n - 32.  */
+	b.ls	L(last_bytes)
+
+	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
+	   try to align, so limit it only to strings larger than 128 bytes.  */
+	cmp	limit, 96		/* limit is n - 32, so this tests n <= 128.  */
+	b.ls	L(loop16)
+
+	/* Align src1 and adjust src2 with bytes not yet done.  */
+	and	tmp1, src1, 15		/* tmp1 = offset of src1 within a 16-byte unit.  */
+	add	limit, limit, tmp1	/* Count the tmp1 bytes that get compared again.  */
+	sub	src1, src1, tmp1	/* src1 is now 16-byte aligned.  */
+	sub	src2, src2, tmp1	/* Move src2 back by the same amount.  */
+
+	/* Loop performing 16 bytes per iteration using aligned src1.
+	   Limit is pre-decremented by 16 and must be larger than zero.
+	   Exit if <= 16 bytes left to do or if the data is not equal.  */
+	.p2align 4
+L(loop16):
+	ldp	data1, data1h, [src1], 16
+	ldp	data2, data2h, [src2], 16
+	subs	limit, limit, 16	/* hi: over 16 bytes remain after this pair.  */
+	ccmp	data1, data2, 0, hi	/* If hi, compare low words; else NZCV = 0 (ne).  */
+	ccmp	data1h, data2h, 0, eq	/* If eq, compare high words; else NZCV = 0 (ne).  */
+	b.eq	L(loop16)
+
+	cmp	data1, data2		/* Flags may have been forced, so re-test both words.  */
+	bne	L(return)
+	mov	data1, data1h		/* L(return) only examines data1/data2.  */
+	mov	data2, data2h
+	cmp	data1, data2
+	bne	L(return)
+
+	/* Compare last 1-16 bytes using unaligned access.  */
+L(last_bytes):
+	add	src1, src1, limit	/* limit is <= 0 here: step back so the 16-byte  */
+	add	src2, src2, limit	/* load ends exactly at the end of the buffer.  */
+	ldp	data1, data1h, [src1]
+	ldp	data2, data2h, [src2]
+	cmp     data1, data2
+	bne	L(return)
+	mov	data1, data1h		/* Low words match; let L(return) judge the high words.  */
+	mov	data2, data2h
+	cmp	data1, data2
+
+	/* Compare data bytes and set return value to 0, -1 or 1.  */
+L(return):
+#ifndef __AARCH64EB__
+	rev	data1, data1		/* Byte-reverse so the first byte in memory is the  */
+	rev	data2, data2		/* most significant byte for the unsigned compare.  */
+#endif
+	cmp     data1, data2
+L(ret_eq):
+	cset	result, ne		/* 1 if the words differ, else 0.  */
+	cneg	result, result, lo	/* Negate to -1 when data1 is below data2 (unsigned).  */
+	ret
+
+	.p2align 4
+	/* Compare fewer than 8 bytes (n is 0-7).  Limit is [-8..-1].  */
+L(less8):
+	adds	limit, limit, 4		/* No carry (lo) when n < 4.  */
+	b.lo	L(less4)
+	ldr	data1w, [src1], 4	/* Compare bytes 0-3 as one 32-bit word.  */
+	ldr	data2w, [src2], 4
+	cmp	data1w, data2w
+	b.ne	L(return)
+	sub	limit, limit, 4		/* Back to [-4..-1] for the add below.  */
+L(less4):
+	adds	limit, limit, 4		/* limit = bytes still to compare, [0..3].  */
+	beq	L(ret_eq)		/* None left: Z is set, so result becomes 0.  */
+L(byte_loop):
+	ldrb	data1w, [src1], 1
+	ldrb	data2w, [src2], 1
+	subs	limit, limit, 1
+	ccmp	data1w, data2w, 0, ne	/* If bytes remain compare them, else NZCV = 0b0000.  */
+	b.eq	L(byte_loop)		/* Loop while bytes remain and the pair matched.  */
+	sub	result, data1w, data2w	/* Difference of the last byte pair loaded.  */
+	ret
+
+END (memcmp)
+
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index a647d4e32230..d01b3f674e9a 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -9,8 +9,6 @@ kern/pic_if.m					optional intrng
 kern/subr_devmap.c				standard
 kern/subr_intr.c				optional intrng
 kern/subr_physmem.c				standard
-libkern/memcmp.c				standard	\
-	compile-with "${NORMAL_C:N-fsanitize*}"
 libkern/memset.c				standard	\
 	compile-with "${NORMAL_C:N-fsanitize*}"
 libkern/strlen.c		standard
@@ -60,6 +58,7 @@ arm64/arm64/locore.S				standard no-obj
 arm64/arm64/machdep.c				standard
 arm64/arm64/machdep_boot.c			standard
 arm64/arm64/mem.c				standard
+arm64/arm64/memcmp.S				standard
 arm64/arm64/memcpy.S				standard
 arm64/arm64/minidump_machdep.c			standard
 arm64/arm64/mp_machdep.c			optional smp