From nobody Thu Sep 8 13:32:25 2022 X-Original-To: dev-commits-src-all@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 4MNg615TySz4c795; Thu, 8 Sep 2022 13:32:25 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4MNg6135qJz3FR5; Thu, 8 Sep 2022 13:32:25 +0000 (UTC) (envelope-from git@FreeBSD.org) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1662643945; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=sTaumo8zZrTDBLWTdfZHPzB08rVgg8g5JPM0AxMwQX8=; b=DzQZ5hHkvYTjKYlX8R6pyT8atz/kE/at0+ocKyP6x5C2rCSMAsLBlEJgwnpB52ARuv1U8w t5DKPeXGX/NyJoHYBP3Q1OYxFukkh98WZdO9MaIvFHtJw708WntR6mkyxC3h3RJy402VK/ NfgCR8d87lYn8ojSEi9DaBz0HyNAhs5xVUYuN3gz1uOovCCQMJt/AW/yJNZJLBAdA4tnhd KLOYEl6Crsd13hoE3jRSliXpEFm9przfdGpmjLOJPGcpVvWxWghd7MZgtjGvX60kDytbT4 TK80pf9PQUAz8wpqY5vl+eVJhOFuzNo3M3OmcO/Zi9uV7NPoa7Q70ASWu9FFfw== Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4MNg612D6HztPQ; Thu, 8 Sep 2022 13:32:25 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 288DWP3h072238; 
Thu, 8 Sep 2022 13:32:25 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 288DWPYX072237; Thu, 8 Sep 2022 13:32:25 GMT (envelope-from git) Date: Thu, 8 Sep 2022 13:32:25 GMT Message-Id: <202209081332.288DWPYX072237@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org From: Andrew Turner Subject: git: 51a1bf7ba7eb - main - Import an optimized arm64 memcmp into the kernel List-Id: Commit messages for all branches of the src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-all List-Help: List-Post: List-Subscribe: List-Unsubscribe: Sender: owner-dev-commits-src-all@freebsd.org X-BeenThere: dev-commits-src-all@freebsd.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: andrew X-Git-Repository: src X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: 51a1bf7ba7eb79c760161a2054c113978dce38cb Auto-Submitted: auto-generated ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1662643945; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=sTaumo8zZrTDBLWTdfZHPzB08rVgg8g5JPM0AxMwQX8=; b=sX8QMmcU/MNhBA1CINrz6seDhT9dzx+Z9R3msMr60YbffgVHYiQt2UtupqS4p/iwqP7ki3 ubNPkNfDjFGOa0OZfNNdNUxmzZw7rtTXtZxtNXzbvIKCj/0u5yKu0pglgwQ8ucUtPcyNvR dadDgKNCkIunVKN7o/8kyncS7aah766zwq/VwN4mUg54ffgYEnzqmu+hKO5+uShNXxfTB0 cAl/1EuGTd5qLzZj5fdN/GEnoMSa8sQvBeeT3xZjrXjFOGKAVGRPEA1+ui78BLboxTf/9e JISQEV9q7Q87hSxKJd99iPZE1UH7ys422Z9363JUB4ydQAKjGK6DygDZ99QNQg== ARC-Seal: i=1; s=dkim; d=freebsd.org; t=1662643945; a=rsa-sha256; cv=none; b=gvKeV1ArGUhczH46g2wO+Pzzf2qfscc4p69CNZtU/OYZp9ER5tJZcdkH8FtaKfG+/RMapP ZjZ9AoPWZfqUnineCKj5YgytU2Q30txcu61uC3hdndVtiXPz3bbFTmcGkQaq6qbXZgoRO9 
UAG+yrGlLOD3V9r8OpLH+qkW6bl7sNHxKOjA5gJwb/OyjaxxoXiLovjoqCcdq89nYSWcsX xPg0YI+Wfphojl7Ek3svzZj0apPify1mbqcS/wsyfjga2bYOmTBw4yPt5bYSRqmB54/cCF R0hpoWPDSRl/hDUj91WyW2jCsYIt9WM/n9Ww+WyL7YbBtSrQhCScpBvLw54aTg== ARC-Authentication-Results: i=1; mx1.freebsd.org; none X-ThisMailContainsUnwantedMimeParts: N The branch main has been updated by andrew: URL: https://cgit.FreeBSD.org/src/commit/?id=51a1bf7ba7eb79c760161a2054c113978dce38cb commit 51a1bf7ba7eb79c760161a2054c113978dce38cb Author: Andrew Turner AuthorDate: 2022-09-07 11:12:30 +0000 Commit: Andrew Turner CommitDate: 2022-09-08 13:29:37 +0000 Import an optimized arm64 memcmp into the kernel Bring in a version of the Arm Optimized Routines memcmp from before the VFP registers were used. Imported with modification from: https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S Sponsored by: The FreeBSD Foundation --- sys/arm64/arm64/memcmp.S | 136 +++++++++++++++++++++++++++++++++++++++++++++++ sys/conf/files.arm64 | 3 +- 2 files changed, 137 insertions(+), 2 deletions(-) diff --git a/sys/arm64/arm64/memcmp.S b/sys/arm64/arm64/memcmp.S new file mode 100644 index 000000000000..8517a181f3f3 --- /dev/null +++ b/sys/arm64/arm64/memcmp.S @@ -0,0 +1,136 @@ +/* memcmp - compare memory + * + * Copyright (c) 2013-2020, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +/* Assumptions: + * + * ARMv8-a, AArch64, unaligned accesses. + */ + +#include <machine/asm.h> + +#define L(l) .L ## l + +/* Parameters and result. */ +#define src1 x0 +#define src2 x1 +#define limit x2 +#define result w0 + +/* Internal variables. 
*/ +#define data1 x3 +#define data1w w3 +#define data1h x4 +#define data2 x5 +#define data2w w5 +#define data2h x6 +#define tmp1 x7 +#define tmp2 x8 + /* memcmp: compare limit (x2) bytes at src1 (x0) against src2 (x1). result (w0) is 0 when every byte matches; otherwise it is negative or positive according to the unsigned order of the first differing bytes. */ +ENTRY (memcmp) + subs limit, limit, 8 /* limit = n - 8; a borrow means fewer than 8 bytes. */ + b.lo L(less8) + + ldr data1, [src1], 8 + ldr data2, [src2], 8 + cmp data1, data2 + b.ne L(return) + + subs limit, limit, 8 + b.gt L(more16) + /* At most 16 bytes in total: limit is <= 0 here, so this reloads the final 8 bytes, overlapping bytes already compared. */ + ldr data1, [src1, limit] + ldr data2, [src2, limit] + b L(return) + +L(more16): + ldr data1, [src1], 8 + ldr data2, [src2], 8 + cmp data1, data2 + bne L(return) + + /* Jump directly to comparing the last 16 bytes for 32 byte (or less) + strings. */ + subs limit, limit, 16 + b.ls L(last_bytes) + + /* We overlap loads between 0-32 bytes at either side of SRC1 when we + try to align, so limit it only to strings larger than 128 bytes. */ + cmp limit, 96 + b.ls L(loop16) + + /* Align src1 and adjust src2 with bytes not yet done. */ + and tmp1, src1, 15 + add limit, limit, tmp1 + sub src1, src1, tmp1 + sub src2, src2, tmp1 + + /* Loop performing 16 bytes per iteration using aligned src1. + Limit is pre-decremented by 16 and must be larger than zero. + Exit if <= 16 bytes left to do or if the data is not equal. */ + .p2align 4 +L(loop16): + ldp data1, data1h, [src1], 16 + ldp data2, data2h, [src2], 16 + subs limit, limit, 16 + ccmp data1, data2, 0, hi /* Compare only while limit > 0; otherwise force "not equal". */ + ccmp data1h, data2h, 0, eq /* Check the high pair only when the low pair matched. */ + b.eq L(loop16) + /* Loop exit: find out whether a data mismatch (rather than limit) ended it. */ + cmp data1, data2 + bne L(return) + mov data1, data1h + mov data2, data2h + cmp data1, data2 + bne L(return) + + /* Compare last 1-16 bytes using unaligned access. limit is zero or negative here, so the pointers step back to cover the final 16 bytes. */ +L(last_bytes): + add src1, src1, limit + add src2, src2, limit + ldp data1, data1h, [src1] + ldp data2, data2h, [src2] + cmp data1, data2 + bne L(return) + mov data1, data1h + mov data2, data2h + cmp data1, data2 + + /* Compare data bytes and set return value to 0, -1 or 1. On little-endian the words are byte-reversed first so that the unsigned compare is decided by the lowest-addressed differing byte. */ +L(return): +#ifndef __AARCH64EB__ + rev data1, data1 + rev data2, data2 +#endif + cmp data1, data2 +L(ret_eq): /* Entered with flags already set: ne gives 1, which is negated when lo. */ + cset result, ne + cneg result, result, lo + ret + + .p2align 4 + /* Compare fewer than 8 bytes. 
Limit is [-8..-1], i.e. 0-7 bytes remain. */ +L(less8): + adds limit, limit, 4 /* Carry clear means fewer than 4 bytes remain. */ + b.lo L(less4) + ldr data1w, [src1], 4 + ldr data2w, [src2], 4 + cmp data1w, data2w + b.ne L(return) + sub limit, limit, 4 +L(less4): + adds limit, limit, 4 /* limit = bytes still to compare (0-3). */ + beq L(ret_eq) /* Nothing left: L(ret_eq) yields 0 from these flags. */ +L(byte_loop): + ldrb data1w, [src1], 1 + ldrb data2w, [src2], 1 + subs limit, limit, 1 + ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ + b.eq L(byte_loop) /* Continue while bytes remain and the pair matched. */ + sub result, data1w, data2w /* Difference of the last byte pair read. */ + ret + +END (memcmp) + diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64 index a647d4e32230..d01b3f674e9a 100644 --- a/sys/conf/files.arm64 +++ b/sys/conf/files.arm64 @@ -9,8 +9,6 @@ kern/pic_if.m optional intrng kern/subr_devmap.c standard kern/subr_intr.c optional intrng kern/subr_physmem.c standard -libkern/memcmp.c standard \ - compile-with "${NORMAL_C:N-fsanitize*}" libkern/memset.c standard \ compile-with "${NORMAL_C:N-fsanitize*}" libkern/strlen.c standard @@ -60,6 +58,7 @@ arm64/arm64/locore.S standard no-obj arm64/arm64/machdep.c standard arm64/arm64/machdep_boot.c standard arm64/arm64/mem.c standard +arm64/arm64/memcmp.S standard arm64/arm64/memcpy.S standard arm64/arm64/minidump_machdep.c standard arm64/arm64/mp_machdep.c optional smp