From nobody Sat Aug 9 20:14:10 2025 X-Original-To: dev-commits-src-main@mlmmj.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mlmmj.nyi.freebsd.org (Postfix) with ESMTP id 4bzsYb1HFGz64JF5; Sat, 09 Aug 2025 20:14:11 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "R10" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4bzsYZ6lbdz4Ql5; Sat, 09 Aug 2025 20:14:10 +0000 (UTC) (envelope-from git@FreeBSD.org) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1754770451; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=LiLRV6AnOeoM8o1g60Xieg165nVZOTthJiZjlOvv6R0=; b=JGKDJks2rEzkUiI1OpET5uIzGBLPaavK38LGVwZfORxnYEOOY78LXgi4wPzFGo3XikRIM5 JPAmKBawMBxmgpfkyKqMwHVADM7DzbZtCUzJ5zlCe2beSEFV96RPblDvU8AqLWzhpwtMFP oahJdozGuBhfchDU7neDCx8mhq8iBz0ffJrTZbfq/6Muvvwz+W6QWq425PMW+VR6XMIr7J dj/YHmSqE4GGpfg0vqtLZ56gyJL0rGN7gQFddJKh7uRkv2sAtxPPNZbvuEt+CFJiwNw5MS 9/UGS3Y2p44bsHdDSNn6IYMPlftXC2N8LUsX2wEG4dW8i6tVWAKUkQjjhe8sxA== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=freebsd.org; s=dkim; t=1754770451; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding; bh=LiLRV6AnOeoM8o1g60Xieg165nVZOTthJiZjlOvv6R0=; b=blZ4TbIW1+ZDK+Tw+t0OUrpsn7eGfasjQlhkdFGgZcsvr75/RCOB11TUiBS/AfJfWhsaLD 9c+Ji1IKvFOWsRVJ+77GXqlh7ZjZoRCud1dYalPbX6DdXfJdISwH85ZEy3yLU3suwNlRgW z+BFkaGPPKQXrUsMkw6hhp3e2lfn3Fn9lpIJCRpHOKHkw4AETYOK+UtCUFsksF+kvSAnH0 SJXQ9TcEBOXXEsyYn2bKDxf50EouXMhce90RFHB+WV+crvp1A7KpCxb/a59Tt7Io7VB6mV LOzI3zzPA5Eit4pWaZC6bBDgkW+882OMupTf9OUOVBx/rPOBsXSHwgXUtC6KxQ== ARC-Authentication-Results: i=1; mx1.freebsd.org; none ARC-Seal: i=1; s=dkim; d=freebsd.org; t=1754770451; a=rsa-sha256; cv=none; b=WUXVz03A0IwmyQuBFddZNPTxz5PzY2/tvpwUCMs+oGPR9IklxVLgMR7DgtEGIWsATZ5oQo OrWUV8ZyRSdPvTz9wVNItDYuonSIl/ZBASLUULkmHx+pXr4285lo5pDsAaex1+WSxY+Ldx rlaRYD5LiSyNo/81QOaWZytwPQuo6KpXSKQ+wc7wlTNlxcn8TUNYJXL//PZEvkaKujhY/x YBhWUnRq8sa+43/wqtcugpOEfaS6Jd2+2pcRsnEAsjpmb4Cv0DKGMz15OXCcbhE6ZICjoh BO5u1G3U18Y1Kw/E3yeNfFOAdzGk/6LvHuVhY6wmc26xaFt/uNwPz8Dc3Xqu3w== Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4bzsYZ5z9kzcQp; Sat, 09 Aug 2025 20:14:10 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.18.1/8.18.1) with ESMTP id 579KEAbl086163; Sat, 9 Aug 2025 20:14:10 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.18.1/8.18.1/Submit) id 579KEA7b086160; Sat, 9 Aug 2025 20:14:10 GMT (envelope-from git) Date: Sat, 9 Aug 2025 20:14:10 GMT Message-Id: <202508092014.579KEA7b086160@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org From: Robert Clausecker Subject: git: 4b15965daa99 - main - libc/amd64: rewrite memrchr() baseline impl. to read the string from the back List-Id: Commit messages for the main branch of the src repository List-Archive: https://lists.freebsd.org/archives/dev-commits-src-main List-Help: List-Post: List-Subscribe: List-Unsubscribe: X-BeenThere: dev-commits-src-main@freebsd.org Sender: owner-dev-commits-src-main@FreeBSD.org MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: fuz X-Git-Repository: src X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: 4b15965daa99044daf184221b7c283bf7f2d7e66 Auto-Submitted: auto-generated The branch main has been updated by fuz: URL: https://cgit.FreeBSD.org/src/commit/?id=4b15965daa99044daf184221b7c283bf7f2d7e66 commit 4b15965daa99044daf184221b7c283bf7f2d7e66 Author: Robert Clausecker AuthorDate: 2025-07-29 20:12:32 +0000 Commit: Robert Clausecker CommitDate: 2025-08-09 20:13:27 +0000 libc/amd64: rewrite memrchr() baseline impl. to read the string from the back This ensures O(1) behaviour if the character is a constant offset from the end of the string, regardless of how long the string is. Reported by: Mikael Simonsson Reviewed by: benni PR: 288321 MFC after: 1 month --- lib/libc/amd64/string/memrchr.S | 152 +++++++++++++++++++--------------------- 1 file changed, 74 insertions(+), 78 deletions(-) diff --git a/lib/libc/amd64/string/memrchr.S b/lib/libc/amd64/string/memrchr.S index 4f6c5a238daa..f1ba48d6bb41 100644 --- a/lib/libc/amd64/string/memrchr.S +++ b/lib/libc/amd64/string/memrchr.S @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2023 Robert Clausecker + * Copyright (c) 2023, 2025 Robert Clausecker */ #include @@ -71,95 +71,91 @@ ARCHENTRY(memrchr, scalar) ARCHEND(memrchr, scalar) ARCHENTRY(memrchr, baseline) - movd %esi, %xmm4 - test %rdx, %rdx # empty buffer? - jz .L0 # if yes, return immediately + test %rdx, %rdx # empty input? + je .Lnomatchb + + + lea (%rdi, %rdx, 1), %ecx # pointer to end of buffer + lea -1(%rdi, %rdx, 1), %rdx # pointer to last char in buffer + movd %esi, %xmm2 + and $~0x1f, %rdx # pointer to final 32 buffer bytes + movdqa (%rdx), %xmm0 # load last 32 bytes + movdqa 16(%rdx), %xmm1 + + punpcklbw %xmm2, %xmm2 # c -> cc - punpcklbw %xmm4, %xmm4 # c -> cc - mov %edi, %ecx - punpcklwd %xmm4, %xmm4 # cc -> cccc - and $~0xf, %rdi # align source pointer - pshufd $0, %xmm4, %xmm4 # cccc -> cccccccccccccccc - and $0xf, %ecx - movdqa %xmm4, %xmm0 mov $-1, %r8d - pcmpeqb (%rdi), %xmm0 # compare aligned head - shl %cl, %r8d # mask of bytes in the head of the buffer - pmovmskb %xmm0, %eax + neg %ecx + mov %r8d, %r9d + shr %cl, %r8d # mask with zeroes after the string - sub $16, %rcx - and %r8d, %eax # match mask - add %rcx, %rdx # advance past head - cmc - jbe .Lrunt # did the string end in the buffer? + punpcklwd %xmm2, %xmm2 # cc -> cccc - mov %rdi, %rsi # pointer to matching chunk - add $16, %rdi - sub $16, %rdx # enough left for another round? - jbe 1f + mov %edi, %ecx + mov %r9d, %eax + shl %cl, %r9d # mask with zeroes before the string - /* main loop unrolled twice */ - ALIGN_TEXT -0: movdqa %xmm4, %xmm0 - pcmpeqb (%rdi), %xmm0 - pmovmskb %xmm0, %r8d + pshufd $0, %xmm2, %xmm2 # cccc -> cccccccccccccccc - cmp $16, %rdx # enough left for second chunk? - jbe 2f + cmp %rdx, %rdi # tail is beginning of buffer? + cmovae %r9d, %eax # if yes, do combined head/tail processing + and %r8d, %eax # mak of bytes in tail part of string - movdqa %xmm4, %xmm0 - pcmpeqb 16(%rdi), %xmm0 + /* process tail */ + pcmpeqb %xmm2, %xmm1 + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm1, %esi pmovmskb %xmm0, %ecx + shl $16, %esi + or %esi, %ecx # locations of matches + and %ecx, %eax # any match inside buffer? + jnz .Lprecisematchb - lea 16(%rdi), %r9 - test %ecx, %ecx # match found in second chunk? - cmovz %r8d, %ecx # if not, use match data from first chunk - cmovz %rdi, %r9 - - test %ecx, %ecx # any match found? - cmovnz %ecx, %eax # if yes, overwrite previously found match - cmovnz %r9, %rsi - - add $32, %rdi # advance to next iteration - sub $32, %rdx # advance to next chunks - ja 0b - - /* process remaining 1--16 bytes */ -1: pcmpeqb (%rdi), %xmm4 - mov $0xffff, %r8d - xor %ecx, %ecx - sub %edx, %ecx # number of bytes to be masked out - pmovmskb %xmm4, %r9d - shr %cl, %r8d # mask of bytes to be kept in the buffer - and %r9d, %r8d - cmovnz %r8d, %eax - cmovnz %rdi, %rsi - bsr %eax, %eax - lea (%rsi, %rax, 1), %rsi # pointer to match (or junk) - cmovnz %rsi, %rax # if any match was found, return it - ret + cmp %rdx, %rdi # did the buffer begin here? + jae .Lnomatchb # if yes, we are done - /* end of chunk reached within first half iteration */ -2: test %r8d, %r8d # match in previous chunk? - cmovnz %r8d, %eax # if yes, overwrite previous chunks - cmovnz %rdi, %rsi - add $16, %rdi # point to tail - sub $16, %edx - jmp 1b # handle tail the same otherwise - - /* runt: string ends within head, edx has negated amount of invalid head bytes */ -.Lrunt: mov $0xffff, %r8d - xor %ecx, %ecx - sub %edx, %ecx - shr %cl, %r8d - and %r8d, %eax - bsr %eax, %eax - lea (%rdi, %rax, 1), %rdi - cmovnz %rdi, %rax + /* main loop */ + ALIGN_TEXT +0: movdqa -32(%rdx), %xmm0 # load previous string chunk + movdqa -16(%rdx), %xmm1 + sub $32, %rdx # beginning of string reached? + cmp %rdx, %rdi + jae .Ltailb + + pcmpeqb %xmm2, %xmm0 + pcmpeqb %xmm2, %xmm1 + por %xmm1, %xmm0 # match in either half? + pmovmskb %xmm0, %eax + test %eax, %eax + jz 0b + +.Lmatchb: + pcmpeqb (%rdx), %xmm2 # redo comparison of first 16 bytes + pmovmskb %xmm1, %ecx + pmovmskb %xmm2, %eax + shl $16, %ecx + or %ecx, %eax # location of matches + +.Lprecisematchb: + bsr %eax, %eax # find location of match + add %rdx, %rax # point to matching byte ret - /* empty buffer: return a null pointer */ -.L0: xor %eax, %eax +.Ltailb: + pcmpeqb %xmm2, %xmm1 + pcmpeqb %xmm2, %xmm0 + pmovmskb %xmm1, %ecx + pmovmskb %xmm0, %eax + shl $16, %ecx + or %ecx, %eax # location of matches + and %r9d, %eax # mask out matches before buffer + bsr %eax, %edi # location of match + lea (%rdx, %rdi, 1), %rdx # pointer to match (if any) + cmovnz %rdx, %rax # point to match if present, + ret # else null pointer + +.Lnomatchb: + xor %eax, %eax # return null pointer ret ARCHEND(memrchr, baseline)