Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 26 Mar 2026 11:42:14 +0000
From:      Robert Clausecker <fuz@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 253f15c016ca - main - libc/amd64/strrchr.S: rewrite and fix scalar implementation
Message-ID:  <69c51b96.3d400.1aff0881@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by fuz:

URL: https://cgit.FreeBSD.org/src/commit/?id=253f15c016ca699906f78b8e522a3f7ed675929b

commit 253f15c016ca699906f78b8e522a3f7ed675929b
Author:     Robert Clausecker <fuz@FreeBSD.org>
AuthorDate: 2026-03-22 12:37:06 +0000
Commit:     Robert Clausecker <fuz@FreeBSD.org>
CommitDate: 2026-03-26 11:41:49 +0000

    libc/amd64/strrchr.S: rewrite and fix scalar implementation
    
    The original scalar implementation of strrchr() had incorrect
    logic that failed if the character searched for was the NUL
    character.  It was also possibly affected by the issue fixed
    in 3d8ef251a for strchrnul().
    
    Rewrite the function with logic that actually works.  We defer
    checking for the character until after we have checked for NUL.
    When we encounter the final NUL byte, we mask out the characters
    beyond the tail before checking for a match.
    
    This bug only affects users running on amd64 with ARCHLEVEL=scalar
    (cf. simd(7)).  The default configuration is not affected.
    
    The bug was unfortunately not caught by the unit test inherited
    from NetBSD.  An extended unit test catching the issue is proposed
    in D56037.
    
    PR:             293915
    Reported by:    safonov.paul@gmail.com
    Tested by:      safonov.paul@gmail.com
    Fixes:          2ed514a220edbac6ca5ec9f40a3e0b3f2804796d
    See also:       https://reviews.freebsd.org/D56037
    MFC after:      1 week
---
 lib/libc/amd64/string/strrchr.S | 78 ++++++++++++++---------------------------
 1 file changed, 26 insertions(+), 52 deletions(-)

diff --git a/lib/libc/amd64/string/strrchr.S b/lib/libc/amd64/string/strrchr.S
index e397bbcd3478..29587456752a 100644
--- a/lib/libc/amd64/string/strrchr.S
+++ b/lib/libc/amd64/string/strrchr.S
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2023 The FreeBSD Foundation
+ * Copyright (c) 2026 Robert Clausecker <fuz@FreeBSD.org>
  *
  * This software was developed by Robert Clausecker <fuz@FreeBSD.org>
  * under sponsorship from the FreeBSD Foundation.
@@ -65,77 +66,50 @@ ARCHENTRY(strrchr, scalar)
 	xor	%rax, %rcx		# str ^ c
 	or	%r10, %rax		# ensure str != 0 before string
 	or	%r10, %rcx		# ensure str^c != 0 before string
-	bswap	%rcx			# in reverse order, to find last match
-	mov	%rdi, %r10		# location of initial mismatch (if any)
-	xor	%r11, %r11		# initial mismatch (none)
+	xor	%r11, %r11		# vector of last match (0 -> no match)
 	add	$8, %rdi		# advance to next iteration
 	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
 	not	%rax			# ~str
 	and	%rdx, %rax		# (str - 0x01..01) & ~str
-	and	%r9, %rax		# not including junk bits
-	jnz	1f			# end of string?
-
-	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
-	not	%rcx			# ~(str ^ c)
-	and	%rdx, %rcx		# ((str ^ c - 0x01..01) & ~(str ^ c)
-	and	%r9, %rcx		# not including junk bits
-	mov	%rcx, %r11		# remember mismatch in head
-	jmp	0f
-
-	/* main loop unrolled twice */
-	ALIGN_TEXT
-3:	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
-	not	%rcx			# ~(str ^ c)
-	and	%rdx, %rcx		# ((str ^ c - 0x01..01) & ~(str ^ c)
-	and	%r9, %rcx		# not including junk bits
-	lea	-8(%rdi), %rdx
-	cmovnz	%rdx, %r10		# remember location of current mismatch
-	cmovnz	%rcx, %r11
-
-0:	mov	(%rdi), %rax		# str
-	mov	%rsi, %rcx
-	xor	%rax, %rcx		# str ^ c
-	bswap	%rcx			# in reverse order, to find last match
-	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
-	not	%rax			# ~str
-	and	%rdx, %rax		# (str - 0x01..01) & ~str
-	and	%r9, %rax		# not including junk bits
+	and	%r9, %rax		# NUL bytes in str, not including junk bits
 	jnz	2f			# end of string?
 
+	/* main loop */
+	ALIGN_TEXT
+3:	mov	(%rdi), %rax		# str
+	bswap	%rcx			# (str ^ c) in reverse order, to find last match
 	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
 	not	%rcx			# ~(str ^ c)
 	and	%rdx, %rcx		# ((str ^ c - 0x01..01) & ~(str ^ c)
-	and	%r9, %rcx		# not including junk bits
-	cmovnz	%rdi, %r10		# remember location of current mismatch
-	cmovnz	%rcx, %r11
+	and	%r9, %rcx		# matches in str, not including junk bits
+	cmovnz	%rcx, %r11		# if match found, update match vector
+	cmovnz	%rdi, %r10		# ... and match pointer
 
-	mov	8(%rdi), %rax		# str
-	add	$16, %rdi
+	add	$8, %rdi		# advance to next iteration
 	mov	%rsi, %rcx
 	xor	%rax, %rcx		# str ^ c
-	bswap	%rcx
 	lea	(%rax, %r8, 1), %rdx	# str - 0x01..01
 	not	%rax			# ~str
 	and	%rdx, %rax		# (str - 0x01..01) & ~str
-	and	%r9, %rax		# not including junk bits
+	and	%r9, %rax		# NUL bytes in str, not including junk bits
 	jz	3b			# end of string?
 
-	/* NUL found */
-1:	sub	$8, %rdi		# undo advance past buffer
-2:	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
+	/* NUL found, check for match in tail */
+2:	mov	%rax, %rdx
+	neg	%rax
+	xor	%rdx, %rax		# all bytes behind the NUL byte
+	or	%rax, %rcx		# (str ^ c) without matches behind NUL byte
+	bswap	%rcx			# (str ^ c) in reverse order, to find last match
+	lea	(%rcx, %r8, 1), %rdx	# (str ^ c) - 0x01..01
 	not	%rcx			# ~(str ^ c)
 	and	%rdx, %rcx		# ((str ^ c - 0x01..01) & ~(str ^ c)
-	and	%r9, %rcx		# not including junk bits
-	lea	-1(%rax), %rdx
-	xor	%rdx, %rax		# mask of bytes in the string
-	bswap	%rdx			# in reverse order
-	and	%rdx, %rcx		# c found in the tail?
-	cmovnz	%rdi, %r10
-	cmovnz	%rcx, %r11
-	bswap	%r11			# unreverse byte order
-	bsr	%r11, %rcx		# last location of c in (R10)
-	shr	$3, %rcx		# as byte offset
-	lea	(%r10, %rcx, 1), %rax	# pointer to match
+	and	%r9, %rcx		# matches in str, not including junk bits
+	cmovnz	%rcx, %r11		# if match found, update match vector
+	cmovnz	%rdi, %r10		# ... and match pointer
+	tzcnt	%r11, %rcx		# location of last match
+	lea	-1(%r10), %rax		# address of last character in vector
+	shr	$3, %ecx		# as byte offset
+	sub	%rcx, %rax		# subtract character offset
 	test	%r11, %r11		# was there actually a match?
 	cmovz	%r11, %rax		# if not, return null pointer
 	ret


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69c51b96.3d400.1aff0881>