Date: Sun, 30 Nov 2025 01:45:45 +0000 From: Robert Clausecker <fuz@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Cc: Strahinja Stanišić <strajabot@FreeBSD.org> Subject: git: 8a02704131b8 - stable/15 - libc: scalar strrchr() in RISC-V assembly Message-ID: <692ba1c9.29cae.10676243@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch stable/15 has been updated by fuz: URL: https://cgit.FreeBSD.org/src/commit/?id=8a02704131b84826f4a327097361199d9762a471 commit 8a02704131b84826f4a327097361199d9762a471 Author: Strahinja Stanišić <strajabot@FreeBSD.org> AuthorDate: 2024-10-24 16:18:07 +0000 Commit: Robert Clausecker <fuz@FreeBSD.org> CommitDate: 2025-11-30 00:43:05 +0000 libc: scalar strrchr() in RISC-V assembly Implements strrchr in RISC-V assembly, leading to the following improvements (performance measured on SiFive HF105-001) os: FreeBSD arch: riscv │ strrchr_baseline │ strrchr_scalar │ │ sec/op │ sec/op vs base │ Short 837.2µ ± 1% 574.6µ ± 1% -31.37% (p=0.000 n=20+21) Mid 639.7µ ± 0% 269.7µ ± 0% -57.84% (p=0.000 n=20+21) Long 589.1µ ± 0% 176.7µ ± 0% -70.01% (p=0.000 n=20+21) geomean 680.8µ 301.4µ -55.73% │ strrchr_baseline │ strrchr_scalar │ │ MiB/s │ MiB/s vs base │ Short 149.3 ± 1% 217.6 ± 1% +45.71% (p=0.000 n=20+21) Mid 195.4 ± 0% 463.6 ± 0% +137.22% (p=0.000 n=20+21) Long 212.2 ± 0% 707.4 ± 0% +233.40% (p=0.000 n=20+21) geomean 183.6 414.7 +125.88% MFC after: 1 month MFC to: stable/15 Approved by: mhorne, markj (mentor) Sponsored by: Google LLC (GSoC 2024) Differential Revision: https://reviews.freebsd.org/D47275 (cherry picked from commit df21a004be237a1dccd03c7b47254625eea62fa9) --- lib/libc/riscv/string/Makefile.inc | 2 + lib/libc/riscv/string/strrchr.S | 124 +++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+) diff --git a/lib/libc/riscv/string/Makefile.inc b/lib/libc/riscv/string/Makefile.inc new file mode 100644 index 000000000000..a9cf8bf52481 --- /dev/null +++ b/lib/libc/riscv/string/Makefile.inc @@ -0,0 +1,2 @@ +MDSRCS+= \ + strrchr.S diff --git a/lib/libc/riscv/string/strrchr.S b/lib/libc/riscv/string/strrchr.S new file mode 100644 index 000000000000..51f34ca21fac --- /dev/null +++ b/lib/libc/riscv/string/strrchr.S @@ -0,0 +1,124 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2024 Strahinja Stanisic <strajabot@FreeBSD.org> 
+ */ + +#include <machine/asm.h> + +/* + * a0 - const char *s + * a1 - int c + */ +ENTRY(strrchr) + /* + * a0 - const char *ptr_align + * a1 - temporary + * a2 - temporary + * a3 - temporary + * a4 - temporary + * a5 - const char[8] cccccccc + * a6 - const uint64_t *save_align + * a7 - const uint64_t save_iter + * t0 - const uintr64_t REP8_0X01 + * t1 - const uintr64_t REP8_0X80 + */ + + /* + * save_align = 0 + * save_iter = 0xFFFFFFFFFFFFFF00 + * REP8_0X01 = 0x0101010101010101 + * cccccccc = (char)c * REP8_0X01 + * REP8_0X80 = (REP8_0X80 << 7) << ((str % 8) * 8) + * ptr_align = str - str % 8 + */ + li t0, 0x01010101 + li a6, 0 + slli a2, a0, 3 + slli t1, t0, 32 + li a7, 0xFFFFFFFFFFFFFF00 + or t0, t0, t1 + andi a1, a1, 0xFF + slli t1, t0, 7 + andi a0, a0, ~0b111 + mul a5, a1, t0 + sll t1, t1, a2 + +.Lloop: /* do { */ + ld a1, 0(a0) /* a1 -> data = *ptr_align */ + not a3, a1 /* a3 -> nhz = ~data */ + xor a2, a1, a5 /* a2 -> iter = data ^ cccccccc */ + sub a1, a1, t0 /* a1 -> hz = data - REP8_0X01 */ + not a4, a2 /* a4 -> nhc = ~iter */ + and a1, a1, a3 /* hz = hz & nhz */ + sub a3, a2, t0 /* a3 -> hc = iter - REP8_0X01 */ + and a1, a1, t1 /* hz = hz & REP8_0X80 */ + and a3, a3, a4 /* hc = hc & nhc */ + addi a4, a1, -1 /* a4 -> mask_end = hz - 1 */ + and a3, a3, t1 /* hc = hc & REP8_0X80 */ + xor a4, a4, a1 /* mask_end = mask_end ^ hz */ + addi a0, a0, 8 /* ptr_align = ptr_align + 8 */ + and a3, a3, a4 /* hc = hc & mask_end */ + slli t1, t0, 7 /* REP8_0X80 = REP8_0X01 << 7 */ + not a4, a4 /* mask_end = ~mask_end */ + + beqz a3, .Lskip_save /* if(!hc) goto skip_save */ + or a2, a2, a4 /* iter = iter | mask_end */ + addi a6, a0, -8 /* save_align = ptr_align - 8 */ + mv a7, a2 /* save_iter = iter */ + +.Lskip_save: + beqz a1, .Lloop /* } while(!hz) */ + +.Lfind_char: + /* + * a1 -> iter = save_iter + * a2 -> mask_iter = 0xFF00000000000000 + * a3 -> match_off = 7 + */ + li a2, 0xFF + mv a1, a7 + slli a2, a2, 56 + li a3, 7 + + and a0, a1, a2 + srli a2, a2, 8 + beqz a0, 
.Lret + + addi a3, a3, -1 + and a0, a1, a2 + srli a2, a2, 8 + beqz a0, .Lret + + addi a3, a3, -1 + and a0, a1, a2 + srli a2, a2, 8 + beqz a0, .Lret + + addi a3, a3, -1 + and a0, a1, a2 + srli a2, a2, 8 + beqz a0, .Lret + + addi a3, a3, -1 + and a0, a1, a2 + srli a2, a2, 8 + beqz a0, .Lret + + addi a3, a3, -1 + and a0, a1, a2 + srli a2, a2, 8 + beqz a0, .Lret + + addi a3, a3, -1 + and a0, a1, a2 + srli a2, a2, 8 + beqz a0, .Lret + + addi a3, a3, -1 + +.Lret: + /* return save_align + match_offset */ + add a0, a6, a3 + ret +END(strrchr)help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?692ba1c9.29cae.10676243>
