Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 6 Jul 2023 17:48:31 GMT
From:      Mitchell Horne <mhorne@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: a89262079edb - main - Consistently provide ffs/fls using builtins
Message-ID:  <202307061748.366HmVsO015343@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by mhorne:

URL: https://cgit.FreeBSD.org/src/commit/?id=a89262079edba80ba6133e81af1e14ab7386f53a

commit a89262079edba80ba6133e81af1e14ab7386f53a
Author:     Mitchell Horne <mhorne@FreeBSD.org>
AuthorDate: 2023-07-06 17:45:39 +0000
Commit:     Mitchell Horne <mhorne@FreeBSD.org>
CommitDate: 2023-07-06 17:46:41 +0000

    Consistently provide ffs/fls using builtins
    
    Use of compiler builtin ffs/ctz functions will result in optimized
    instruction sequences when possible, and fall back to calling a function
    provided by the compiler run-time library. We have slowly shifted our
    platforms to take advantage of these builtins in 60645781d613 (arm64),
    1c76d3a9fbef (arm), 9e319462a03a (powerpc, partial).
    
    Some platforms still rely on the libkern implementations of these
    functions provided by libkern, namely riscv, powerpc (ffs*, flsll), and
    i386 (ffsll and flsll). These routines are slow, as they perform a
    linear search for the bit in question. Even on platforms lacking
    dedicated bit-search instructions, such as riscv, the compiler library
    will provide better-optimized routines, e.g. by using binary search.
    
    Consolidate all definitions of these functions (whether currently using
    builtins or not) to libkern.h. This should result in equivalent or
    better performing routines in all cases.
    
    One wart in all of this is the existing HAVE_INLINE_F*** macros, which
    we use in a few places to conditionally avoid the slow libkern routines.
    These aren't easily removed in one commit. For now, provide these
    defines unconditionally, but marked for removal after subsequent
    cleanup.
    
    Removal of the now unused libkern routines will follow in the next
    commit.
    
    Reviewed by:    dougm, imp (previous version)
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D40698
---
 sys/amd64/include/cpufunc.h   | 37 ---------------------
 sys/arm/include/cpufunc.h     | 57 --------------------------------
 sys/arm64/include/cpufunc.h   | 58 ---------------------------------
 sys/conf/files.i386           |  2 --
 sys/conf/files.powerpc        |  4 ---
 sys/conf/files.riscv          |  6 ----
 sys/i386/include/cpufunc.h    | 42 ------------------------
 sys/powerpc/include/cpufunc.h | 14 --------
 sys/sys/libkern.h             | 75 ++++++++++++++++++++++++++++++++-----------
 9 files changed, 57 insertions(+), 238 deletions(-)

diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h
index 09f63b865948..5f252336e7b8 100644
--- a/sys/amd64/include/cpufunc.h
+++ b/sys/amd64/include/cpufunc.h
@@ -127,43 +127,6 @@ enable_intr(void)
 	__asm __volatile("sti");
 }
 
-#ifdef _KERNEL
-
-#define	HAVE_INLINE_FFS
-#define	ffs(x)		__builtin_ffs(x)
-
-#define	HAVE_INLINE_FFSL
-#define	ffsl(x)		__builtin_ffsl(x)
-
-#define	HAVE_INLINE_FFSLL
-#define	ffsll(x)	__builtin_ffsll(x)
-
-#define	HAVE_INLINE_FLS
-
-static __inline __pure2 int
-fls(int mask)
-{
-	return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
-}
-
-#define	HAVE_INLINE_FLSL
-
-static __inline __pure2 int
-flsl(long mask)
-{
-	return (mask == 0 ? mask : (int)bsrq((u_long)mask) + 1);
-}
-
-#define	HAVE_INLINE_FLSLL
-
-static __inline __pure2 int
-flsll(long long mask)
-{
-	return (flsl((long)mask));
-}
-
-#endif /* _KERNEL */
-
 static __inline void
 halt(void)
 {
diff --git a/sys/arm/include/cpufunc.h b/sys/arm/include/cpufunc.h
index a5db3486aff1..0df52a5e7cf3 100644
--- a/sys/arm/include/cpufunc.h
+++ b/sys/arm/include/cpufunc.h
@@ -183,63 +183,6 @@ void cpu_reset		(void) __attribute__((__noreturn__));
 extern int	arm_dcache_align;
 extern int	arm_dcache_align_mask;
 
-
-#define	HAVE_INLINE_FFS
-
-static __inline __pure2 int
-ffs(int mask)
-{
-
-	return (__builtin_ffs(mask));
-}
-
-#define	HAVE_INLINE_FFSL
-
-static __inline __pure2 int
-ffsl(long mask)
-{
-
-	return (__builtin_ffsl(mask));
-}
-
-#define	HAVE_INLINE_FFSLL
-
-static __inline __pure2 int
-ffsll(long long mask)
-{
-
-	return (__builtin_ffsll(mask));
-}
-
-#define	HAVE_INLINE_FLS
-
-static __inline __pure2 int
-fls(int mask)
-{
-
-	return (mask == 0 ? 0 :
-	    8 * sizeof(mask) - __builtin_clz((u_int)mask));
-}
-
-#define	HAVE_INLINE_FLSL
-
-static __inline __pure2 int
-flsl(long mask)
-{
-
-	return (mask == 0 ? 0 :
-	    8 * sizeof(mask) - __builtin_clzl((u_long)mask));
-}
-
-#define	HAVE_INLINE_FLSLL
-
-static __inline __pure2 int
-flsll(long long mask)
-{
-
-	return (mask == 0 ? 0 :
-	    8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask));
-}
 #else	/* !_KERNEL */
 
 static __inline void
diff --git a/sys/arm64/include/cpufunc.h b/sys/arm64/include/cpufunc.h
index 691474bac90e..f2de8a0e560c 100644
--- a/sys/arm64/include/cpufunc.h
+++ b/sys/arm64/include/cpufunc.h
@@ -37,64 +37,6 @@ breakpoint(void)
 }
 
 #ifdef _KERNEL
-
-#define	HAVE_INLINE_FFS
-
-static __inline __pure2 int
-ffs(int mask)
-{
-
-	return (__builtin_ffs(mask));
-}
-
-#define	HAVE_INLINE_FFSL
-
-static __inline __pure2 int
-ffsl(long mask)
-{
-
-	return (__builtin_ffsl(mask));
-}
-
-#define	HAVE_INLINE_FFSLL
-
-static __inline __pure2 int
-ffsll(long long mask)
-{
-
-	return (__builtin_ffsll(mask));
-}
-
-#define	HAVE_INLINE_FLS
-
-static __inline __pure2 int
-fls(int mask)
-{
-
-	return (mask == 0 ? 0 :
-	    8 * sizeof(mask) - __builtin_clz((u_int)mask));
-}
-
-#define	HAVE_INLINE_FLSL
-
-static __inline __pure2 int
-flsl(long mask)
-{
-
-	return (mask == 0 ? 0 :
-	    8 * sizeof(mask) - __builtin_clzl((u_long)mask));
-}
-
-#define	HAVE_INLINE_FLSLL
-
-static __inline __pure2 int
-flsll(long long mask)
-{
-
-	return (mask == 0 ? 0 :
-	    8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask));
-}
-
 #include <machine/armreg.h>
 
 void pan_enable(void);
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index cd1aeb4850b7..1d4567824b1b 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -141,8 +141,6 @@ kern/imgact_aout.c		optional compat_aout
 kern/subr_sfbuf.c		standard
 libkern/divdi3.c		standard
 libkern/divmoddi4.c		standard
-libkern/ffsll.c			standard
-libkern/flsll.c			standard
 libkern/memcmp.c		standard
 libkern/memset.c		standard
 libkern/moddi3.c		standard
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index 4a88e66800ae..927c27416d88 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -189,10 +189,6 @@ libkern/ashrdi3.c		optional	powerpc | powerpcspe
 libkern/bcopy.c			standard
 libkern/cmpdi2.c		optional	powerpc | powerpcspe
 libkern/divdi3.c		optional	powerpc | powerpcspe
-libkern/ffs.c			standard
-libkern/ffsl.c			standard
-libkern/ffsll.c			standard
-libkern/flsll.c			standard
 libkern/lshrdi3.c		optional	powerpc | powerpcspe
 libkern/memcmp.c		standard
 libkern/memset.c		standard
diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
index 192a309c989e..aadeb2b0917a 100644
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -23,12 +23,6 @@ kern/subr_dummy_vdso_tc.c	standard
 kern/subr_intr.c		standard
 kern/subr_physmem.c		standard
 libkern/bcopy.c			standard
-libkern/ffs.c			standard
-libkern/ffsl.c			standard
-libkern/ffsll.c			standard
-libkern/fls.c			standard
-libkern/flsl.c			standard
-libkern/flsll.c			standard
 libkern/memcmp.c		standard
 libkern/memset.c		standard
 libkern/strcmp.c		standard
diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h
index 2b517b12fba0..e107816afced 100644
--- a/sys/i386/include/cpufunc.h
+++ b/sys/i386/include/cpufunc.h
@@ -180,48 +180,6 @@ sfence(void)
 	__asm __volatile("sfence" : : : "memory");
 }
 
-#ifdef _KERNEL
-
-#define	HAVE_INLINE_FFS
-
-static __inline __pure2 int
-ffs(int mask)
-{
-	/*
-	 * Note that gcc-2's builtin ffs would be used if we didn't declare
-	 * this inline or turn off the builtin.  The builtin is faster but
-	 * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later
-	 * versions.
-	 */
-	 return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1);
-}
-
-#define	HAVE_INLINE_FFSL
-
-static __inline __pure2 int
-ffsl(long mask)
-{
-	return (ffs((int)mask));
-}
-
-#define	HAVE_INLINE_FLS
-
-static __inline __pure2 int
-fls(int mask)
-{
-	return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1);
-}
-
-#define	HAVE_INLINE_FLSL
-
-static __inline __pure2 int
-flsl(long mask)
-{
-	return (fls((int)mask));
-}
-
-#endif /* _KERNEL */
-
 static __inline void
 halt(void)
 {
diff --git a/sys/powerpc/include/cpufunc.h b/sys/powerpc/include/cpufunc.h
index d04efb63288b..c5148b009246 100644
--- a/sys/powerpc/include/cpufunc.h
+++ b/sys/powerpc/include/cpufunc.h
@@ -258,20 +258,6 @@ get_pcpu(void)
 	return (ret);
 }
 
-#define	HAVE_INLINE_FLS
-static __inline __pure2 int
-fls(int mask)
-{
-	return (mask ? 32 - __builtin_clz(mask) : 0);
-}
-
-#define HAVE_INLINE_FLSL
-static __inline __pure2 int
-flsl(long mask)
-{
-	return (mask ? (8 * sizeof(long) - __builtin_clzl(mask)) : 0);
-}
-
 /* "NOP" operations to signify priorities to the kernel. */
 static __inline void
 nop_prio_vlow(void)
diff --git a/sys/sys/libkern.h b/sys/sys/libkern.h
index 8adeeb499984..83522ce7ddbf 100644
--- a/sys/sys/libkern.h
+++ b/sys/sys/libkern.h
@@ -132,24 +132,63 @@ void	 arc4rand(void *, u_int, int);
 int	 timingsafe_bcmp(const void *, const void *, size_t);
 void	*bsearch(const void *, const void *, size_t,
 	    size_t, int (*)(const void *, const void *));
-#ifndef	HAVE_INLINE_FFS
-int	 ffs(int);
-#endif
-#ifndef	HAVE_INLINE_FFSL
-int	 ffsl(long);
-#endif
-#ifndef	HAVE_INLINE_FFSLL
-int	 ffsll(long long);
-#endif
-#ifndef	HAVE_INLINE_FLS
-int	 fls(int);
-#endif
-#ifndef	HAVE_INLINE_FLSL
-int	 flsl(long);
-#endif
-#ifndef	HAVE_INLINE_FLSLL
-int	 flsll(long long);
-#endif
+
+/*
+ * MHTODO: remove the 'HAVE_INLINE_FOO' defines once use of these flags has
+ * been purged everywhere. For now we provide them unconditionally.
+ */
+#define	HAVE_INLINE_FFS
+#define	HAVE_INLINE_FFSL
+#define	HAVE_INLINE_FFSLL
+#define	HAVE_INLINE_FLS
+#define	HAVE_INLINE_FLSL
+#define	HAVE_INLINE_FLSLL
+
+static __inline __pure2 int
+ffs(int mask)
+{
+
+	return (__builtin_ffs((u_int)mask));
+}
+
+static __inline __pure2 int
+ffsl(long mask)
+{
+
+	return (__builtin_ffsl((u_long)mask));
+}
+
+static __inline __pure2 int
+ffsll(long long mask)
+{
+
+	return (__builtin_ffsll((unsigned long long)mask));
+}
+
+static __inline __pure2 int
+fls(int mask)
+{
+
+	return (mask == 0 ? 0 :
+	    8 * sizeof(mask) - __builtin_clz((u_int)mask));
+}
+
+static __inline __pure2 int
+flsl(long mask)
+{
+
+	return (mask == 0 ? 0 :
+	    8 * sizeof(mask) - __builtin_clzl((u_long)mask));
+}
+
+static __inline __pure2 int
+flsll(long long mask)
+{
+
+	return (mask == 0 ? 0 :
+	    8 * sizeof(mask) - __builtin_clzll((unsigned long long)mask));
+}
+
 #define	bitcount64(x)	__bitcount64((uint64_t)(x))
 #define	bitcount32(x)	__bitcount32((uint32_t)(x))
 #define	bitcount16(x)	__bitcount16((uint16_t)(x))



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202307061748.366HmVsO015343>