Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 3 Nov 2012 17:22:12 +1100 (EST)
From:      Peter Jeremy <peter@rulingia.com>
To:        FreeBSD-gnats-submit@FreeBSD.org
Subject:   kern/173322: [patch] Inline atomic operations in modules
Message-ID:  <201211030622.qA36MC6U070602@server.rulingia.com>
Resent-Message-ID: <201211030640.qA36e1QB029480@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         173322
>Category:       kern
>Synopsis:       [patch] Inline atomic operations in modules
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          change-request
>Submitter-Id:   current-users
>Arrival-Date:   Sat Nov 03 06:40:00 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     Peter Jeremy
>Release:        FreeBSD 8.3-STABLE amd64
>Organization:
FreeBSD
>Environment:
System: FreeBSD server.rulingia.com 8.3-STABLE FreeBSD 8.3-STABLE #18 r237444M: Sun Jul 8 10:47:08 EST 2012 root@server.rulingia.com:/var/obj/usr/src/sys/server amd64

>Description:
	In r49999, atomic operations on x86 were changed so that they
	were not inlined within kernel modules but instead generated calls
	to kernel functions.  This allowed kernel modules to run on both
	UP and SMP systems without incurring the overhead of unnecessary
	LOCK instructions, though at the expense of having call/return
	overheads.

	At the time, this seemed a reasonable tradeoff - SMP systems
	were not common and LOCK prefixes were extremely slow on some
	CPUs.

	SMP systems are now far more common - probably more common than
	UP systems on the desktop or in servers.  Whilst LOCK prefixes
	are still slow, the impact of LOCK prefixes is probably lower
	now than a decade ago.

	Overall, it would seem that the gains from inlining the atomic
	operations (saving the call/return overheads) outweigh the
	cost of unnecessary LOCK prefixes on UP systems (though this
	still needs to be measured).

	Note that all architectures except for x86 always inline
	atomic operations and do not provide callable versions.

>How-To-Repeat:
	Code inspection.

>Fix:
	This patch changes i386 and amd64 atomic.h to provide inline
	versions of atomic operations when invoked to build KLDs.
	It retains the provision to create non-inlined versions of
	the atomic operations to support existing KLDs.  (My suggestion
	is that, if this change is worthwhile, this change be MFC'd
	then the head code be updated to remove the non-inlined
	atomic operations and that change not be MFC'd - which would
	align x86 with all other architectures).  Note that this patch
	has been compile tested (and the resultant object code quickly
	sanity checked), it has not been tested yet.

Index: head/sys/amd64/amd64/atomic.c
===================================================================
--- head/sys/amd64/amd64/atomic.c	(revision 242498)
+++ head/sys/amd64/amd64/atomic.c	(working copy)
@@ -33,11 +33,11 @@
  */
 #include <sys/types.h>
 
-/* Firstly make atomic.h generate prototypes as it will for kernel modules */
-#define KLD_MODULE
+/* Firstly make atomic.h generate prototypes */
+#define WANT_PROTOTYPES
 #include <machine/atomic.h>
 #undef _MACHINE_ATOMIC_H_	/* forget we included it */
-#undef KLD_MODULE
+#undef WANT_PROTOTYPES
 #undef ATOMIC_ASM
 
 /* Make atomic.h generate public functions */
Index: head/sys/amd64/include/atomic.h
===================================================================
--- head/sys/amd64/include/atomic.h	(revision 242498)
+++ head/sys/amd64/include/atomic.h	(working copy)
@@ -64,14 +64,13 @@
  */
 
 /*
- * The above functions are expanded inline in the statically-linked
- * kernel.  Lock prefixes are generated if an SMP kernel is being
- * built.
- *
- * Kernel modules call real functions which are built into the kernel.
- * This allows kernel modules to be portable between UP and SMP systems.
+ * The above functions are all expanded inline if the compiler
+ * supports the gcc extensions to asm().  Lock prefixes are generated
+ * except for UP kernels (ie kernel modules and userland references
+ * always have lock prefixes).  This allows kernel modules and
+ * userland code to be portable between UP and SMP systems.
  */
-#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
+#if defined(WANT_PROTOTYPES) || !defined(__GNUCLIKE_ASM)
 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
 void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
 void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
@@ -86,13 +85,13 @@
 #define	ATOMIC_STORE(TYPE)					\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
-#else /* !KLD_MODULE && __GNUCLIKE_ASM */
+#else /* !WANT_PROTOTYPES && __GNUCLIKE_ASM */
 
 /*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
+ * For userland and kernel modules, always use lock prefixes so that
+ * the binaries will run on both SMP and !SMP systems.
  */
-#if defined(SMP) || !defined(_KERNEL)
+#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
 #define	MPLOCKED	"lock ; "
 #else
 #define	MPLOCKED
@@ -231,7 +230,7 @@
 }							\
 struct __hack
 
-#if defined(_KERNEL) && !defined(SMP)
+#if defined(_KERNEL) && !defined(SMP) && !defined(KLD_MODULE)
 
 #define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
@@ -245,7 +244,7 @@
 }							\
 struct __hack
 
-#else /* !(_KERNEL && !SMP) */
+#else /* !(_KERNEL && !SMP && !KLD_MODULE) */
 
 #define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
@@ -263,9 +262,9 @@
 }							\
 struct __hack
 
-#endif /* _KERNEL && !SMP */
+#endif /* _KERNEL && !SMP && !KLD_MODULE */
 
-#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
+#endif /* WANT_PROTOTYPES || !__GNUCLIKE_ASM */
 
 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
Index: head/sys/i386/i386/atomic.c
===================================================================
--- head/sys/i386/i386/atomic.c	(revision 242498)
+++ head/sys/i386/i386/atomic.c	(working copy)
@@ -33,11 +33,11 @@
  */
 #include <sys/types.h>
 
-/* Firstly make atomic.h generate prototypes as it will for kernel modules */
-#define KLD_MODULE
+/* Firstly make atomic.h generate prototypes */
+#define WANT_PROTOTYPES
 #include <machine/atomic.h>
 #undef _MACHINE_ATOMIC_H_	/* forget we included it */
-#undef KLD_MODULE
+#undef WANT_PROTOTYPES
 #undef ATOMIC_ASM
 
 /* Make atomic.h generate public functions */
Index: head/sys/i386/include/atomic.h
===================================================================
--- head/sys/i386/include/atomic.h	(revision 242498)
+++ head/sys/i386/include/atomic.h	(working copy)
@@ -64,14 +64,13 @@
  */
 
 /*
- * The above functions are expanded inline in the statically-linked
- * kernel.  Lock prefixes are generated if an SMP kernel is being
- * built.
- *
- * Kernel modules call real functions which are built into the kernel.
- * This allows kernel modules to be portable between UP and SMP systems.
+ * The above functions are all expanded inline if the compiler
+ * supports the gcc extensions to asm().  Lock prefixes are generated
+ * except for UP kernels (ie kernel modules and userland references
+ * always have lock prefixes).  This allows kernel modules and
+ * userland code to be portable between UP and SMP systems.
  */
-#if defined(KLD_MODULE) || !defined(__GNUCLIKE_ASM)
+#if defined(WANT_PROTOTYPES) || !defined(__GNUCLIKE_ASM)
 #define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
 void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
 void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
@@ -84,13 +83,13 @@
 #define	ATOMIC_STORE(TYPE)					\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
-#else /* !KLD_MODULE && __GNUCLIKE_ASM */
+#else /* !WANT_PROTOTYPES && __GNUCLIKE_ASM */
 
 /*
- * For userland, always use lock prefixes so that the binaries will run
- * on both SMP and !SMP systems.
+ * For userland and kernel modules, always use lock prefixes so that
+ * the binaries will run on both SMP and !SMP systems.
  */
-#if defined(SMP) || !defined(_KERNEL)
+#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
 #define	MPLOCKED	"lock ; "
 #else
 #define	MPLOCKED
@@ -301,7 +300,7 @@
 }							\
 struct __hack
 
-#if defined(_KERNEL) && !defined(SMP)
+#if defined(_KERNEL) && !defined(SMP) && !defined(KLD_MODULE)
 
 #define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
@@ -315,7 +314,7 @@
 }							\
 struct __hack
 
-#else /* !(_KERNEL && !SMP) */
+#else /* !(_KERNEL && !SMP && !KLD_MODULE) */
 
 #define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
@@ -333,9 +332,9 @@
 }							\
 struct __hack
 
-#endif /* _KERNEL && !SMP */
+#endif /* _KERNEL && !SMP && !KLD_MODULE */
 
-#endif /* KLD_MODULE || !__GNUCLIKE_ASM */
+#endif /* WANT_PROTOTYPES || !__GNUCLIKE_ASM */
 
 ATOMIC_ASM(set,	     char,  "orb %b1,%0",  "iq",  v);
 ATOMIC_ASM(clear,    char,  "andb %b1,%0", "iq", ~v);
>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201211030622.qA36MC6U070602>