From owner-svn-src-stable-9@FreeBSD.ORG Sat Jun 16 13:22:55 2012
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Sat, 16 Jun 2012 13:22:55 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject: svn commit: r237161 - in stable/9/sys: amd64/include i386/include

Author: kib
Date: Sat Jun 16 13:22:55 2012
New Revision: 237161
URL: http://svn.freebsd.org/changeset/base/237161

Log:
  MFC r236456:
  Use a plain store for atomic_store_rel on x86, instead of an implicitly
  locked xchg instruction.  The IA32 memory model guarantees that a store
  has release semantics, since stores cannot pass earlier loads or stores.

Modified:
  stable/9/sys/amd64/include/atomic.h
  stable/9/sys/i386/include/atomic.h

Directory Properties:
  stable/9/sys/   (props changed)
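For illustration only (not part of the commit), here is a minimal standalone
sketch of what the change amounts to for a 32-bit store: the release store is
now just a compiler barrier followed by an ordinary store, whereas the code
removed below used xchg, which carries an implicit LOCK prefix and acts as a
full barrier.  The function names are hypothetical; the real definitions are
generated by the ATOMIC_STORE()/ATOMIC_LOAD() macros in the diffs that follow.

    #include <stdint.h>

    /* New scheme: on x86 a store already has release semantics, so a
     * compiler barrier plus a plain store is sufficient. */
    static inline void
    example_store_rel_int(volatile uint32_t *p, uint32_t v)
    {
            __asm __volatile("" : : : "memory");    /* compiler barrier only */
            *p = v;                                 /* plain store, no LOCK */
    }

    /* Old scheme, as removed by this commit: xchg with a memory operand
     * is implicitly locked, i.e. a full (and more expensive) barrier. */
    static inline void
    example_store_rel_int_old(volatile uint32_t *p, uint32_t v)
    {
            __asm __volatile("xchgl %1,%0"
                : "=m" (*p), "+r" (v)
                : "m" (*p)
                : "memory");
    }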
Modified: stable/9/sys/amd64/include/atomic.h
==============================================================================
--- stable/9/sys/amd64/include/atomic.h	Sat Jun 16 13:11:10 2012	(r237160)
+++ stable/9/sys/amd64/include/atomic.h	Sat Jun 16 13:22:55 2012	(r237161)
@@ -81,8 +81,9 @@ int	atomic_cmpset_long(volatile u_long *
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
-u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define	ATOMIC_STORE(TYPE)				\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -210,37 +211,43 @@ atomic_fetchadd_long(volatile u_long *p,
 	return (v);
 }
 
-#if defined(_KERNEL) && !defined(SMP)
-
 /*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
  */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile("" : : : "memory");		\
+	*p = v;						\
+}							\
+struct __hack
+
+#if defined(_KERNEL) && !defined(SMP)
+
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
 	u_##TYPE tmp;					\
 							\
 	tmp = *p;					\
-	__asm __volatile ("" : : : "memory");		\
+	__asm __volatile("" : : : "memory");		\
 	return (tmp);					\
 }							\
-							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile ("" : : : "memory");		\
-	*p = v;						\
-}							\
 struct __hack
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -254,19 +261,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE
 							\
 	return (res);					\
 }							\
-							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
-}							\
 struct __hack
 
 #endif /* _KERNEL && !SMP */
@@ -293,13 +287,19 @@ ATOMIC_ASM(clear,    long,  "andq %1,%0"
 ATOMIC_ASM(add,	     long,  "addq %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1",  "xchgq %1,%0");
+ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
+ATOMIC_LOAD(long,  "cmpxchgq %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
 
 #undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
 
 #ifndef WANT_FUNCTIONS
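For comparison, and again purely as a sketch (the exact output is generated
from the ATOMIC_LOAD() macro and its LOP argument above, so operand
constraints may differ slightly), the SMP acquire load keeps a locked
instruction: a lock cmpxchg of the location against its own value leaves the
memory unchanged, returns the current value in %eax, and the LOCK prefix
supplies the Store/Load barrier that the new comment describes.

    #include <stdint.h>

    /* Hypothetical standalone acquire load for x86 SMP.  cmpxchg compares
     * %eax with *p: if they are equal, *p is rewritten with the same value;
     * otherwise %eax is loaded from *p.  Either way %eax ends up holding
     * the current value of *p, and the locked operation keeps the load
     * from being satisfied before earlier stores have drained. */
    static inline uint32_t
    example_load_acq_int(volatile uint32_t *p)
    {
            uint32_t res;

            res = 0;                        /* any starting value works */
            __asm __volatile("lock; cmpxchgl %0,%1"
                : "+a" (res), "+m" (*p)
                :
                : "memory", "cc");
            return (res);
    }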
Modified: stable/9/sys/i386/include/atomic.h
==============================================================================
--- stable/9/sys/i386/include/atomic.h	Sat Jun 16 13:11:10 2012	(r237160)
+++ stable/9/sys/i386/include/atomic.h	Sat Jun 16 13:22:55 2012	(r237161)
@@ -32,9 +32,9 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
-#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
+#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
 
 /*
  * Various simple operations on memory, each of which is atomic in the
@@ -79,8 +79,9 @@ void atomic_##NAME##_barr_##TYPE(volatil
 int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
-u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define	ATOMIC_STORE(TYPE)				\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -280,16 +281,29 @@ atomic_fetchadd_int(volatile u_int *p, u
 	return (v);
 }
 
-#if defined(_KERNEL) && !defined(SMP)
-
 /*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
  */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile("" : : : "memory");		\
+	*p = v;						\
+}							\
+struct __hack
+
+#if defined(_KERNEL) && !defined(SMP)
+
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -299,18 +313,11 @@ atomic_load_acq_##TYPE(volatile u_##TYPE
 	__asm __volatile("" : : : "memory");		\
 	return (tmp);					\
 }							\
-							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile("" : : : "memory");		\
-	*p = v;						\
-}							\
 struct __hack
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -324,19 +331,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE
 							\
 	return (res);					\
 }							\
-							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
-}							\
 struct __hack
 
 #endif /* _KERNEL && !SMP */
@@ -363,13 +357,19 @@ ATOMIC_ASM(clear,    long,  "andl %1,%0"
 ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgl %0,%1",  "xchgl %1,%0");
+ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
+ATOMIC_LOAD(long,  "cmpxchgl %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
 
 #undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
 
 #ifndef WANT_FUNCTIONS
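Finally, a hypothetical usage-level example (not from the tree) of why the
plain release store is sufficient: the store_rel/load_acq pair orders the
publication of data against the flag, because on x86 the flag store cannot
pass the earlier data store, and the acquire load keeps the later data read
from moving above it.  'producer', 'consumer', 'data' and 'ready' are
illustrative names only; atomic_store_rel_int and atomic_load_acq_int are the
real primitives defined by the headers changed above.

    #include <sys/cdefs.h>
    #include <sys/types.h>
    #include <machine/atomic.h>

    /* Illustrative globals, not kernel structures. */
    static volatile u_int data;
    static volatile u_int ready;

    static void
    producer(void)
    {
            data = 42;                              /* ordinary store */
            atomic_store_rel_int(&ready, 1);        /* release: publishes data */
    }

    static u_int
    consumer(void)
    {
            while (atomic_load_acq_int(&ready) == 0)
                    continue;       /* spin; acquire pairs with the release */
            return (data);          /* guaranteed to observe 42 */
    }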