From owner-svn-src-stable-9@FreeBSD.ORG Sat Jun 16 13:22:55 2012
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Sat, 16 Jun 2012 13:22:55 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject: svn commit: r237161 - in stable/9/sys: amd64/include i386/include

Author: kib
Date: Sat Jun 16 13:22:55 2012
New Revision: 237161
URL: http://svn.freebsd.org/changeset/base/237161

Log:
  MFC r236456:
  Use a plain store for atomic_store_rel on x86, instead of an implicitly
  locked xchg instruction.  The IA32 memory model guarantees that a store
  has release semantics, since stores cannot pass earlier loads or stores.

Modified:
  stable/9/sys/amd64/include/atomic.h
  stable/9/sys/i386/include/atomic.h

Directory Properties:
  stable/9/sys/   (props changed)
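For illustration only (not part of the commit), here is a minimal standalone
sketch of what the change amounts to for a 32-bit store: the release store is
now just a compiler barrier followed by an ordinary store, whereas the code
removed below used xchg, which carries an implicit LOCK prefix and acts as a
full barrier.  The function names are hypothetical; the real definitions are
generated by the ATOMIC_STORE()/ATOMIC_LOAD() macros in the diffs that follow.

    #include <stdint.h>

    /* New scheme: on x86 a store already has release semantics, so a
     * compiler barrier plus a plain store is sufficient. */
    static inline void
    example_store_rel_int(volatile uint32_t *p, uint32_t v)
    {
            __asm __volatile("" : : : "memory");    /* compiler barrier only */
            *p = v;                                 /* plain store, no LOCK */
    }

    /* Old scheme, as removed by this commit: xchg with a memory operand
     * is implicitly locked, i.e. a full (and more expensive) barrier. */
    static inline void
    example_store_rel_int_old(volatile uint32_t *p, uint32_t v)
    {
            __asm __volatile("xchgl %1,%0"
                : "=m" (*p), "+r" (v)
                : "m" (*p)
                : "memory");
    }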
Modified: stable/9/sys/amd64/include/atomic.h
==============================================================================
--- stable/9/sys/amd64/include/atomic.h	Sat Jun 16 13:11:10 2012	(r237160)
+++ stable/9/sys/amd64/include/atomic.h	Sat Jun 16 13:22:55 2012	(r237161)
@@ -81,8 +81,9 @@ int	atomic_cmpset_long(volatile u_long *
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
-u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define	ATOMIC_STORE(TYPE)				\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -210,37 +211,43 @@ atomic_fetchadd_long(volatile u_long *p,
 	return (v);
 }
 
-#if defined(_KERNEL) && !defined(SMP)
-
 /*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
  */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile("" : : : "memory");		\
+	*p = v;						\
+}							\
+struct __hack
+
+#if defined(_KERNEL) && !defined(SMP)
+
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
 	u_##TYPE tmp;					\
 							\
 	tmp = *p;					\
-	__asm __volatile ("" : : : "memory");		\
+	__asm __volatile("" : : : "memory");		\
 	return (tmp);					\
 }							\
-							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile ("" : : : "memory");		\
-	*p = v;						\
-}							\
 struct __hack
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -254,19 +261,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE
 							\
 	return (res);					\
 }							\
-							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
-}							\
 struct __hack
 
 #endif /* _KERNEL && !SMP */
@@ -293,13 +287,19 @@ ATOMIC_ASM(clear,    long,  "andq %1,%0"
 ATOMIC_ASM(add,	     long,  "addq %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subq %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgq %0,%1",  "xchgq %1,%0");
+ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
+ATOMIC_LOAD(long,  "cmpxchgq %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
 
 #undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
 
 #ifndef WANT_FUNCTIONS
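For comparison, and again purely as a sketch (the exact output is generated
from the ATOMIC_LOAD() macro and its LOP argument above, so operand
constraints may differ slightly), the SMP acquire load keeps a locked
instruction: a lock cmpxchg of the location against its own value leaves the
memory unchanged, returns the current value in %eax, and the LOCK prefix
supplies the Store/Load barrier that the new comment describes.

    #include <stdint.h>

    /* Hypothetical standalone acquire load for x86 SMP.  cmpxchg compares
     * %eax with *p: if they are equal, *p is rewritten with the same value;
     * otherwise %eax is loaded from *p.  Either way %eax ends up holding
     * the current value of *p, and the locked operation keeps the load
     * from being satisfied before earlier stores have drained. */
    static inline uint32_t
    example_load_acq_int(volatile uint32_t *p)
    {
            uint32_t res;

            res = 0;                        /* any starting value works */
            __asm __volatile("lock; cmpxchgl %0,%1"
                : "+a" (res), "+m" (*p)
                :
                : "memory", "cc");
            return (res);
    }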
Modified: stable/9/sys/i386/include/atomic.h
==============================================================================
--- stable/9/sys/i386/include/atomic.h	Sat Jun 16 13:11:10 2012	(r237160)
+++ stable/9/sys/i386/include/atomic.h	Sat Jun 16 13:22:55 2012	(r237161)
@@ -32,9 +32,9 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
-#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
-#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory")
+#define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
+#define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
 
 /*
  * Various simple operations on memory, each of which is atomic in the
@@ -79,8 +79,9 @@ void atomic_##NAME##_barr_##TYPE(volatil
 int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
 u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
-u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p);	\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
+#define	ATOMIC_STORE(TYPE)				\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
@@ -280,16 +281,29 @@ atomic_fetchadd_int(volatile u_int *p, u
 	return (v);
 }
 
-#if defined(_KERNEL) && !defined(SMP)
-
 /*
- * We assume that a = b will do atomic loads and stores.  However, on a
- * PentiumPro or higher, reads may pass writes, so for that case we have
- * to use a serializing instruction (i.e. with LOCK) to do the load in
- * SMP kernels.  For UP kernels, however, the cache of the single processor
- * is always consistent, so we only need to take care of compiler.
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
  */
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__asm __volatile("" : : : "memory");		\
+	*p = v;						\
+}							\
+struct __hack
+
+#if defined(_KERNEL) && !defined(SMP)
+
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -299,18 +313,11 @@ atomic_load_acq_##TYPE(volatile u_##TYPE
 	__asm __volatile("" : : : "memory");		\
 	return (tmp);					\
 }							\
-							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile("" : : : "memory");		\
-	*p = v;						\
-}							\
 struct __hack
 
 #else /* !(_KERNEL && !SMP) */
 
-#define	ATOMIC_STORE_LOAD(TYPE, LOP, SOP)		\
+#define	ATOMIC_LOAD(TYPE, LOP)				\
 static __inline u_##TYPE				\
 atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
 {							\
@@ -324,19 +331,6 @@ atomic_load_acq_##TYPE(volatile u_##TYPE
 							\
 	return (res);					\
 }							\
-							\
-/*							\
- * The XCHG instruction asserts LOCK automagically.	\
- */							\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__asm __volatile(SOP				\
-	: "=m" (*p),			/* 0 */		\
-	  "+r" (v)			/* 1 */		\
-	: "m" (*p)			/* 2 */		\
-	: "memory");					\
-}							\
 struct __hack
 
 #endif /* _KERNEL && !SMP */
@@ -363,13 +357,19 @@ ATOMIC_ASM(clear,    long,  "andl %1,%0"
 ATOMIC_ASM(add,	     long,  "addl %1,%0",  "ir",  v);
 ATOMIC_ASM(subtract, long,  "subl %1,%0",  "ir",  v);
 
-ATOMIC_STORE_LOAD(char,	"cmpxchgb %b0,%1", "xchgb %b1,%0");
-ATOMIC_STORE_LOAD(short,"cmpxchgw %w0,%1", "xchgw %w1,%0");
-ATOMIC_STORE_LOAD(int,	"cmpxchgl %0,%1",  "xchgl %1,%0");
-ATOMIC_STORE_LOAD(long,	"cmpxchgl %0,%1",  "xchgl %1,%0");
+ATOMIC_LOAD(char,  "cmpxchgb %b0,%1");
+ATOMIC_LOAD(short, "cmpxchgw %w0,%1");
+ATOMIC_LOAD(int,   "cmpxchgl %0,%1");
+ATOMIC_LOAD(long,  "cmpxchgl %0,%1");
+
+ATOMIC_STORE(char);
+ATOMIC_STORE(short);
+ATOMIC_STORE(int);
+ATOMIC_STORE(long);
 
 #undef ATOMIC_ASM
-#undef ATOMIC_STORE_LOAD
+#undef ATOMIC_LOAD
+#undef ATOMIC_STORE
 
 #ifndef WANT_FUNCTIONS
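Finally, a hypothetical usage-level example (not from the tree) of why the
plain release store is sufficient: the store_rel/load_acq pair orders the
publication of data against the flag, because on x86 the flag store cannot
pass the earlier data store, and the acquire load keeps the later data read
from moving above it.  'producer', 'consumer', 'data' and 'ready' are
illustrative names only; atomic_store_rel_int and atomic_load_acq_int are the
real primitives defined by the headers changed above.

    #include <sys/cdefs.h>
    #include <sys/types.h>
    #include <machine/atomic.h>

    /* Illustrative globals, not kernel structures. */
    static volatile u_int data;
    static volatile u_int ready;

    static void
    producer(void)
    {
            data = 42;                              /* ordinary store */
            atomic_store_rel_int(&ready, 1);        /* release: publishes data */
    }

    static u_int
    consumer(void)
    {
            while (atomic_load_acq_int(&ready) == 0)
                    continue;       /* spin; acquire pairs with the release */
            return (data);          /* guaranteed to observe 42 */
    }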