Date: Fri, 16 Aug 2013 00:04:58 +0000 (UTC)
From: Jung-uk Kim <jkim@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r254392 - in projects/atomic64/sys/i386: i386 include
Message-ID: <201308160004.r7G04wGA081581@svn.freebsd.org>
Author: jkim
Date: Fri Aug 16 00:04:58 2013
New Revision: 254392
URL: http://svnweb.freebsd.org/changeset/base/254392

Log:
  - Optimize atomic_cmpset_64_i386().
  - Move all 64-bit atomic functions to atomic.h.

  Suggested by:	bde

Modified:
  projects/atomic64/sys/i386/i386/machdep.c
  projects/atomic64/sys/i386/include/atomic.h

Modified: projects/atomic64/sys/i386/i386/machdep.c
==============================================================================
--- projects/atomic64/sys/i386/i386/machdep.c	Fri Aug 16 00:00:36 2013	(r254391)
+++ projects/atomic64/sys/i386/i386/machdep.c	Fri Aug 16 00:04:58 2013	(r254392)
@@ -1548,56 +1548,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
     idle_sysctl, "A", "currently selected idle function");
 
-static int cpu_ident_cmxchg8b = 0;
-
-static void
-cpu_probe_cmpxchg8b(void)
-{
-
-	if ((cpu_feature & CPUID_CX8) != 0)
-		cpu_ident_cmxchg8b = 1;
-}
-
-int
-atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
-{
-
-	if (cpu_ident_cmxchg8b)
-		return (atomic_cmpset_64_i586(dst, expect, src));
-	else
-		return (atomic_cmpset_64_i386(dst, expect, src));
-}
-
-uint64_t
-atomic_load_acq_64(volatile uint64_t *p)
-{
-
-	if (cpu_ident_cmxchg8b)
-		return (atomic_load_acq_64_i586(p));
-	else
-		return (atomic_load_acq_64_i386(p));
-}
-
-void
-atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
-{
-
-	if (cpu_ident_cmxchg8b)
-		atomic_store_rel_64_i586(p, v);
-	else
-		atomic_store_rel_64_i386(p, v);
-}
-
-uint64_t
-atomic_swap_64(volatile uint64_t *p, uint64_t v)
-{
-
-	if (cpu_ident_cmxchg8b)
-		return (atomic_swap_64_i586(p, v));
-	else
-		return (atomic_swap_64_i386(p, v));
-}
-
 /*
  * Reset registers to default values on exec.
  */
@@ -2859,7 +2809,6 @@ init386(first)
 	thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
 
 	cpu_probe_amdc1e();
-	cpu_probe_cmpxchg8b();
 }
 
 #else
@@ -3150,7 +3099,6 @@ init386(first)
 	thread0.td_frame = &proc0_tf;
 
 	cpu_probe_amdc1e();
-	cpu_probe_cmpxchg8b();
 
 #ifdef FDT
 	x86_init_fdt();

Modified: projects/atomic64/sys/i386/include/atomic.h
==============================================================================
--- projects/atomic64/sys/i386/include/atomic.h	Fri Aug 16 00:00:36 2013	(r254391)
+++ projects/atomic64/sys/i386/include/atomic.h	Fri Aug 16 00:04:58 2013	(r254392)
@@ -32,6 +32,11 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
+#ifdef _KERNEL
+#include <machine/md_var.h>
+#include <machine/specialreg.h>
+#endif
+
 #define	mb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
 #define	wmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
 #define	rmb()	__asm __volatile("lock; addl $0,(%%esp)" : : : "memory", "cc")
@@ -87,6 +92,11 @@ u_##TYPE	atomic_load_acq_##TYPE(volatile
 #define	ATOMIC_STORE(TYPE)					\
 void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
 
+int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
+uint64_t	atomic_load_acq_64(volatile uint64_t *);
+void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
+uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
+
 #else /* !KLD_MODULE && __GNUCLIKE_ASM */
 
 /*
@@ -124,7 +134,155 @@ atomic_##NAME##_barr_##TYPE(volatile u_#
 }					\
 struct __hack
 
-#if defined(_KERNEL) && !defined(WANT_FUNCTIONS)
+/*
+ * Atomic compare and set, used by the mutex functions
+ *
+ * if (*dst == expect) *dst = src (all 32 bit words)
+ *
+ * Returns 0 on failure, non-zero on success
+ */
+
+#ifdef CPU_DISABLE_CMPXCHG
+
+static __inline int
+atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
+{
+	int res;
+	register_t lock;
+
+	res = 0;
+	ATOMIC_LOCK_I386(lock);
+	if (*dst == expect) {
+		*dst = src;
+		res = 1;
+	}
+	ATOMIC_UNLOCK_I386(lock);
+	return (res);
+}
+
+#else /* !CPU_DISABLE_CMPXCHG */
+
+static __inline int
+atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
+{
+	u_char res;
+
+	__asm __volatile(
+	"	" MPLOCKED "		"
+	"	cmpxchgl %3, %1 ;	"
+	"	sete	%0 ;		"
+	"# atomic_cmpset_int"
+	: "=q" (res),			/* 0 */
+	  "+m" (*dst),			/* 1 */
+	  "+a" (expect)			/* 2 */
+	: "r" (src)			/* 3 */
+	: "memory", "cc");
+
+	return (res);
+}
+
+#endif /* CPU_DISABLE_CMPXCHG */
+
+/*
+ * Atomically add the value of v to the integer pointed to by p and return
+ * the previous value of *p.
+ */
+static __inline u_int
+atomic_fetchadd_int(volatile u_int *p, u_int v)
+{
+
+	__asm __volatile(
+	"	" MPLOCKED "		"
+	"	xaddl	%0, %1 ;	"
+	"# atomic_fetchadd_int"
+	: "+r" (v),			/* 0 (result) */
+	  "+m" (*p)			/* 1 */
+	: : "cc");
+	return (v);
+}
+
+static __inline int
+atomic_testandset_int(volatile u_int *p, u_int v)
+{
+	u_char res;
+
+	__asm __volatile(
+	"	" MPLOCKED "		"
+	"	btsl	%2, %1 ;	"
+	"	setc	%0 ;		"
+	"# atomic_testandset_int"
+	: "=q" (res),			/* 0 */
+	  "+m" (*p)			/* 1 */
+	: "Ir" (v & 0x1f)		/* 2 */
+	: "cc");
+	return (res);
+}
+
+/*
+ * We assume that a = b will do atomic loads and stores.  Due to the
+ * IA32 memory model, a simple store guarantees release semantics.
+ *
+ * However, loads may pass stores, so for atomic_load_acq we have to
+ * ensure a Store/Load barrier to do the load in SMP kernels.  We use
+ * "lock cmpxchg" as recommended by the AMD Software Optimization
+ * Guide, and not mfence.  For UP kernels, however, the cache of the
+ * single processor is always consistent, so we only need to take care
+ * of the compiler.
+ */
+#define	ATOMIC_STORE(TYPE)				\
+static __inline void					\
+atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
+{							\
+	__compiler_membar();				\
+	*p = v;						\
+}							\
+struct __hack
+
+#if defined(_KERNEL) && !defined(SMP)
+
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+static __inline u_##TYPE				\
+atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
+{							\
+	u_##TYPE tmp;					\
+							\
+	tmp = *p;					\
+	__compiler_membar();				\
+	return (tmp);					\
+}							\
+struct __hack
+
+#else /* !(_KERNEL && !SMP) */
+
+#define	ATOMIC_LOAD(TYPE, LOP)				\
+static __inline u_##TYPE				\
+atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
+{							\
+	u_##TYPE res;					\
+							\
+	__asm __volatile(MPLOCKED LOP			\
+	: "=a" (res),			/* 0 */		\
+	  "+m" (*p)			/* 1 */		\
+	: : "memory", "cc");				\
+							\
+	return (res);					\
+}							\
+struct __hack
+
+#endif /* _KERNEL && !SMP */
+
+#ifdef _KERNEL
+
+#ifdef WANT_FUNCTIONS
+int		atomic_cmpset_64_i386(volatile uint64_t *, uint64_t, uint64_t);
+int		atomic_cmpset_64_i586(volatile uint64_t *, uint64_t, uint64_t);
+uint64_t	atomic_load_acq_64_i386(volatile uint64_t *);
+uint64_t	atomic_load_acq_64_i586(volatile uint64_t *);
+void		atomic_store_rel_64_i386(volatile uint64_t *, uint64_t);
+void		atomic_store_rel_64_i586(volatile uint64_t *, uint64_t);
+uint64_t	atomic_swap_64_i386(volatile uint64_t *, uint64_t);
+uint64_t	atomic_swap_64_i586(volatile uint64_t *, uint64_t);
+#endif
 
 /* I486 does not support SMP or CMPXCHG8B. */
 static __inline int
@@ -139,14 +297,12 @@ atomic_cmpset_64_i386(volatile uint64_t
 	"	xorl	%1, %%eax ;	"
 	"	xorl	%2, %%edx ;	"
 	"	orl	%%edx, %%eax ;	"
-	"	jnz	1f ;		"
+	"	sete	%%al ;		"
+	"	movzbl	%%al, %%eax ;	"
+	"	jne	1f ;		"
 	"	movl	%3, %1 ;	"
 	"	movl	%4, %2 ;	"
-	"	movl	$1, %%eax ;	"
-	"	jmp	2f ;		"
 	"1:				"
-	"	xorl	%%eax, %%eax ;	"
-	"2:				"
 	"	popfl"
 	: "+A" (expect),		/* 0 */
 	  "+m" (*p),			/* 1 */
@@ -288,147 +444,47 @@ atomic_swap_64_i586(volatile uint64_t *p
 	return (v);
 }
 
-#endif /* _KERNEL && !WANT_FUNCTIONS */
-
-/*
- * Atomic compare and set, used by the mutex functions
- *
- * if (*dst == expect) *dst = src (all 32 bit words)
- *
- * Returns 0 on failure, non-zero on success
- */
-
-#ifdef CPU_DISABLE_CMPXCHG
-
 static __inline int
-atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
+atomic_cmpset_64(volatile uint64_t *dst, uint64_t expect, uint64_t src)
{
-	int res;
-	register_t lock;
 
-	res = 0;
-	ATOMIC_LOCK_I386(lock);
-	if (*dst == expect) {
-		*dst = src;
-		res = 1;
-	}
-	ATOMIC_UNLOCK_I386(lock);
-	return (res);
+	if ((cpu_feature & CPUID_CX8) == 0)
+		return (atomic_cmpset_64_i386(dst, expect, src));
+	else
+		return (atomic_cmpset_64_i586(dst, expect, src));
 }
 
-#else /* !CPU_DISABLE_CMPXCHG */
-
-static __inline int
-atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src)
+static __inline uint64_t
+atomic_load_acq_64(volatile uint64_t *p)
 {
-	u_char res;
-
-	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	cmpxchgl %3, %1 ;	"
-	"	sete	%0 ;		"
-	"# atomic_cmpset_int"
-	: "=q" (res),			/* 0 */
-	  "+m" (*dst),			/* 1 */
-	  "+a" (expect)			/* 2 */
-	: "r" (src)			/* 3 */
-	: "memory", "cc");
-	return (res);
+	if ((cpu_feature & CPUID_CX8) == 0)
+		return (atomic_load_acq_64_i386(p));
+	else
+		return (atomic_load_acq_64_i586(p));
 }
 
-#endif /* CPU_DISABLE_CMPXCHG */
-
-#undef ATOMIC_LOCK_I386
-#undef ATOMIC_UNLOCK_I386
-
-/*
- * Atomically add the value of v to the integer pointed to by p and return
- * the previous value of *p.
- */
-static __inline u_int
-atomic_fetchadd_int(volatile u_int *p, u_int v)
+static __inline void
+atomic_store_rel_64(volatile uint64_t *p, uint64_t v)
 {
 
-	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	xaddl	%0, %1 ;	"
-	"# atomic_fetchadd_int"
-	: "+r" (v),			/* 0 (result) */
-	  "+m" (*p)			/* 1 */
-	: : "cc");
-	return (v);
+	if ((cpu_feature & CPUID_CX8) == 0)
+		atomic_store_rel_64_i386(p, v);
+	else
+		atomic_store_rel_64_i586(p, v);
 }
 
-static __inline int
-atomic_testandset_int(volatile u_int *p, u_int v)
+static __inline uint64_t
+atomic_swap_64(volatile uint64_t *p, uint64_t v)
 {
-	u_char res;
 
-	__asm __volatile(
-	"	" MPLOCKED "		"
-	"	btsl	%2, %1 ;	"
-	"	setc	%0 ;		"
-	"# atomic_testandset_int"
-	: "=q" (res),			/* 0 */
-	  "+m" (*p)			/* 1 */
-	: "Ir" (v & 0x1f)		/* 2 */
-	: "cc");
-	return (res);
+	if ((cpu_feature & CPUID_CX8) == 0)
+		return (atomic_swap_64_i386(p, v));
+	else
+		return (atomic_swap_64_i586(p, v));
 }
 
-/*
- * We assume that a = b will do atomic loads and stores.  Due to the
- * IA32 memory model, a simple store guarantees release semantics.
- *
- * However, loads may pass stores, so for atomic_load_acq we have to
- * ensure a Store/Load barrier to do the load in SMP kernels.  We use
- * "lock cmpxchg" as recommended by the AMD Software Optimization
- * Guide, and not mfence.  For UP kernels, however, the cache of the
- * single processor is always consistent, so we only need to take care
- * of the compiler.
- */
-#define	ATOMIC_STORE(TYPE)				\
-static __inline void					\
-atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
-{							\
-	__compiler_membar();				\
-	*p = v;						\
-}							\
-struct __hack
-
-#if defined(_KERNEL) && !defined(SMP)
-
-#define	ATOMIC_LOAD(TYPE, LOP)				\
-static __inline u_##TYPE				\
-atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
-{							\
-	u_##TYPE tmp;					\
-							\
-	tmp = *p;					\
-	__compiler_membar();				\
-	return (tmp);					\
-}							\
-struct __hack
-
-#else /* !(_KERNEL && !SMP) */
-
-#define	ATOMIC_LOAD(TYPE, LOP)				\
-static __inline u_##TYPE				\
-atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
-{							\
-	u_##TYPE res;					\
-							\
-	__asm __volatile(MPLOCKED LOP			\
-	: "=a" (res),			/* 0 */		\
-	  "+m" (*p)			/* 1 */		\
-	: : "memory", "cc");				\
-							\
-	return (res);					\
-}							\
-struct __hack
-
-#endif /* _KERNEL && !SMP */
+#endif /* _KERNEL */
 
 #endif /* KLD_MODULE || !__GNUCLIKE_ASM */
 
@@ -468,13 +524,6 @@ ATOMIC_STORE(long);
 
 #ifndef WANT_FUNCTIONS
 
-#ifdef _KERNEL
-int		atomic_cmpset_64(volatile uint64_t *, uint64_t, uint64_t);
-uint64_t	atomic_load_acq_64(volatile uint64_t *);
-void		atomic_store_rel_64(volatile uint64_t *, uint64_t);
-uint64_t	atomic_swap_64(volatile uint64_t *, uint64_t);
-#endif
-
 static __inline int
 atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src)
 {
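
The 64-bit primitives that this change inlines into atomic.h are used like any
other compare-and-set interface: load the current value, compute the new one,
and retry while atomic_cmpset_64() fails.  A minimal illustrative sketch of
such a retry loop in kernel code follows; it uses only atomic_load_acq_64()
and atomic_cmpset_64() as declared above, and the counter_add_64() wrapper
name is hypothetical, not part of this commit.

/*
 * Sketch only: a 64-bit atomic add built on the interfaces moved into
 * <machine/atomic.h> by this change.  counter_add_64() is a hypothetical
 * example wrapper, not something added by r254392.
 */
#include <sys/types.h>
#include <machine/atomic.h>

static __inline uint64_t
counter_add_64(volatile uint64_t *p, uint64_t delta)
{
	uint64_t old;

	do {
		/* Snapshot the current value with acquire semantics. */
		old = atomic_load_acq_64(p);
		/* atomic_cmpset_64() returns non-zero on success. */
	} while (atomic_cmpset_64(p, old, old + delta) == 0);

	return (old + delta);
}

On CPUs that advertise CMPXCHG8B (CPUID_CX8) the cmpset dispatches to the
i586 variant; on an 80486 it falls back to the i386 variant, so the loop
behaves the same either way.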