Date:      Thu, 16 Mar 2017 06:35:53 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r315377 - in stable/11/sys: kern sys
Message-ID:  <201703160635.v2G6Zrca082601@repo.freebsd.org>

Author: mjg
Date: Thu Mar 16 06:35:53 2017
New Revision: 315377
URL: https://svnweb.freebsd.org/changeset/base/315377

Log:
  MFC r313269,r313270,r313271,r313272,r313274,r313278,r313279,r313996,r314474
  
  mtx: switch to fcmpset
  
  The found value is passed to locking routines in order to reduce cacheline
  accesses.
  
  mtx_unlock grows an explicit check for the regular-unlock case: on LL/SC
  architectures the inline primitive can fail spuriously even when the lock
  could have been released by it, so the slow path has to handle that case
  itself.
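
  The sketch below contrasts the two loop shapes using C11 atomics instead
  of the kernel's machine/atomic.h primitives; the helpers cmpset(),
  fcmpset(), lock_cmpset_style() and lock_fcmpset_style() are invented for
  the illustration and are not the committed code.

  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  #define UNOWNED ((uintptr_t)0)

  static atomic_uintptr_t lock_word;

  /* cmpset: compare-and-set that only reports success or failure. */
  static int
  cmpset(atomic_uintptr_t *p, uintptr_t old, uintptr_t new)
  {
      return (atomic_compare_exchange_strong_explicit(p, &old, new,
          memory_order_acquire, memory_order_relaxed));
  }

  /*
   * fcmpset: like cmpset, but on failure *oldp is updated with the value
   * actually found in *p, so the caller does not have to re-read it.
   */
  static int
  fcmpset(atomic_uintptr_t *p, uintptr_t *oldp, uintptr_t new)
  {
      return (atomic_compare_exchange_strong_explicit(p, oldp, new,
          memory_order_acquire, memory_order_relaxed));
  }

  static void
  lock_cmpset_style(uintptr_t tid)
  {
      uintptr_t v;

      v = atomic_load_explicit(&lock_word, memory_order_relaxed);
      for (;;) {
          if (v == UNOWNED) {
              if (cmpset(&lock_word, UNOWNED, tid))
                  break;
              /* A failed cmpset tells us nothing: re-read the word. */
              v = atomic_load_explicit(&lock_word, memory_order_relaxed);
              continue;
          }
          /* Owned: a real lock spins adaptively or sleeps here. */
          v = atomic_load_explicit(&lock_word, memory_order_relaxed);
      }
  }

  static void
  lock_fcmpset_style(uintptr_t tid)
  {
      uintptr_t v = UNOWNED;

      for (;;) {
          if (v == UNOWNED) {
              if (fcmpset(&lock_word, &v, tid))
                  break;
              /* v already holds the observed word: no extra read. */
              continue;
          }
          /* Owned: a real lock spins adaptively or sleeps here. */
          v = atomic_load_explicit(&lock_word, memory_order_relaxed);
      }
  }

  int
  main(void)
  {
      lock_fcmpset_style(1);
      atomic_store_explicit(&lock_word, UNOWNED, memory_order_release);
      lock_cmpset_style(1);
      printf("owner: %#jx\n", (uintmax_t)atomic_load(&lock_word));
      return (0);
  }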
  
  ==
  
  rwlock: switch to fcmpset
  
  ==
  
  sx: switch to fcmpset
  
  ==
  
  sx: uninline slock/sunlock
  
  Shared locking routines explicitly read the value and test it. If the
  change attempt fails, they fall back to a regular function which would
  retry in a loop.
  
  The problem is that with many concurrent readers the risk of failure is pretty
  high and even the value returned by fcmpset is very likely going to be stale
  by the time the loop in the fallback routine is reached.
  
  Uninline said primitives. This gives a throughput increase for concurrent
  slocks/sunlocks with 80 hardware threads, from ~50 mln/s to ~56 mln/s.
  
  Interestingly, rwlock primitives are already not inlined.
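
  To make the effect concrete, here is a small user-space experiment (not
  from the tree; sx_word, reader(), the thread and iteration counts are all
  invented, and the lock word is reduced to a bare share counter): each
  thread repeatedly takes and drops a share, and the program reports how
  often the single inline-style attempt fails and the fallback loop has to
  run anyway.

  /* build: cc -O2 -pthread sx_contention.c */
  #include <pthread.h>
  #include <stdatomic.h>
  #include <stdint.h>
  #include <stdio.h>

  #define NTHREADS   8
  #define ITERS      1000000
  #define ONE_SHARER 1

  static atomic_uintptr_t sx_word;
  static atomic_long failed_first;

  static void *
  reader(void *arg)
  {
      (void)arg;
      for (int i = 0; i < ITERS; i++) {
          uintptr_t x = atomic_load_explicit(&sx_word, memory_order_relaxed);

          /* Single inline-style attempt to add a share. */
          if (!atomic_compare_exchange_strong_explicit(&sx_word, &x,
              x + ONE_SHARER, memory_order_acquire, memory_order_relaxed)) {
              atomic_fetch_add(&failed_first, 1);
              /* Fallback loop; x was refreshed by the failed exchange. */
              while (!atomic_compare_exchange_weak_explicit(&sx_word, &x,
                  x + ONE_SHARER, memory_order_acquire, memory_order_relaxed))
                  ;
          }

          /* Drop the share again. */
          uintptr_t y = atomic_load_explicit(&sx_word, memory_order_relaxed);
          while (!atomic_compare_exchange_weak_explicit(&sx_word, &y,
              y - ONE_SHARER, memory_order_release, memory_order_relaxed))
              ;
      }
      return (NULL);
  }

  int
  main(void)
  {
      pthread_t td[NTHREADS];

      for (int i = 0; i < NTHREADS; i++)
          pthread_create(&td[i], NULL, reader, NULL);
      for (int i = 0; i < NTHREADS; i++)
          pthread_join(td[i], NULL);
      printf("first attempt failed %ld of %ld times\n",
          atomic_load(&failed_first), (long)NTHREADS * ITERS);
      return (0);
  }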
  
  ==
  
  sx: add witness support missed in r313272
  
  ==
  
  mtx: fix up _mtx_obtain_lock_fetch usage in thread lock
  
  Since _mtx_obtain_lock_fetch no longer sets the argument to MTX_UNOWNED,
  callers have to do it on their own.
  
  ==
  
  mtx: fixup r313278: the assignment was supposed to go inside the loop
  
  ==
  
  mtx: fix spin mutexes interaction with failed fcmpset
  
  While doing so move recursion support down to the fallback routine.
  
  ==
  
  locks: ensure proper barriers are used with atomic ops when necessary
  
  It is unclear how, but the locking routine for mutexes was using the
  *release* barrier instead of acquire. This must have been either a
  copy-paste error or bad editor completion.
  
  Going through other uses of atomics shows no barriers in:
  - upgrade routines (addressed in this patch)
  - sections protected with turnstile locks; this should be fine, as the
    necessary barriers are, in the worst case, provided by turnstile unlock
  
  I would like to thank Mark Millard and andreast@ for reporting the problem
  and for testing previous patches before the issue was identified.
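
  For reference, the intended pairing expressed with C11 atomics (the
  kernel uses its own _acq/_rel variants; lock_try() and unlock_try() are
  made-up names for this illustration): taking or upgrading a lock wants
  acquire ordering so the critical section cannot be reordered before the
  acquisition, and dropping it wants release ordering so stores made under
  the lock cannot be reordered after the release.

  #include <stdatomic.h>
  #include <stdint.h>

  static atomic_uintptr_t word;

  static int
  lock_try(uintptr_t tid)
  {
      uintptr_t expected = 0;

      /*
       * Acquire on success; relaxed is enough on the failure path.  The
       * same acquire ordering is what the upgrade paths were missing.
       */
      return (atomic_compare_exchange_strong_explicit(&word, &expected,
          tid, memory_order_acquire, memory_order_relaxed));
  }

  static int
  unlock_try(uintptr_t tid)
  {
      uintptr_t expected = tid;

      /* Release: publishes the writes done while the lock was held. */
      return (atomic_compare_exchange_strong_explicit(&word, &expected,
          0, memory_order_release, memory_order_relaxed));
  }

  int
  main(void)
  {
      if (lock_try(1))
          (void)unlock_try(1);
      return (0);
  }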

Modified:
  stable/11/sys/kern/kern_mutex.c
  stable/11/sys/kern/kern_rwlock.c
  stable/11/sys/kern/kern_sx.c
  stable/11/sys/sys/mutex.h
  stable/11/sys/sys/rwlock.h
  stable/11/sys/sys/sx.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/kern/kern_mutex.c
==============================================================================
--- stable/11/sys/kern/kern_mutex.c	Thu Mar 16 06:32:58 2017	(r315376)
+++ stable/11/sys/kern/kern_mutex.c	Thu Mar 16 06:35:53 2017	(r315377)
@@ -416,12 +416,11 @@ _mtx_trylock_flags_(volatile uintptr_t *
  * sleep waiting for it), or if we need to recurse on it.
  */
 void
-__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
+__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid, int opts,
     const char *file, int line)
 {
 	struct mtx *m;
 	struct turnstile *ts;
-	uintptr_t v;
 #ifdef ADAPTIVE_MUTEXES
 	volatile struct thread *owner;
 #endif
@@ -450,7 +449,6 @@ __mtx_lock_sleep(volatile uintptr_t *c, 
 	lock_delay_arg_init(&lda, NULL);
 #endif
 	m = mtxlock2mtx(c);
-	v = MTX_READ_VALUE(m);
 
 	if (__predict_false(lv_mtx_owner(v) == (struct thread *)tid)) {
 		KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 ||
@@ -481,9 +479,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, 
 
 	for (;;) {
 		if (v == MTX_UNOWNED) {
-			if (_mtx_obtain_lock(m, tid))
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
-			v = MTX_READ_VALUE(m);
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
@@ -635,12 +632,11 @@ _mtx_lock_spin_failed(struct mtx *m)
  * is handled inline.
  */
 void
-_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
-    const char *file, int line)
+_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+    int opts, const char *file, int line)
 {
 	struct mtx *m;
 	struct lock_delay_arg lda;
-	uintptr_t v;
 #ifdef LOCK_PROFILING
 	int contested = 0;
 	uint64_t waittime = 0;
@@ -655,6 +651,14 @@ _mtx_lock_spin_cookie(volatile uintptr_t
 	lock_delay_arg_init(&lda, &mtx_spin_delay);
 	m = mtxlock2mtx(c);
 
+	if (__predict_false(v == MTX_UNOWNED))
+		v = MTX_READ_VALUE(m);
+
+	if (__predict_false(v == tid)) {
+		m->mtx_recurse++;
+		return;
+	}
+
 	if (LOCK_LOG_TEST(&m->lock_object, opts))
 		CTR1(KTR_LOCK, "_mtx_lock_spin: %p spinning", m);
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname((struct thread *)tid),
@@ -667,12 +671,10 @@ _mtx_lock_spin_cookie(volatile uintptr_t
 #ifdef KDTRACE_HOOKS
 	spin_time -= lockstat_nsecs(&m->lock_object);
 #endif
-	v = MTX_READ_VALUE(m);
 	for (;;) {
 		if (v == MTX_UNOWNED) {
-			if (_mtx_obtain_lock(m, tid))
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
 				break;
-			v = MTX_READ_VALUE(m);
 			continue;
 		}
 		/* Give interrupts a chance while we spin. */
@@ -744,6 +746,7 @@ thread_lock_flags_(struct thread *td, in
 #endif
 	for (;;) {
 retry:
+		v = MTX_UNOWNED;
 		spinlock_enter();
 		m = td->td_lock;
 		KASSERT(m->mtx_lock != MTX_DESTROYED,
@@ -757,14 +760,11 @@ retry:
 			    m->lock_object.lo_name, file, line));
 		WITNESS_CHECKORDER(&m->lock_object,
 		    opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL);
-		v = MTX_READ_VALUE(m);
 		for (;;) {
-			if (v == MTX_UNOWNED) {
-				if (_mtx_obtain_lock(m, tid))
-					break;
-				v = MTX_READ_VALUE(m);
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
+				break;
+			if (v == MTX_UNOWNED)
 				continue;
-			}
 			if (v == tid) {
 				m->mtx_recurse++;
 				break;
@@ -857,11 +857,18 @@ __mtx_unlock_sleep(volatile uintptr_t *c
 {
 	struct mtx *m;
 	struct turnstile *ts;
+	uintptr_t v;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	m = mtxlock2mtx(c);
+	v = MTX_READ_VALUE(m);
+
+	if (v == (uintptr_t)curthread) {
+		if (_mtx_release_lock(m, (uintptr_t)curthread))
+			return;
+	}
 
 	if (mtx_recursed(m)) {
 		if (--(m->mtx_recurse) == 0)

Modified: stable/11/sys/kern/kern_rwlock.c
==============================================================================
--- stable/11/sys/kern/kern_rwlock.c	Thu Mar 16 06:32:58 2017	(r315376)
+++ stable/11/sys/kern/kern_rwlock.c	Thu Mar 16 06:35:53 2017	(r315377)
@@ -422,7 +422,7 @@ __rw_rlock(volatile uintptr_t *c, const 
 			 * if the lock has been unlocked and write waiters
 			 * were present.
 			 */
-			if (atomic_cmpset_acq_ptr(&rw->rw_lock, v,
+			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v,
 			    v + RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR4(KTR_LOCK,
@@ -431,7 +431,6 @@ __rw_rlock(volatile uintptr_t *c, const 
 					    (void *)(v + RW_ONE_READER));
 				break;
 			}
-			v = RW_READ_VALUE(rw);
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
@@ -657,7 +656,7 @@ _rw_runlock_cookie(volatile uintptr_t *c
 		 * just drop one and return.
 		 */
 		if (RW_READERS(x) > 1) {
-			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
+			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, &x,
 			    x - RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR4(KTR_LOCK,
@@ -666,7 +665,6 @@ _rw_runlock_cookie(volatile uintptr_t *c
 					    (void *)(x - RW_ONE_READER));
 				break;
 			}
-			x = RW_READ_VALUE(rw);
 			continue;
 		}
 		/*
@@ -676,14 +674,13 @@ _rw_runlock_cookie(volatile uintptr_t *c
 		if (!(x & RW_LOCK_WAITERS)) {
 			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
 			    RW_READERS_LOCK(1));
-			if (atomic_cmpset_rel_ptr(&rw->rw_lock, x,
+			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, &x,
 			    RW_UNLOCKED)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p last succeeded",
 					    __func__, rw);
 				break;
 			}
-			x = RW_READ_VALUE(rw);
 			continue;
 		}
 		/*
@@ -751,8 +748,8 @@ _rw_runlock_cookie(volatile uintptr_t *c
  * read or write lock.
  */
 void
-__rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
-    int line)
+__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+    const char *file, int line)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
@@ -761,7 +758,7 @@ __rw_wlock_hard(volatile uintptr_t *c, u
 	int spintries = 0;
 	int i;
 #endif
-	uintptr_t v, x;
+	uintptr_t x;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
@@ -785,7 +782,6 @@ __rw_wlock_hard(volatile uintptr_t *c, u
 	lock_delay_arg_init(&lda, NULL);
 #endif
 	rw = rwlock2rw(c);
-	v = RW_READ_VALUE(rw);
 
 	if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) {
 		KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE,
@@ -807,9 +803,8 @@ __rw_wlock_hard(volatile uintptr_t *c, u
 #endif
 	for (;;) {
 		if (v == RW_UNLOCKED) {
-			if (_rw_write_lock(rw, tid))
+			if (_rw_write_lock_fetch(rw, &v, tid))
 				break;
-			v = RW_READ_VALUE(rw);
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
@@ -1072,7 +1067,7 @@ __rw_try_upgrade(volatile uintptr_t *c, 
 		if (RW_READERS(v) > 1)
 			break;
 		if (!(v & RW_LOCK_WAITERS)) {
-			success = atomic_cmpset_ptr(&rw->rw_lock, v, tid);
+			success = atomic_cmpset_acq_ptr(&rw->rw_lock, v, tid);
 			if (!success)
 				continue;
 			break;

Modified: stable/11/sys/kern/kern_sx.c
==============================================================================
--- stable/11/sys/kern/kern_sx.c	Thu Mar 16 06:32:58 2017	(r315376)
+++ stable/11/sys/kern/kern_sx.c	Thu Mar 16 06:35:53 2017	(r315377)
@@ -256,29 +256,6 @@ sx_destroy(struct sx *sx)
 }
 
 int
-_sx_slock(struct sx *sx, int opts, const char *file, int line)
-{
-	int error = 0;
-
-	if (SCHEDULER_STOPPED())
-		return (0);
-	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
-	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
-	    curthread, sx->lock_object.lo_name, file, line));
-	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
-	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
-	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
-	error = __sx_slock(sx, opts, file, line);
-	if (!error) {
-		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
-		WITNESS_LOCK(&sx->lock_object, 0, file, line);
-		TD_LOCKS_INC(curthread);
-	}
-
-	return (error);
-}
-
-int
 sx_try_slock_(struct sx *sx, const char *file, int line)
 {
 	uintptr_t x;
@@ -371,21 +348,6 @@ sx_try_xlock_(struct sx *sx, const char 
 }
 
 void
-_sx_sunlock(struct sx *sx, const char *file, int line)
-{
-
-	if (SCHEDULER_STOPPED())
-		return;
-	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
-	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
-	_sx_assert(sx, SA_SLOCKED, file, line);
-	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
-	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
-	__sx_sunlock(sx, file, line);
-	TD_LOCKS_DEC(curthread);
-}
-
-void
 _sx_xunlock(struct sx *sx, const char *file, int line)
 {
 
@@ -425,7 +387,7 @@ sx_try_upgrade_(struct sx *sx, const cha
 	 * we will wake up the exclusive waiters when we drop the lock.
 	 */
 	x = sx->sx_lock & SX_LOCK_EXCLUSIVE_WAITERS;
-	success = atomic_cmpset_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
+	success = atomic_cmpset_acq_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) | x,
 	    (uintptr_t)curthread | x);
 	LOCK_LOG_TRY("XUPGRADE", &sx->lock_object, 0, success, file, line);
 	if (success) {
@@ -510,15 +472,14 @@ sx_downgrade_(struct sx *sx, const char 
  * accessible from at least sx.h.
  */
 int
-_sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
-    int line)
+_sx_xlock_hard(struct sx *sx, uintptr_t x, uintptr_t tid, int opts,
+    const char *file, int line)
 {
 	GIANT_DECLARE;
 #ifdef ADAPTIVE_SX
 	volatile struct thread *owner;
 	u_int i, spintries = 0;
 #endif
-	uintptr_t x;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
@@ -543,8 +504,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t 
 	lock_delay_arg_init(&lda, NULL);
 #endif
 
-	x = SX_READ_VALUE(sx);
-
 	/* If we already hold an exclusive lock, then recurse. */
 	if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) {
 		KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0,
@@ -567,9 +526,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t 
 #endif
 	for (;;) {
 		if (x == SX_LOCK_UNLOCKED) {
-			if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, tid))
+			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
 				break;
-			x = SX_READ_VALUE(sx);
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
@@ -824,14 +782,8 @@ _sx_xunlock_hard(struct sx *sx, uintptr_
 		kick_proc0();
 }
 
-/*
- * This function represents the so-called 'hard case' for sx_slock
- * operation.  All 'easy case' failures are redirected to this.  Note
- * that ideally this would be a static function, but it needs to be
- * accessible from at least sx.h.
- */
 int
-_sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
+_sx_slock(struct sx *sx, int opts, const char *file, int line)
 {
 	GIANT_DECLARE;
 #ifdef ADAPTIVE_SX
@@ -861,6 +813,12 @@ _sx_slock_hard(struct sx *sx, int opts, 
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
+	    ("sx_slock() by idle thread %p on sx %s @ %s:%d",
+	    curthread, sx->lock_object.lo_name, file, line));
+	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+	    ("sx_slock() of destroyed sx @ %s:%d", file, line));
+	WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
 #ifdef KDTRACE_HOOKS
 	all_time -= lockstat_nsecs(&sx->lock_object);
 #endif
@@ -882,7 +840,7 @@ _sx_slock_hard(struct sx *sx, int opts, 
 		 */
 		if (x & SX_LOCK_SHARED) {
 			MPASS(!(x & SX_LOCK_SHARED_WAITERS));
-			if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
+			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x,
 			    x + SX_ONE_SHARER)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR4(KTR_LOCK,
@@ -891,7 +849,6 @@ _sx_slock_hard(struct sx *sx, int opts, 
 					    (void *)(x + SX_ONE_SHARER));
 				break;
 			}
-			x = SX_READ_VALUE(sx);
 			continue;
 		}
 #ifdef KDTRACE_HOOKS
@@ -1029,21 +986,19 @@ _sx_slock_hard(struct sx *sx, int opts, 
 		    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
 		    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
 #endif
-	if (error == 0)
+	if (error == 0) {
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    contested, waittime, file, line, LOCKSTAT_READER);
+		LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
+		WITNESS_LOCK(&sx->lock_object, 0, file, line);
+		TD_LOCKS_INC(curthread);
+	}
 	GIANT_RESTORE();
 	return (error);
 }
 
-/*
- * This function represents the so-called 'hard case' for sx_sunlock
- * operation.  All 'easy case' failures are redirected to this.  Note
- * that ideally this would be a static function, but it needs to be
- * accessible from at least sx.h.
- */
 void
-_sx_sunlock_hard(struct sx *sx, const char *file, int line)
+_sx_sunlock(struct sx *sx, const char *file, int line)
 {
 	uintptr_t x;
 	int wakeup_swapper;
@@ -1051,6 +1006,12 @@ _sx_sunlock_hard(struct sx *sx, const ch
 	if (SCHEDULER_STOPPED())
 		return;
 
+	KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
+	    ("sx_sunlock() of destroyed sx @ %s:%d", file, line));
+	_sx_assert(sx, SA_SLOCKED, file, line);
+	WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
+	LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
+	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
 	x = SX_READ_VALUE(sx);
 	for (;;) {
 		/*
@@ -1065,7 +1026,7 @@ _sx_sunlock_hard(struct sx *sx, const ch
 		 * so, just drop one and return.
 		 */
 		if (SX_SHARERS(x) > 1) {
-			if (atomic_cmpset_rel_ptr(&sx->sx_lock, x,
+			if (atomic_fcmpset_rel_ptr(&sx->sx_lock, &x,
 			    x - SX_ONE_SHARER)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR4(KTR_LOCK,
@@ -1074,8 +1035,6 @@ _sx_sunlock_hard(struct sx *sx, const ch
 					    (void *)(x - SX_ONE_SHARER));
 				break;
 			}
-
-			x = SX_READ_VALUE(sx);
 			continue;
 		}
 
@@ -1085,14 +1044,14 @@ _sx_sunlock_hard(struct sx *sx, const ch
 		 */
 		if (!(x & SX_LOCK_EXCLUSIVE_WAITERS)) {
 			MPASS(x == SX_SHARERS_LOCK(1));
-			if (atomic_cmpset_rel_ptr(&sx->sx_lock,
-			    SX_SHARERS_LOCK(1), SX_LOCK_UNLOCKED)) {
+			x = SX_SHARERS_LOCK(1);
+			if (atomic_fcmpset_rel_ptr(&sx->sx_lock,
+			    &x, SX_LOCK_UNLOCKED)) {
 				if (LOCK_LOG_TEST(&sx->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p last succeeded",
 					    __func__, sx);
 				break;
 			}
-			x = SX_READ_VALUE(sx);
 			continue;
 		}
 
@@ -1127,6 +1086,7 @@ _sx_sunlock_hard(struct sx *sx, const ch
 			kick_proc0();
 		break;
 	}
+	TD_LOCKS_DEC(curthread);
 }
 
 #ifdef INVARIANT_SUPPORT

Modified: stable/11/sys/sys/mutex.h
==============================================================================
--- stable/11/sys/sys/mutex.h	Thu Mar 16 06:32:58 2017	(r315376)
+++ stable/11/sys/sys/mutex.h	Thu Mar 16 06:35:53 2017	(r315377)
@@ -98,13 +98,13 @@ void	mtx_sysinit(void *arg);
 int	_mtx_trylock_flags_(volatile uintptr_t *c, int opts, const char *file,
 	    int line);
 void	mutex_init(void);
-void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
-	    const char *file, int line);
+void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+	    int opts, const char *file, int line);
 void	__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file,
 	    int line);
 #ifdef SMP
-void	_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts,
-	    const char *file, int line);
+void	_mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+	    int opts, const char *file, int line);
 #endif
 void	__mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file,
 	    int line);
@@ -140,13 +140,13 @@ void	thread_lock_flags_(struct thread *,
 	_mtx_destroy(&(m)->mtx_lock)
 #define	mtx_trylock_flags_(m, o, f, l)					\
 	_mtx_trylock_flags_(&(m)->mtx_lock, o, f, l)
-#define	_mtx_lock_sleep(m, t, o, f, l)					\
-	__mtx_lock_sleep(&(m)->mtx_lock, t, o, f, l)
+#define	_mtx_lock_sleep(m, v, t, o, f, l)				\
+	__mtx_lock_sleep(&(m)->mtx_lock, v, t, o, f, l)
 #define	_mtx_unlock_sleep(m, o, f, l)					\
 	__mtx_unlock_sleep(&(m)->mtx_lock, o, f, l)
 #ifdef SMP
-#define	_mtx_lock_spin(m, t, o, f, l)					\
-	_mtx_lock_spin_cookie(&(m)->mtx_lock, t, o, f, l)
+#define	_mtx_lock_spin(m, v, t, o, f, l)				\
+	_mtx_lock_spin_cookie(&(m)->mtx_lock, v, t, o, f, l)
 #endif
 #define	_mtx_lock_flags(m, o, f, l)					\
 	__mtx_lock_flags(&(m)->mtx_lock, o, f, l)
@@ -171,6 +171,11 @@ void	thread_lock_flags_(struct thread *,
 #define _mtx_obtain_lock(mp, tid)					\
 	atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid))
 
+#define _mtx_obtain_lock_fetch(mp, vp, tid) ({				\
+	*vp = MTX_UNOWNED;						\
+	atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid));		\
+})
+
 /* Try to release mtx_lock if it is unrecursed and uncontested. */
 #define _mtx_release_lock(mp, tid)					\
 	atomic_cmpset_rel_ptr(&(mp)->mtx_lock, (tid), MTX_UNOWNED)
@@ -188,9 +193,10 @@ void	thread_lock_flags_(struct thread *,
 /* Lock a normal mutex. */
 #define __mtx_lock(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v;							\
 									\
-	if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid)))\
-		_mtx_lock_sleep((mp), _tid, (opts), (file), (line));	\
+	if (!_mtx_obtain_lock_fetch((mp), &_v, _tid))			\
+		_mtx_lock_sleep((mp), _v, _tid, (opts), (file), (line));\
 	else								\
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire,	\
 		    mp, 0, 0, file, line);				\
@@ -205,14 +211,12 @@ void	thread_lock_flags_(struct thread *,
 #ifdef SMP
 #define __mtx_lock_spin(mp, tid, opts, file, line) do {			\
 	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v;							\
 									\
 	spinlock_enter();						\
-	if (((mp)->mtx_lock != MTX_UNOWNED || !_mtx_obtain_lock((mp), _tid))) {\
-		if ((mp)->mtx_lock == _tid)				\
-			(mp)->mtx_recurse++;				\
-		else							\
-			_mtx_lock_spin((mp), _tid, (opts), (file), (line)); \
-	} else 								\
+	if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) 			\
+		_mtx_lock_spin((mp), _v, _tid, (opts), (file), (line)); \
+	else 								\
 		LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire,	\
 		    mp, 0, 0, file, line);				\
 } while (0)
@@ -265,7 +269,7 @@ void	thread_lock_flags_(struct thread *,
 									\
 	if ((mp)->mtx_recurse == 0)					\
 		LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp);	\
-	if ((mp)->mtx_lock != _tid || !_mtx_release_lock((mp), _tid))	\
+	if (!_mtx_release_lock((mp), _tid))				\
 		_mtx_unlock_sleep((mp), (opts), (file), (line));	\
 } while (0)
 

Modified: stable/11/sys/sys/rwlock.h
==============================================================================
--- stable/11/sys/sys/rwlock.h	Thu Mar 16 06:32:58 2017	(r315376)
+++ stable/11/sys/sys/rwlock.h	Thu Mar 16 06:35:53 2017	(r315377)
@@ -84,6 +84,11 @@
 #define	_rw_write_lock(rw, tid)						\
 	atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid))
 
+#define	_rw_write_lock_fetch(rw, vp, tid) ({				\
+	*vp = RW_UNLOCKED;						\
+	atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid));		\
+})
+
 /* Release a write lock quickly if there are no waiters. */
 #define	_rw_write_unlock(rw, tid)					\
 	atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
@@ -97,9 +102,10 @@
 /* Acquire a write lock. */
 #define	__rw_wlock(rw, tid, file, line) do {				\
 	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v;							\
 									\
-	if ((rw)->rw_lock != RW_UNLOCKED || !_rw_write_lock((rw), _tid))\
-		_rw_wlock_hard((rw), _tid, (file), (line));		\
+	if (!_rw_write_lock_fetch((rw), &_v, _tid))			\
+		_rw_wlock_hard((rw), _v, _tid, (file), (line));		\
 	else 								\
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw,	\
 		    0, 0, file, line, LOCKSTAT_WRITER);			\
@@ -114,7 +120,7 @@
 	else {								\
 		LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw,	\
 		    LOCKSTAT_WRITER);					\
-		if ((rw)->rw_lock != _tid || !_rw_write_unlock((rw), _tid))\
+		if (!_rw_write_unlock((rw), _tid))			\
 			_rw_wunlock_hard((rw), _tid, (file), (line));	\
 	}								\
 } while (0)
@@ -135,8 +141,8 @@ void	_rw_wunlock_cookie(volatile uintptr
 void	__rw_rlock(volatile uintptr_t *c, const char *file, int line);
 int	__rw_try_rlock(volatile uintptr_t *c, const char *file, int line);
 void	_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line);
-void	__rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
-	    int line);
+void	__rw_wlock_hard(volatile uintptr_t *c, uintptr_t v, uintptr_t tid,
+	    const char *file, int line);
 void	__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid,
 	    const char *file, int line);
 int	__rw_try_upgrade(volatile uintptr_t *c, const char *file, int line);
@@ -171,8 +177,8 @@ void	__rw_assert(const volatile uintptr_
 	__rw_try_rlock(&(rw)->rw_lock, f, l)
 #define	_rw_runlock(rw, f, l)						\
 	_rw_runlock_cookie(&(rw)->rw_lock, f, l)
-#define	_rw_wlock_hard(rw, t, f, l)					\
-	__rw_wlock_hard(&(rw)->rw_lock, t, f, l)
+#define	_rw_wlock_hard(rw, v, t, f, l)					\
+	__rw_wlock_hard(&(rw)->rw_lock, v, t, f, l)
 #define	_rw_wunlock_hard(rw, t, f, l)					\
 	__rw_wunlock_hard(&(rw)->rw_lock, t, f, l)
 #define	_rw_try_upgrade(rw, f, l)					\

Modified: stable/11/sys/sys/sx.h
==============================================================================
--- stable/11/sys/sys/sx.h	Thu Mar 16 06:32:58 2017	(r315376)
+++ stable/11/sys/sys/sx.h	Thu Mar 16 06:35:53 2017	(r315377)
@@ -109,12 +109,10 @@ int	_sx_slock(struct sx *sx, int opts, c
 int	_sx_xlock(struct sx *sx, int opts, const char *file, int line);
 void	_sx_sunlock(struct sx *sx, const char *file, int line);
 void	_sx_xunlock(struct sx *sx, const char *file, int line);
-int	_sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts,
+int	_sx_xlock_hard(struct sx *sx, uintptr_t v, uintptr_t tid, int opts,
 	    const char *file, int line);
-int	_sx_slock_hard(struct sx *sx, int opts, const char *file, int line);
 void	_sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
 	    line);
-void	_sx_sunlock_hard(struct sx *sx, const char *file, int line);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void	_sx_assert(const struct sx *sx, int what, const char *file, int line);
 #endif
@@ -153,11 +151,12 @@ __sx_xlock(struct sx *sx, struct thread 
     int line)
 {
 	uintptr_t tid = (uintptr_t)td;
+	uintptr_t v;
 	int error = 0;
 
-	if (sx->sx_lock != SX_LOCK_UNLOCKED ||
-	    !atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid))
-		error = _sx_xlock_hard(sx, tid, opts, file, line);
+	v = SX_LOCK_UNLOCKED;
+	if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid))
+		error = _sx_xlock_hard(sx, v, tid, opts, file, line);
 	else 
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
 		    0, 0, file, line, LOCKSTAT_WRITER);
@@ -174,46 +173,10 @@ __sx_xunlock(struct sx *sx, struct threa
 	if (sx->sx_recurse == 0)
 		LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx,
 		    LOCKSTAT_WRITER);
-	if (sx->sx_lock != tid ||
-	    !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
+	if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
 		_sx_xunlock_hard(sx, tid, file, line);
 }
 
-/* Acquire a shared lock. */
-static __inline int
-__sx_slock(struct sx *sx, int opts, const char *file, int line)
-{
-	uintptr_t x = sx->sx_lock;
-	int error = 0;
-
-	if (!(x & SX_LOCK_SHARED) ||
-	    !atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
-		error = _sx_slock_hard(sx, opts, file, line);
-	else
-		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
-		    0, 0, file, line, LOCKSTAT_READER);
-
-	return (error);
-}
-
-/*
- * Release a shared lock.  We can just drop a single shared lock so
- * long as we aren't trying to drop the last shared lock when other
- * threads are waiting for an exclusive lock.  This takes advantage of
- * the fact that an unlocked lock is encoded as a shared lock with a
- * count of 0.
- */
-static __inline void
-__sx_sunlock(struct sx *sx, const char *file, int line)
-{
-	uintptr_t x = sx->sx_lock;
-
-	LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
-	if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) ||
-	    !atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER))
-		_sx_sunlock_hard(sx, file, line);
-}
-
 /*
  * Public interface for lock operations.
  */
@@ -227,12 +190,6 @@ __sx_sunlock(struct sx *sx, const char *
 	_sx_xlock((sx), SX_INTERRUPTIBLE, (file), (line))
 #define	sx_xunlock_(sx, file, line)					\
 	_sx_xunlock((sx), (file), (line))
-#define	sx_slock_(sx, file, line)					\
-	(void)_sx_slock((sx), 0, (file), (line))
-#define	sx_slock_sig_(sx, file, line)					\
-	_sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
-#define	sx_sunlock_(sx, file, line)					\
-	_sx_sunlock((sx), (file), (line))
 #else
 #define	sx_xlock_(sx, file, line)					\
 	(void)__sx_xlock((sx), curthread, 0, (file), (line))
@@ -240,13 +197,13 @@ __sx_sunlock(struct sx *sx, const char *
 	__sx_xlock((sx), curthread, SX_INTERRUPTIBLE, (file), (line))
 #define	sx_xunlock_(sx, file, line)					\
 	__sx_xunlock((sx), curthread, (file), (line))
+#endif	/* LOCK_DEBUG > 0 || SX_NOINLINE */
 #define	sx_slock_(sx, file, line)					\
-	(void)__sx_slock((sx), 0, (file), (line))
+	(void)_sx_slock((sx), 0, (file), (line))
 #define	sx_slock_sig_(sx, file, line)					\
-	__sx_slock((sx), SX_INTERRUPTIBLE, (file), (line))
+	_sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
 #define	sx_sunlock_(sx, file, line)					\
-	__sx_sunlock((sx), (file), (line))
-#endif	/* LOCK_DEBUG > 0 || SX_NOINLINE */
+	_sx_sunlock((sx), (file), (line))
 #define	sx_try_slock(sx)	sx_try_slock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_try_xlock(sx)	sx_try_xlock_((sx), LOCK_FILE, LOCK_LINE)
 #define	sx_try_upgrade(sx)	sx_try_upgrade_((sx), LOCK_FILE, LOCK_LINE)


