Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 16 Mar 2017 06:53:55 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject:   svn commit: r315380 - stable/11/sys/kern
Message-ID:  <201703160653.v2G6rtYn090510@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Thu Mar 16 06:53:55 2017
New Revision: 315380
URL: https://svnweb.freebsd.org/changeset/base/315380

Log:
  MFC r313454,r313472:
  
  rwlock: implemenet rlock/runlock fast path
  
  This improves singlethreaded throughput on my test machine from ~247 mln
  ops/s to ~328 mln.
  
  It is mostly about avoiding the setup cost of lockstat.
  
  ==
  
  rwlock: fix r313454
  
  The runlock slow path would update wrong variable before restarting the
  loop, in effect corrupting the state.

Modified:
  stable/11/sys/kern/kern_rwlock.c
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/sys/kern/kern_rwlock.c
==============================================================================
--- stable/11/sys/kern/kern_rwlock.c	Thu Mar 16 06:51:00 2017	(r315379)
+++ stable/11/sys/kern/kern_rwlock.c	Thu Mar 16 06:53:55 2017	(r315380)
@@ -359,13 +359,44 @@ _rw_wunlock_cookie(volatile uintptr_t *c
  * is unlocked and has no writer waiters or spinners.  Failing otherwise
  * prioritizes writers before readers.
  */
-#define	RW_CAN_READ(_rw)						\
-    ((curthread->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
+#define	RW_CAN_READ(td, _rw)						\
+    (((td)->td_rw_rlocks && (_rw) & RW_LOCK_READ) || ((_rw) &	\
     (RW_LOCK_READ | RW_LOCK_WRITE_WAITERS | RW_LOCK_WRITE_SPINNER)) ==	\
     RW_LOCK_READ)
 
-void
-__rw_rlock(volatile uintptr_t *c, const char *file, int line)
+static bool __always_inline
+__rw_rlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp,
+    const char *file, int line)
+{
+
+	/*
+	 * Handle the easy case.  If no other thread has a write
+	 * lock, then try to bump up the count of read locks.  Note
+	 * that we have to preserve the current state of the
+	 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
+	 * read lock, then rw_lock must have changed, so restart
+	 * the loop.  Note that this handles the case of a
+	 * completely unlocked rwlock since such a lock is encoded
+	 * as a read lock with no waiters.
+	 */
+	while (RW_CAN_READ(td, *vp)) {
+		if (atomic_fcmpset_acq_ptr(&rw->rw_lock, vp,
+			*vp + RW_ONE_READER)) {
+			if (LOCK_LOG_TEST(&rw->lock_object, 0))
+				CTR4(KTR_LOCK,
+				    "%s: %p succeed %p -> %p", __func__,
+				    rw, (void *)*vp,
+				    (void *)(*vp + RW_ONE_READER));
+			td->td_rw_rlocks++;
+			return (true);
+		}
+	}
+	return (false);
+}
+
+static void __noinline
+__rw_rlock_hard(volatile uintptr_t *c, struct thread *td, uintptr_t v,
+    const char *file, int line)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
@@ -378,7 +409,6 @@ __rw_rlock(volatile uintptr_t *c, const 
 	uint64_t waittime = 0;
 	int contested = 0;
 #endif
-	uintptr_t v;
 #if defined(ADAPTIVE_RWLOCKS) || defined(KDTRACE_HOOKS)
 	struct lock_delay_arg lda;
 #endif
@@ -399,51 +429,15 @@ __rw_rlock(volatile uintptr_t *c, const 
 #endif
 	rw = rwlock2rw(c);
 
-	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
-	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
-	    curthread, rw->lock_object.lo_name, file, line));
-	KASSERT(rw->rw_lock != RW_DESTROYED,
-	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
-	KASSERT(rw_wowner(rw) != curthread,
-	    ("rw_rlock: wlock already held for %s @ %s:%d",
-	    rw->lock_object.lo_name, file, line));
-	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
-
 #ifdef KDTRACE_HOOKS
 	all_time -= lockstat_nsecs(&rw->lock_object);
 #endif
-	v = RW_READ_VALUE(rw);
 #ifdef KDTRACE_HOOKS
 	state = v;
 #endif
 	for (;;) {
-		/*
-		 * Handle the easy case.  If no other thread has a write
-		 * lock, then try to bump up the count of read locks.  Note
-		 * that we have to preserve the current state of the
-		 * RW_LOCK_WRITE_WAITERS flag.  If we fail to acquire a
-		 * read lock, then rw_lock must have changed, so restart
-		 * the loop.  Note that this handles the case of a
-		 * completely unlocked rwlock since such a lock is encoded
-		 * as a read lock with no waiters.
-		 */
-		if (RW_CAN_READ(v)) {
-			/*
-			 * The RW_LOCK_READ_WAITERS flag should only be set
-			 * if the lock has been unlocked and write waiters
-			 * were present.
-			 */
-			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v,
-			    v + RW_ONE_READER)) {
-				if (LOCK_LOG_TEST(&rw->lock_object, 0))
-					CTR4(KTR_LOCK,
-					    "%s: %p succeed %p -> %p", __func__,
-					    rw, (void *)v,
-					    (void *)(v + RW_ONE_READER));
-				break;
-			}
-			continue;
-		}
+		if (__rw_rlock_try(rw, td, &v, file, line))
+			break;
 #ifdef KDTRACE_HOOKS
 		lda.spin_cnt++;
 #endif
@@ -485,7 +479,7 @@ __rw_rlock(volatile uintptr_t *c, const 
 			    rw->lock_object.lo_name);
 			for (i = 0; i < rowner_loops; i++) {
 				v = RW_READ_VALUE(rw);
-				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(v))
+				if ((v & RW_LOCK_READ) == 0 || RW_CAN_READ(td, v))
 					break;
 				cpu_spinwait();
 			}
@@ -513,7 +507,7 @@ __rw_rlock(volatile uintptr_t *c, const 
 		 * recheck its state and restart the loop if needed.
 		 */
 		v = RW_READ_VALUE(rw);
-		if (RW_CAN_READ(v)) {
+		if (RW_CAN_READ(td, v)) {
 			turnstile_cancel(ts);
 			continue;
 		}
@@ -538,7 +532,7 @@ __rw_rlock(volatile uintptr_t *c, const 
 		/*
 		 * The lock is held in write mode or it already has waiters.
 		 */
-		MPASS(!RW_CAN_READ(v));
+		MPASS(!RW_CAN_READ(td, v));
 
 		/*
 		 * If the RW_LOCK_READ_WAITERS flag is already set, then
@@ -598,10 +592,36 @@ __rw_rlock(volatile uintptr_t *c, const 
 	 */
 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 	    waittime, file, line, LOCKSTAT_READER);
+}
+
+void
+__rw_rlock(volatile uintptr_t *c, const char *file, int line)
+{
+	struct rwlock *rw;
+	struct thread *td;
+	uintptr_t v;
+
+	td = curthread;
+	rw = rwlock2rw(c);
+
+	KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(td),
+	    ("rw_rlock() by idle thread %p on rwlock %s @ %s:%d",
+	    td, rw->lock_object.lo_name, file, line));
+	KASSERT(rw->rw_lock != RW_DESTROYED,
+	    ("rw_rlock() of destroyed rwlock @ %s:%d", file, line));
+	KASSERT(rw_wowner(rw) != td,
+	    ("rw_rlock: wlock already held for %s @ %s:%d",
+	    rw->lock_object.lo_name, file, line));
+	WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL);
+
+	v = RW_READ_VALUE(rw);
+	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(rw__acquire) ||
+	    !__rw_rlock_try(rw, td, &v, file, line)))
+		__rw_rlock_hard(c, td, v, file, line);
+
 	LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
 	WITNESS_LOCK(&rw->lock_object, 0, file, line);
 	TD_LOCKS_INC(curthread);
-	curthread->td_rw_rlocks++;
 }
 
 int
@@ -641,40 +661,25 @@ __rw_try_rlock(volatile uintptr_t *c, co
 	return (0);
 }
 
-void
-_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
+static bool __always_inline
+__rw_runlock_try(struct rwlock *rw, struct thread *td, uintptr_t *vp)
 {
-	struct rwlock *rw;
-	struct turnstile *ts;
-	uintptr_t x, v, queue;
-
-	if (SCHEDULER_STOPPED())
-		return;
-
-	rw = rwlock2rw(c);
-
-	KASSERT(rw->rw_lock != RW_DESTROYED,
-	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
-	__rw_assert(c, RA_RLOCKED, file, line);
-	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
-	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
 
-	/* TODO: drop "owner of record" here. */
-	x = RW_READ_VALUE(rw);
 	for (;;) {
 		/*
 		 * See if there is more than one read lock held.  If so,
 		 * just drop one and return.
 		 */
-		if (RW_READERS(x) > 1) {
-			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, &x,
-			    x - RW_ONE_READER)) {
+		if (RW_READERS(*vp) > 1) {
+			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
+			    *vp - RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR4(KTR_LOCK,
 					    "%s: %p succeeded %p -> %p",
-					    __func__, rw, (void *)x,
-					    (void *)(x - RW_ONE_READER));
-				break;
+					    __func__, rw, (void *)*vp,
+					    (void *)(*vp - RW_ONE_READER));
+				td->td_rw_rlocks--;
+				return (true);
 			}
 			continue;
 		}
@@ -682,18 +687,41 @@ _rw_runlock_cookie(volatile uintptr_t *c
 		 * If there aren't any waiters for a write lock, then try
 		 * to drop it quickly.
 		 */
-		if (!(x & RW_LOCK_WAITERS)) {
-			MPASS((x & ~RW_LOCK_WRITE_SPINNER) ==
+		if (!(*vp & RW_LOCK_WAITERS)) {
+			MPASS((*vp & ~RW_LOCK_WRITE_SPINNER) ==
 			    RW_READERS_LOCK(1));
-			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, &x,
+			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
 			    RW_UNLOCKED)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
 					CTR2(KTR_LOCK, "%s: %p last succeeded",
 					    __func__, rw);
-				break;
+				td->td_rw_rlocks--;
+				return (true);
 			}
 			continue;
 		}
+		break;
+	}
+	return (false);
+}
+
+static void __noinline
+__rw_runlock_hard(volatile uintptr_t *c, struct thread *td, uintptr_t v,
+    const char *file, int line)
+{
+	struct rwlock *rw;
+	struct turnstile *ts;
+	uintptr_t x, queue;
+
+	if (SCHEDULER_STOPPED())
+		return;
+
+	rw = rwlock2rw(c);
+
+	for (;;) {
+		if (__rw_runlock_try(rw, td, &v))
+			break;
+
 		/*
 		 * Ok, we know we have waiters and we think we are the
 		 * last reader, so grab the turnstile lock.
@@ -746,13 +774,38 @@ _rw_runlock_cookie(volatile uintptr_t *c
 		turnstile_broadcast(ts, queue);
 		turnstile_unpend(ts, TS_SHARED_LOCK);
 		turnstile_chain_unlock(&rw->lock_object);
+		td->td_rw_rlocks--;
 		break;
 	}
 	LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, LOCKSTAT_READER);
+}
+
+void
+_rw_runlock_cookie(volatile uintptr_t *c, const char *file, int line)
+{
+	struct rwlock *rw;
+	struct thread *td;
+	uintptr_t v;
+
+	rw = rwlock2rw(c);
+
+	KASSERT(rw->rw_lock != RW_DESTROYED,
+	    ("rw_runlock() of destroyed rwlock @ %s:%d", file, line));
+	__rw_assert(c, RA_RLOCKED, file, line);
+	WITNESS_UNLOCK(&rw->lock_object, 0, file, line);
+	LOCK_LOG_LOCK("RUNLOCK", &rw->lock_object, 0, 0, file, line);
+
+	td = curthread;
+	v = RW_READ_VALUE(rw);
+
+	if (__predict_false(LOCKSTAT_OOL_PROFILE_ENABLED(rw__release) ||
+	    !__rw_runlock_try(rw, td, &v)))
+		__rw_runlock_hard(c, td, v, file, line);
+
 	TD_LOCKS_DEC(curthread);
-	curthread->td_rw_rlocks--;
 }
 
+
 /*
  * This function is called when we are unable to obtain a write lock on the
  * first try.  This means that at least one other thread holds either a



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201703160653.v2G6rtYn090510>