Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 22 May 2018 07:16:39 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r334023 - head/sys/kern
Message-ID:  <201805220716.w4M7GdKR016843@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Tue May 22 07:16:39 2018
New Revision: 334023
URL: https://svnweb.freebsd.org/changeset/base/334023

Log:
  rw: decrease writer starvation
  
  Writers waiting on readers to finish can set the RW_LOCK_WRITE_SPINNER
  bit. This prevents most new readers from coming on. However, the last
  reader to unlock also clears the bit which means new readers can sneak
  in and the cycle starts over.
  
  Change the code to keep the bit after last unlock.
  
  Note that starvation potential is still there: no matter how many write
  spinners are there, there is one bit. After the writer unlocks, the lock
  is free to get raided by readers again. It is good enough for the time
  being.
  
  The real fix would include counting writers.
  
  This runs into a caveat: the writer which set the bit may now be preempted.
  In order to get rid of the problem all attempts to set the bit are preceeded
  with critical_enter.
  
  The bit gets cleared when the thread which set it goes to sleep. This way
  an invariant holds that if the bit is set, someone is actively spinning and
  will grab the lock soon. In particular this means that readers which find
  the lock in this transient state can safely spin until the lock finds itself
  an owner (i.e. they don't need to block nor speculate how long to spin
  speculatively).
  
  Tested by:	pho

Modified:
  head/sys/kern/kern_rwlock.c

Modified: head/sys/kern/kern_rwlock.c
==============================================================================
--- head/sys/kern/kern_rwlock.c	Tue May 22 06:22:58 2018	(r334022)
+++ head/sys/kern/kern_rwlock.c	Tue May 22 07:16:39 2018	(r334023)
@@ -510,25 +510,38 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td, 
 				    sched_tdname(curthread), "running");
 				continue;
 			}
-		} else if (spintries < rowner_retries) {
-			spintries++;
-			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
-			    "spinning", "lockname:\"%s\"",
-			    rw->lock_object.lo_name);
-			for (i = 0; i < rowner_loops; i += n) {
-				n = RW_READERS(v);
-				lock_delay_spin(n);
+		} else {
+			if ((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) {
+				MPASS(!__rw_can_read(td, v, false));
+				lock_delay_spin(2);
 				v = RW_READ_VALUE(rw);
-				if ((v & RW_LOCK_READ) == 0 || __rw_can_read(td, v, false))
-					break;
+				continue;
 			}
+			if (spintries < rowner_retries) {
+				spintries++;
+				KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
+				    "spinning", "lockname:\"%s\"",
+				    rw->lock_object.lo_name);
+				n = RW_READERS(v);
+				for (i = 0; i < rowner_loops; i += n) {
+					lock_delay_spin(n);
+					v = RW_READ_VALUE(rw);
+					if (!(v & RW_LOCK_READ))
+						break;
+					n = RW_READERS(v);
+					if (n == 0)
+						break;
+					if (__rw_can_read(td, v, false))
+						break;
+				}
 #ifdef KDTRACE_HOOKS
-			lda.spin_cnt += rowner_loops - i;
+				lda.spin_cnt += rowner_loops - i;
 #endif
-			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
-			    "running");
-			if (i < rowner_loops)
-				continue;
+				KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
+				    "running");
+				if (i < rowner_loops)
+					continue;
+			}
 		}
 #endif
 
@@ -546,7 +559,8 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td, 
 		 */
 		v = RW_READ_VALUE(rw);
 retry_ts:
-		if (__rw_can_read(td, v, false)) {
+		if (((v & RW_LOCK_WRITE_SPINNER) && RW_READERS(v) == 0) ||
+		    __rw_can_read(td, v, false)) {
 			turnstile_cancel(ts);
 			continue;
 		}
@@ -726,11 +740,7 @@ __rw_runlock_try(struct rwlock *rw, struct thread *td,
 {
 
 	for (;;) {
-		/*
-		 * See if there is more than one read lock held.  If so,
-		 * just drop one and return.
-		 */
-		if (RW_READERS(*vp) > 1) {
+		if (RW_READERS(*vp) > 1 || !(*vp & RW_LOCK_WAITERS)) {
 			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
 			    *vp - RW_ONE_READER)) {
 				if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -743,23 +753,6 @@ __rw_runlock_try(struct rwlock *rw, struct thread *td,
 			}
 			continue;
 		}
-		/*
-		 * If there aren't any waiters for a write lock, then try
-		 * to drop it quickly.
-		 */
-		if (!(*vp & RW_LOCK_WAITERS)) {
-			MPASS((*vp & ~RW_LOCK_WRITE_SPINNER) ==
-			    RW_READERS_LOCK(1));
-			if (atomic_fcmpset_rel_ptr(&rw->rw_lock, vp,
-			    RW_UNLOCKED)) {
-				if (LOCK_LOG_TEST(&rw->lock_object, 0))
-					CTR2(KTR_LOCK, "%s: %p last succeeded",
-					    __func__, rw);
-				td->td_rw_rlocks--;
-				return (true);
-			}
-			continue;
-		}
 		break;
 	}
 	return (false);
@@ -788,7 +781,6 @@ __rw_runlock_hard(struct rwlock *rw, struct thread *td
 		if (__rw_runlock_try(rw, td, &v))
 			break;
 
-		v &= (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
 		MPASS(v & RW_LOCK_WAITERS);
 
 		/*
@@ -813,7 +805,7 @@ __rw_runlock_hard(struct rwlock *rw, struct thread *td
 			queue = TS_EXCLUSIVE_QUEUE;
 			setv |= (v & RW_LOCK_READ_WAITERS);
 		}
-		v |= RW_READERS_LOCK(1);
+		setv |= (v & RW_LOCK_WRITE_SPINNER);
 		if (!atomic_fcmpset_rel_ptr(&rw->rw_lock, &v, setv))
 			continue;
 		if (LOCK_LOG_TEST(&rw->lock_object, 0))
@@ -872,6 +864,23 @@ _rw_runlock_cookie(volatile uintptr_t *c, const char *
 	_rw_runlock_cookie_int(rw LOCK_FILE_LINE_ARG);
 }
 
+#ifdef ADAPTIVE_RWLOCKS
+static inline void
+rw_drop_critical(uintptr_t v, bool *in_critical, int *extra_work)
+{
+
+	if (v & RW_LOCK_WRITE_SPINNER)
+		return;
+	if (*in_critical) {
+		critical_exit();
+		*in_critical = false;
+		(*extra_work)--;
+	}
+}
+#else
+#define rw_drop_critical(v, in_critical, extra_work) do { } while (0)
+#endif
+
 /*
  * This function is called when we are unable to obtain a write lock on the
  * first try.  This means that at least one other thread holds either a
@@ -888,8 +897,9 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 	int spintries = 0;
 	int i, n;
 	enum { READERS, WRITER } sleep_reason = READERS;
+	bool in_critical = false;
 #endif
-	uintptr_t x;
+	uintptr_t setv;
 #ifdef LOCK_PROFILING
 	uint64_t waittime = 0;
 	int contested = 0;
@@ -906,6 +916,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 	uintptr_t state;
 	int doing_lockprof = 0;
 #endif
+	int extra_work = 0;
 
 	tid = (uintptr_t)curthread;
 	rw = rwlock2rw(c);
@@ -916,12 +927,14 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 			if (_rw_write_lock_fetch(rw, &v, tid))
 				goto out_lockstat;
 		}
+		extra_work = 1;
 		doing_lockprof = 1;
 		all_time -= lockstat_nsecs(&rw->lock_object);
 		state = v;
 	}
 #endif
 #ifdef LOCK_PROFILING
+	extra_work = 1;
 	doing_lockprof = 1;
 	state = v;
 #endif
@@ -969,12 +982,19 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 #endif
 
 #ifdef ADAPTIVE_RWLOCKS
+		if (v == (RW_LOCK_READ | RW_LOCK_WRITE_SPINNER)) {
+			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid))
+				break;
+			continue;
+		}
+
 		/*
 		 * If the lock is write locked and the owner is
 		 * running on another CPU, spin until the owner stops
 		 * running or the state of the lock changes.
 		 */
 		if (!(v & RW_LOCK_READ)) {
+			rw_drop_critical(v, &in_critical, &extra_work);
 			sleep_reason = WRITER;
 			owner = lv_rw_wowner(v);
 			if (!TD_IS_RUNNING(owner))
@@ -998,8 +1018,16 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 			if (spintries == rowner_retries)
 				goto ts;
 			if (!(v & RW_LOCK_WRITE_SPINNER)) {
+				if (!in_critical) {
+					critical_enter();
+					in_critical = true;
+					extra_work++;
+				}
 				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
 				    v | RW_LOCK_WRITE_SPINNER)) {
+					critical_exit();
+					in_critical = false;
+					extra_work--;
 					continue;
 				}
 			}
@@ -1007,12 +1035,17 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 			KTR_STATE1(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "spinning", "lockname:\"%s\"",
 			    rw->lock_object.lo_name);
+			n = RW_READERS(v);
 			for (i = 0; i < rowner_loops; i += n) {
-				n = RW_READERS(v);
 				lock_delay_spin(n);
 				v = RW_READ_VALUE(rw);
-				if ((v & RW_LOCK_WRITE_SPINNER) == 0)
+				if (!(v & RW_LOCK_WRITE_SPINNER))
 					break;
+				if (!(v & RW_LOCK_READ))
+					break;
+				n = RW_READERS(v);
+				if (n == 0)
+					break;
 			}
 #ifdef KDTRACE_HOOKS
 			lda.spin_cnt += i;
@@ -1040,10 +1073,12 @@ retry_ts:
 		if (owner != NULL) {
 			if (TD_IS_RUNNING(owner)) {
 				turnstile_cancel(ts);
+				rw_drop_critical(v, &in_critical, &extra_work);
 				continue;
 			}
 		} else if (RW_READERS(v) > 0 && sleep_reason == WRITER) {
 			turnstile_cancel(ts);
+			rw_drop_critical(v, &in_critical, &extra_work);
 			continue;
 		}
 #endif
@@ -1054,11 +1089,11 @@ retry_ts:
 		 * If a pending waiters queue is present, claim the lock
 		 * ownership and maintain the pending queue.
 		 */
-		x = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
-		if ((v & ~x) == RW_UNLOCKED) {
-			x &= ~RW_LOCK_WRITE_SPINNER;
-			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | x)) {
-				if (x)
+		setv = v & (RW_LOCK_WAITERS | RW_LOCK_WRITE_SPINNER);
+		if ((v & ~setv) == RW_UNLOCKED) {
+			setv &= ~RW_LOCK_WRITE_SPINNER;
+			if (atomic_fcmpset_acq_ptr(&rw->rw_lock, &v, tid | setv)) {
+				if (setv)
 					turnstile_claim(ts);
 				else
 					turnstile_cancel(ts);
@@ -1066,19 +1101,37 @@ retry_ts:
 			}
 			goto retry_ts;
 		}
-		/*
-		 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
-		 * set it.  If we fail to set it, then loop back and try
-		 * again.
-		 */
-		if (!(v & RW_LOCK_WRITE_WAITERS)) {
-			if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
-			    v | RW_LOCK_WRITE_WAITERS))
-				goto retry_ts;
-			if (LOCK_LOG_TEST(&rw->lock_object, 0))
-				CTR2(KTR_LOCK, "%s: %p set write waiters flag",
-				    __func__, rw);
+
+#ifdef ADAPTIVE_RWLOCKS
+		if (in_critical) {
+			if ((v & RW_LOCK_WRITE_SPINNER) ||
+			    !((v & RW_LOCK_WRITE_WAITERS))) {
+				setv = v & ~RW_LOCK_WRITE_SPINNER;
+				setv |= RW_LOCK_WRITE_WAITERS;
+				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v, setv))
+					goto retry_ts;
+			}
+			critical_exit();
+			in_critical = false;
+			extra_work--;
+		} else {
+#endif
+			/*
+			 * If the RW_LOCK_WRITE_WAITERS flag isn't set, then try to
+			 * set it.  If we fail to set it, then loop back and try
+			 * again.
+			 */
+			if (!(v & RW_LOCK_WRITE_WAITERS)) {
+				if (!atomic_fcmpset_ptr(&rw->rw_lock, &v,
+				    v | RW_LOCK_WRITE_WAITERS))
+					goto retry_ts;
+				if (LOCK_LOG_TEST(&rw->lock_object, 0))
+					CTR2(KTR_LOCK, "%s: %p set write waiters flag",
+					    __func__, rw);
+			}
+#ifdef ADAPTIVE_RWLOCKS
 		}
+#endif
 		/*
 		 * We were unable to acquire the lock and the write waiters
 		 * flag is set, so we must block on the turnstile.
@@ -1103,6 +1156,12 @@ retry_ts:
 #endif
 		v = RW_READ_VALUE(rw);
 	}
+	if (__predict_true(!extra_work))
+		return;
+#ifdef ADAPTIVE_RWLOCKS
+	if (in_critical)
+		critical_exit();
+#endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	if (__predict_true(!doing_lockprof))
 		return;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201805220716.w4M7GdKR016843>