Date:      Thu, 20 Jun 2019 01:15:34 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r349220 - in head: share/man/man9 sys/kern sys/sys
Message-ID:  <201906200115.x5K1FYgU001933@repo.freebsd.org>

Author: mav
Date: Thu Jun 20 01:15:33 2019
New Revision: 349220
URL: https://svnweb.freebsd.org/changeset/base/349220

Log:
  Add wakeup_any(), cheaper wakeup_one() for taskqueue(9).
  
  wakeup_one() and the underlying sleepq_signal() spend additional time
  trying to be fair, waking the thread with the highest priority that has
  been sleeping the longest.  But in the case of taskqueue there are many
  absolutely identical threads, and any fairness between them is pointless.
  It even makes things worse, since round-robin wakeups not only render the
  scheduler's previous CPU affinity useless, but also hide CPU bottlenecks
  from the user: a sequential workload with one request at a time looks
  evenly distributed between multiple threads.
  
  This change adds a new SLEEPQ_UNFAIR flag to sleepq_signal(), making it
  wake up the thread that went to sleep last but is no longer in the middle
  of its context switch (to avoid immediately spinning on the thread lock).
  On top of that, a new wakeup_any() function is added, equivalent to
  wakeup_one() except that it sets the flag.  Finally, taskqueue(9) is
  switched to wakeup_any() to wake up its threads.
  
  As a result, on a 72-core Xeon v4 machine a sequential ZFS write to 12
  ZVOLs with a 16KB block size spent 34% less time in wakeup_any() and its
  descendants than it previously did in wakeup_one(), and total write
  throughput increased by ~10% with the same CPU usage as before.
  
  Reviewed by:	markj, mmacy
  MFC after:	2 weeks
  Sponsored by:	iXsystems, Inc.
  Differential Revision:	https://reviews.freebsd.org/D20669
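
The pattern the log describes, as a minimal, hypothetical sketch (it is not
part of this commit, and the my_softc/my_req/my_worker/my_process names are
invented for illustration): several identical worker threads sleep on one
wait channel with mtx_sleep(9), and the producer wakes exactly one of them
with the new wakeup_any() where wakeup_one() would previously have been used.

    #include <sys/param.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>
    #include <sys/queue.h>
    #include <sys/systm.h>

    struct my_req {
            STAILQ_ENTRY(my_req)    r_link;
            /* ... request payload ... */
    };

    struct my_softc {
            struct mtx              sc_mtx;
            STAILQ_HEAD(, my_req)   sc_queue;
    };

    /* Producer: queue one request and wake a single idle worker. */
    static void
    my_enqueue(struct my_softc *sc, struct my_req *req)
    {

            mtx_lock(&sc->sc_mtx);
            STAILQ_INSERT_TAIL(&sc->sc_queue, req, r_link);
            mtx_unlock(&sc->sc_mtx);
            /* All workers are identical, so fairness buys nothing. */
            wakeup_any(&sc->sc_queue);
    }

    /* Worker: sleep on the queue head until there is work. */
    static void
    my_worker(void *arg)
    {
            struct my_softc *sc = arg;
            struct my_req *req;

            mtx_lock(&sc->sc_mtx);
            for (;;) {
                    while ((req = STAILQ_FIRST(&sc->sc_queue)) == NULL)
                            mtx_sleep(&sc->sc_queue, &sc->sc_mtx, 0,
                                "myreq", 0);
                    STAILQ_REMOVE_HEAD(&sc->sc_queue, r_link);
                    mtx_unlock(&sc->sc_mtx);
                    my_process(req);        /* hypothetical work function */
                    mtx_lock(&sc->sc_mtx);
            }
    }

With wakeup_any() the most recently slept worker is picked, so under a light,
sequential load the same one or two threads stay hot on their CPUs instead of
the work rotating across the whole pool.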

Modified:
  head/share/man/man9/Makefile
  head/share/man/man9/sleep.9
  head/share/man/man9/sleepqueue.9
  head/sys/kern/kern_synch.c
  head/sys/kern/subr_sleepqueue.c
  head/sys/kern/subr_taskqueue.c
  head/sys/sys/queue.h
  head/sys/sys/sleepqueue.h
  head/sys/sys/systm.h

Modified: head/share/man/man9/Makefile
==============================================================================
--- head/share/man/man9/Makefile	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/share/man/man9/Makefile	Thu Jun 20 01:15:33 2019	(r349220)
@@ -1880,7 +1880,8 @@ MLINKS+=sleep.9 msleep.9 \
 	sleep.9 tsleep.9 \
 	sleep.9 tsleep_sbt.9 \
 	sleep.9 wakeup.9 \
-	sleep.9 wakeup_one.9
+	sleep.9 wakeup_one.9 \
+	sleep.9 wakeup_any.9
 MLINKS+=sleepqueue.9 init_sleepqueues.9 \
 	sleepqueue.9 sleepq_abort.9 \
 	sleepqueue.9 sleepq_add.9 \

Modified: head/share/man/man9/sleep.9
==============================================================================
--- head/share/man/man9/sleep.9	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/share/man/man9/sleep.9	Thu Jun 20 01:15:33 2019	(r349220)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd March 4, 2018
+.Dd June 19, 2019
 .Dt SLEEP 9
 .Os
 .Sh NAME
@@ -38,7 +38,9 @@
 .Nm pause_sbt ,
 .Nm tsleep ,
 .Nm tsleep_sbt ,
-.Nm wakeup
+.Nm wakeup ,
+.Nm wakeup_one ,
+.Nm wakeup_any
 .Nd wait for events
 .Sh SYNOPSIS
 .In sys/param.h
@@ -70,6 +72,8 @@
 .Fn wakeup "void *chan"
 .Ft void
 .Fn wakeup_one "void *chan"
+.Ft void
+.Fn wakeup_any "void *chan"
 .Sh DESCRIPTION
 The functions
 .Fn tsleep ,
@@ -79,8 +83,9 @@ The functions
 .Fn pause_sig ,
 .Fn pause_sbt ,
 .Fn wakeup ,
+.Fn wakeup_one ,
 and
-.Fn wakeup_one
+.Fn wakeup_any
 handle event-based thread blocking.
 If a thread must wait for an
 external event, it is put to sleep by
@@ -252,9 +257,10 @@ function is a wrapper around
 .Fn tsleep
 that suspends execution of the current thread for the indicated timeout.
 The thread can not be awakened early by signals or calls to
-.Fn wakeup
+.Fn wakeup ,
+.Fn wakeup_one
 or
-.Fn wakeup_one .
+.Fn wakeup_any .
 The
 .Fn pause_sig
 function is a variant of
@@ -263,8 +269,8 @@ which can be awakened early by signals.
 .Pp
 The
 .Fn wakeup_one
-function makes the first thread in the queue that is sleeping on the
-parameter
+function makes the first highest priority thread in the queue that is
+sleeping on the parameter
 .Fa chan
 runnable.
 This reduces the load when a large number of threads are sleeping on
@@ -292,6 +298,16 @@ to
 pay particular attention to ensure that no other threads wait on the
 same
 .Fa chan .
+.Pp
+The
+.Fn wakeup_any
+function is similar to
+.Fn wakeup_one ,
+except that it makes runnable last thread on the queue (sleeping less),
+ignoring fairness.
+It can be used when threads sleeping on the
+.Fa chan
+are known to be identical and there is no reason to be fair.
 .Pp
 If the timeout given by
 .Fa timo

Modified: head/share/man/man9/sleepqueue.9
==============================================================================
--- head/share/man/man9/sleepqueue.9	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/share/man/man9/sleepqueue.9	Thu Jun 20 01:15:33 2019	(r349220)
@@ -22,7 +22,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 22, 2014
+.Dd June 19, 2019
 .Dt SLEEPQUEUE 9
 .Os
 .Sh NAME
@@ -290,7 +290,8 @@ and
 functions.
 The
 .Fn sleepq_signal
-function awakens the highest priority thread sleeping on a wait channel while
+function awakens the highest priority thread sleeping on a wait channel
+(if SLEEPQ_UNFAIR flag is set, thread that went to sleep recently) while
 .Fn sleepq_broadcast
 awakens all of the threads sleeping on a wait channel.
 The

Modified: head/sys/kern/kern_synch.c
==============================================================================
--- head/sys/kern/kern_synch.c	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/sys/kern/kern_synch.c	Thu Jun 20 01:15:33 2019	(r349220)
@@ -368,6 +368,19 @@ wakeup_one(void *ident)
 		kick_proc0();
 }
 
+void
+wakeup_any(void *ident)
+{
+	int wakeup_swapper;
+
+	sleepq_lock(ident);
+	wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR,
+	    0, 0);
+	sleepq_release(ident);
+	if (wakeup_swapper)
+		kick_proc0();
+}
+
 static void
 kdb_switch(void)
 {

Modified: head/sys/kern/subr_sleepqueue.c
==============================================================================
--- head/sys/kern/subr_sleepqueue.c	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/sys/kern/subr_sleepqueue.c	Thu Jun 20 01:15:33 2019	(r349220)
@@ -123,7 +123,7 @@ CTASSERT(powerof2(SC_TABLESIZE));
  *  c - sleep queue chain lock
  */
 struct sleepqueue {
-	TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];	/* (c) Blocked threads. */
+	struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
 	u_int sq_blockedcnt[NR_SLEEPQS];	/* (c) N. of blocked threads. */
 	LIST_ENTRY(sleepqueue) sq_hash;		/* (c) Chain and free list. */
 	LIST_HEAD(, sleepqueue) sq_free;	/* (c) Free queues. */
@@ -889,12 +889,14 @@ sleepq_init(void *mem, int size, int flags)
 }
 
 /*
- * Find the highest priority thread sleeping on a wait channel and resume it.
+ * Find thread sleeping on a wait channel and resume it.
  */
 int
 sleepq_signal(void *wchan, int flags, int pri, int queue)
 {
+	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
+	struct threadqueue *head;
 	struct thread *td, *besttd;
 	int wakeup_swapper;
 
@@ -907,16 +909,33 @@ sleepq_signal(void *wchan, int flags, int pri, int que
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 
-	/*
-	 * Find the highest priority thread on the queue.  If there is a
-	 * tie, use the thread that first appears in the queue as it has
-	 * been sleeping the longest since threads are always added to
-	 * the tail of sleep queues.
-	 */
-	besttd = TAILQ_FIRST(&sq->sq_blocked[queue]);
-	TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
-		if (td->td_priority < besttd->td_priority)
+	head = &sq->sq_blocked[queue];
+	if (flags & SLEEPQ_UNFAIR) {
+		/*
+		 * Find the most recently sleeping thread, but try to
+		 * skip threads still in process of context switch to
+		 * avoid spinning on the thread lock.
+		 */
+		sc = SC_LOOKUP(wchan);
+		besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
+		while (besttd->td_lock != &sc->sc_lock) {
+			td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
+			if (td == NULL)
+				break;
 			besttd = td;
+		}
+	} else {
+		/*
+		 * Find the highest priority thread on the queue.  If there
+		 * is a tie, use the thread that first appears in the queue
+		 * as it has been sleeping the longest since threads are
+		 * always added to the tail of sleep queues.
+		 */
+		besttd = td = TAILQ_FIRST(head);
+		while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
+			if (td->td_priority < besttd->td_priority)
+				besttd = td;
+		}
 	}
 	MPASS(besttd != NULL);
 	thread_lock(besttd);

Modified: head/sys/kern/subr_taskqueue.c
==============================================================================
--- head/sys/kern/subr_taskqueue.c	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/sys/kern/subr_taskqueue.c	Thu Jun 20 01:15:33 2019	(r349220)
@@ -804,7 +804,7 @@ taskqueue_thread_enqueue(void *context)
 
 	tqp = context;
 	tq = *tqp;
-	wakeup_one(tq);
+	wakeup_any(tq);
 }
 
 TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,

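For completeness, a hypothetical consumer-side sketch (not from the commit;
my_task, my_task_fn and my_defer_work are invented names): nothing changes for
taskqueue(9) users, since the switch to wakeup_any() is internal to
taskqueue_thread_enqueue().

    #include <sys/param.h>
    #include <sys/kernel.h>
    #include <sys/systm.h>
    #include <sys/taskqueue.h>

    static struct task my_task;

    static void
    my_task_fn(void *context, int pending)
    {
            /* ... deferred work runs in a taskqueue worker thread ... */
    }

    static void
    my_defer_work(void)
    {

            TASK_INIT(&my_task, 0, my_task_fn, NULL);
            /*
             * Same call as before this commit; the queue's idle worker
             * threads are now simply woken via wakeup_any() (LIFO) rather
             * than wakeup_one().
             */
            taskqueue_enqueue(taskqueue_thread, &my_task);
    }
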
Modified: head/sys/sys/queue.h
==============================================================================
--- head/sys/sys/queue.h	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/sys/sys/queue.h	Thu Jun 20 01:15:33 2019	(r349220)
@@ -829,6 +829,10 @@ struct {								\
 #define	TAILQ_PREV(elm, headname, field)				\
 	(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
 
+#define	TAILQ_PREV_FAST(elm, head, type, field)				\
+    ((elm)->field.tqe_prev == &(head)->tqh_first ? NULL :		\
+     __containerof((elm)->field.tqe_prev, QUEUE_TYPEOF(type), field.tqe_next))
+
 #define	TAILQ_REMOVE(head, elm, field) do {				\
 	QMD_SAVELINK(oldnext, (elm)->field.tqe_next);			\
 	QMD_SAVELINK(oldprev, (elm)->field.tqe_prev);			\

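TAILQ_PREV_FAST() complements the existing TAILQ_LAST_FAST(): the _FAST
variants avoid the double indirection of TAILQ_PREV()/TAILQ_LAST() by using
__containerof(), at the cost of spelling out the head and element types.
A hedged userland sketch, assuming a <sys/queue.h> that already contains this
revision; struct item and walk_backwards are invented for illustration.

    #include <sys/queue.h>
    #include <stdio.h>

    struct item {
            int                     value;
            TAILQ_ENTRY(item)       link;
    };
    TAILQ_HEAD(itemhead, item);

    /*
     * Walk the tail queue from tail to head, the direction sleepq_signal()
     * now scans sq_blocked[] when SLEEPQ_UNFAIR is set.
     */
    static void
    walk_backwards(struct itemhead *head)
    {
            struct item *it;

            for (it = TAILQ_LAST_FAST(head, item, link); it != NULL;
                it = TAILQ_PREV_FAST(it, head, item, link))
                    printf("%d\n", it->value);
    }
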
Modified: head/sys/sys/sleepqueue.h
==============================================================================
--- head/sys/sys/sleepqueue.h	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/sys/sys/sleepqueue.h	Thu Jun 20 01:15:33 2019	(r349220)
@@ -84,6 +84,7 @@ struct thread;
 #define	SLEEPQ_SX		0x03		/* Used by an sx lock. */
 #define	SLEEPQ_LK		0x04		/* Used by a lockmgr. */
 #define	SLEEPQ_INTERRUPTIBLE	0x100		/* Sleep is interruptible. */
+#define	SLEEPQ_UNFAIR		0x200		/* Unfair wakeup order. */
 
 void	init_sleepqueues(void);
 int	sleepq_abort(struct thread *td, int intrval);

Modified: head/sys/sys/systm.h
==============================================================================
--- head/sys/sys/systm.h	Thu Jun 20 00:23:51 2019	(r349219)
+++ head/sys/sys/systm.h	Thu Jun 20 01:15:33 2019	(r349220)
@@ -489,6 +489,7 @@ int	pause_sbt(const char *wmesg, sbintime_t sbt, sbint
 	_sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags))
 void	wakeup(void * chan);
 void	wakeup_one(void * chan);
+void	wakeup_any(void * chan);
 
 /*
  * Common `struct cdev *' stuff are declared here to avoid #include poisoning


