Date:      Sun, 15 Dec 2019 21:11:16 +0000 (UTC)
From:      Jeff Roberson <jeff@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r355779 - in head/sys: cddl/compat/opensolaris/sys compat/linux compat/linuxkpi/common/src dev/ocs_fc kern mips/nlm sys vm
Message-ID:  <201912152111.xBFLBG95029553@repo.freebsd.org>

Author: jeff
Date: Sun Dec 15 21:11:15 2019
New Revision: 355779
URL: https://svnweb.freebsd.org/changeset/base/355779

Log:
  schedlock 1/4
  
  Eliminate recursion from most thread_lock consumers.  Return from
  sched_add() without the thread_lock held.  This eliminates unnecessary
  atomics and lock word loads and reduces the hold time for scheduler
  locks.  This will eventually allow for lockless remote adds.
  
  Discussed with:	kib
  Reviewed by:	jhb
  Tested by:	pho
  Differential Revision:	https://reviews.freebsd.org/D22626
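
  The caller-side effect of this change, drawn from the linux_fork.c and
  init_main.c hunks below, can be sketched as follows (illustrative only,
  not part of the committed diff):

	/* Before: sched_add() returned with the thread lock still held. */
	thread_lock(td);
	TD_SET_CAN_RUN(td);
	sched_add(td, SRQ_BORING);
	thread_unlock(td);

	/* After: sched_add() releases the thread lock before returning,
	 * unless the caller passes SRQ_HOLDTD, so the unlock goes away. */
	thread_lock(td);
	TD_SET_CAN_RUN(td);
	sched_add(td, SRQ_BORING);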

Modified:
  head/sys/cddl/compat/opensolaris/sys/proc.h
  head/sys/compat/linux/linux_fork.c
  head/sys/compat/linuxkpi/common/src/linux_kthread.c
  head/sys/dev/ocs_fc/ocs_os.c
  head/sys/kern/init_main.c
  head/sys/kern/kern_clock.c
  head/sys/kern/kern_fork.c
  head/sys/kern/kern_intr.c
  head/sys/kern/kern_kthread.c
  head/sys/kern/kern_mutex.c
  head/sys/kern/kern_resource.c
  head/sys/kern/kern_sig.c
  head/sys/kern/kern_synch.c
  head/sys/kern/kern_thr.c
  head/sys/kern/kern_thread.c
  head/sys/kern/sched_4bsd.c
  head/sys/kern/sched_ule.c
  head/sys/kern/subr_gtaskqueue.c
  head/sys/kern/subr_pcpu.c
  head/sys/kern/subr_sleepqueue.c
  head/sys/kern/subr_taskqueue.c
  head/sys/kern/subr_turnstile.c
  head/sys/mips/nlm/cms.c
  head/sys/sys/proc.h
  head/sys/sys/resourcevar.h
  head/sys/sys/sched.h
  head/sys/vm/vm_swapout.c

Modified: head/sys/cddl/compat/opensolaris/sys/proc.h
==============================================================================
--- head/sys/cddl/compat/opensolaris/sys/proc.h	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/cddl/compat/opensolaris/sys/proc.h	Sun Dec 15 21:11:15 2019	(r355779)
@@ -89,7 +89,6 @@ do_thread_create(caddr_t stk, size_t stksize, void (*p
 		thread_lock(td);
 		sched_prio(td, pri);
 		sched_add(td, SRQ_BORING);
-		thread_unlock(td);
 	}
 	return (td);
 }

Modified: head/sys/compat/linux/linux_fork.c
==============================================================================
--- head/sys/compat/linux/linux_fork.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/compat/linux/linux_fork.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -92,7 +92,6 @@ linux_fork(struct thread *td, struct linux_fork_args *
 	thread_lock(td2);
 	TD_SET_CAN_RUN(td2);
 	sched_add(td2, SRQ_BORING);
-	thread_unlock(td2);
 
 	return (0);
 }
@@ -123,7 +122,6 @@ linux_vfork(struct thread *td, struct linux_vfork_args
 	thread_lock(td2);
 	TD_SET_CAN_RUN(td2);
 	sched_add(td2, SRQ_BORING);
-	thread_unlock(td2);
 
 	return (0);
 }
@@ -228,7 +226,6 @@ linux_clone_proc(struct thread *td, struct linux_clone
 	thread_lock(td2);
 	TD_SET_CAN_RUN(td2);
 	sched_add(td2, SRQ_BORING);
-	thread_unlock(td2);
 
 	td->td_retval[0] = p2->p_pid;
 
@@ -343,7 +340,6 @@ linux_clone_thread(struct thread *td, struct linux_clo
 	thread_lock(newtd);
 	TD_SET_CAN_RUN(newtd);
 	sched_add(newtd, SRQ_BORING);
-	thread_unlock(newtd);
 
 	td->td_retval[0] = newtd->td_tid;
 

Modified: head/sys/compat/linuxkpi/common/src/linux_kthread.c
==============================================================================
--- head/sys/compat/linuxkpi/common/src/linux_kthread.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/compat/linuxkpi/common/src/linux_kthread.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -142,7 +142,6 @@ linux_kthread_setup_and_run(struct thread *td, linux_t
 	sched_prio(td, PI_SWI(SWI_NET));
 	/* put thread into run-queue */
 	sched_add(td, SRQ_BORING);
-	thread_unlock(td);
 
 	return (task);
 }

Modified: head/sys/dev/ocs_fc/ocs_os.c
==============================================================================
--- head/sys/dev/ocs_fc/ocs_os.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/dev/ocs_fc/ocs_os.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -659,6 +659,8 @@ ocs_thread_create(ocs_os_handle_t os, ocs_thread_t *th
 
 int32_t ocs_thread_start(ocs_thread_t *thread)
 {
+
+	thread_lock(thread->tcb);
 	sched_add(thread->tcb, SRQ_BORING);
 	return 0;
 }

Modified: head/sys/kern/init_main.c
==============================================================================
--- head/sys/kern/init_main.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/init_main.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -853,6 +853,5 @@ kick_init(const void *udata __unused)
 	thread_lock(td);
 	TD_SET_CAN_RUN(td);
 	sched_add(td, SRQ_BORING);
-	thread_unlock(td);
 }
 SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_MIDDLE, kick_init, NULL);

Modified: head/sys/kern/kern_clock.c
==============================================================================
--- head/sys/kern/kern_clock.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_clock.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -282,8 +282,7 @@ deadlkres(void)
 				if (TD_ON_LOCK(td))
 					deadlres_td_on_lock(p, td,
 					    blkticks);
-				else if (TD_IS_SLEEPING(td) &&
-				    TD_ON_SLEEPQ(td))
+				else if (TD_IS_SLEEPING(td))
 					deadlres_td_sleep_q(p, td,
 					    slpticks);
 				thread_unlock(td);

Modified: head/sys/kern/kern_fork.c
==============================================================================
--- head/sys/kern/kern_fork.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_fork.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -758,7 +758,6 @@ do_fork(struct thread *td, struct fork_req *fr, struct
 		thread_lock(td2);
 		TD_SET_CAN_RUN(td2);
 		sched_add(td2, SRQ_BORING);
-		thread_unlock(td2);
 	} else {
 		*fr->fr_procp = p2;
 	}

Modified: head/sys/kern/kern_intr.c
==============================================================================
--- head/sys/kern/kern_intr.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_intr.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -558,8 +558,8 @@ ithread_destroy(struct intr_thread *ithread)
 	if (TD_AWAITING_INTR(td)) {
 		TD_CLR_IWAIT(td);
 		sched_add(td, SRQ_INTR);
-	}
-	thread_unlock(td);
+	} else
+		thread_unlock(td);
 }
 
 int
@@ -985,8 +985,8 @@ intr_event_schedule_thread(struct intr_event *ie)
 	} else {
 		CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d",
 		    __func__, td->td_proc->p_pid, td->td_name, it->it_need, td->td_state);
+		thread_unlock(td);
 	}
-	thread_unlock(td);
 
 	return (0);
 }

Modified: head/sys/kern/kern_kthread.c
==============================================================================
--- head/sys/kern/kern_kthread.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_kthread.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -146,7 +146,8 @@ kproc_create(void (*func)(void *), void *arg,
 	/* Delay putting it on the run queue until now. */
 	if (!(flags & RFSTOPPED))
 		sched_add(td, SRQ_BORING); 
-	thread_unlock(td);
+	else
+		thread_unlock(td);
 
 	return 0;
 }
@@ -324,7 +325,6 @@ kthread_add(void (*func)(void *), void *arg, struct pr
 	if (!(flags & RFSTOPPED)) {
 		thread_lock(newtd);
 		sched_add(newtd, SRQ_BORING); 
-		thread_unlock(newtd);
 	}
 	if (newtdp)
 		*newtdp = newtd;

Modified: head/sys/kern/kern_mutex.c
==============================================================================
--- head/sys/kern/kern_mutex.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_mutex.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -960,10 +960,9 @@ thread_lock_block(struct thread *td)
 {
 	struct mtx *lock;
 
-	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	lock = td->td_lock;
+	mtx_assert(lock, MA_OWNED);
 	td->td_lock = &blocked_lock;
-	mtx_unlock_spin(lock);
 
 	return (lock);
 }
@@ -971,19 +970,33 @@ thread_lock_block(struct thread *td)
 void
 thread_lock_unblock(struct thread *td, struct mtx *new)
 {
+
 	mtx_assert(new, MA_OWNED);
-	MPASS(td->td_lock == &blocked_lock);
+	KASSERT(td->td_lock == &blocked_lock,
+	    ("thread %p lock %p not blocked_lock %p",
+	    td, td->td_lock, &blocked_lock));
 	atomic_store_rel_ptr((volatile void *)&td->td_lock, (uintptr_t)new);
 }
 
 void
+thread_lock_block_wait(struct thread *td)
+{
+
+	while (td->td_lock == &blocked_lock)
+		cpu_spinwait();
+
+	/* Acquire fence to be certain that all thread state is visible. */
+	atomic_thread_fence_acq();
+}
+
+void
 thread_lock_set(struct thread *td, struct mtx *new)
 {
 	struct mtx *lock;
 
 	mtx_assert(new, MA_OWNED);
-	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	lock = td->td_lock;
+	mtx_assert(lock, MA_OWNED);
 	td->td_lock = new;
 	mtx_unlock_spin(lock);
 }
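
  A usage sketch of the revised API above (modeled on the sched_setcpu() and
  tdq_move() hunks in sched_ule.c further down; illustrative, not part of the
  committed diff): thread_lock_block() now leaves the underlying spin mutex
  held and merely parks td_lock on &blocked_lock, so the caller drops the old
  lock explicitly, while thread_lock_block_wait() lets a remote CPU wait for
  the thread to be unblocked before touching its scheduler state.

	mtx = thread_lock_block(td);	/* td_lock now points at blocked_lock */
	mtx_unlock_spin(mtx);		/* old lock is released by the caller */
	TDQ_LOCK(tdq);
	thread_lock_unblock(td, TDQ_LOCKPTR(tdq));	/* publish the new lock */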

Modified: head/sys/kern/kern_resource.c
==============================================================================
--- head/sys/kern/kern_resource.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_resource.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -78,7 +78,7 @@ static void	calcru1(struct proc *p, struct rusage_ext 
 		    struct timeval *up, struct timeval *sp);
 static int	donice(struct thread *td, struct proc *chgp, int n);
 static struct uidinfo *uilookup(uid_t uid);
-static void	ruxagg_locked(struct rusage_ext *rux, struct thread *td);
+static void	ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td);
 
 /*
  * Resource controls and accounting.
@@ -858,7 +858,7 @@ rufetchtd(struct thread *td, struct rusage *ru)
 		td->td_incruntime += runtime;
 		PCPU_SET(switchtime, u);
 	}
-	ruxagg(p, td);
+	ruxagg_locked(p, td);
 	*ru = td->td_ru;
 	calcru1(p, &td->td_rux, &ru->ru_utime, &ru->ru_stime);
 }
@@ -1114,11 +1114,9 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struc
  * Aggregate tick counts into the proc's rusage_ext.
  */
 static void
-ruxagg_locked(struct rusage_ext *rux, struct thread *td)
+ruxagg_ext_locked(struct rusage_ext *rux, struct thread *td)
 {
 
-	THREAD_LOCK_ASSERT(td, MA_OWNED);
-	PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
 	rux->rux_runtime += td->td_incruntime;
 	rux->rux_uticks += td->td_uticks;
 	rux->rux_sticks += td->td_sticks;
@@ -1126,16 +1124,25 @@ ruxagg_locked(struct rusage_ext *rux, struct thread *t
 }
 
 void
-ruxagg(struct proc *p, struct thread *td)
+ruxagg_locked(struct proc *p, struct thread *td)
 {
+	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	PROC_STATLOCK_ASSERT(td->td_proc, MA_OWNED);
 
-	thread_lock(td);
-	ruxagg_locked(&p->p_rux, td);
-	ruxagg_locked(&td->td_rux, td);
+	ruxagg_ext_locked(&p->p_rux, td);
+	ruxagg_ext_locked(&td->td_rux, td);
 	td->td_incruntime = 0;
 	td->td_uticks = 0;
 	td->td_iticks = 0;
 	td->td_sticks = 0;
+}
+
+void
+ruxagg(struct proc *p, struct thread *td)
+{
+
+	thread_lock(td);
+	ruxagg_locked(p, td);
 	thread_unlock(td);
 }
 

Modified: head/sys/kern/kern_sig.c
==============================================================================
--- head/sys/kern/kern_sig.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_sig.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -2250,6 +2250,8 @@ tdsendsignal(struct proc *p, struct thread *td, int si
 		p->p_step = 0;
 		wakeup(&p->p_step);
 	}
+	wakeup_swapper = 0;
+
 	/*
 	 * Some signals have a process-wide effect and a per-thread
 	 * component.  Most processing occurs when the process next
@@ -2352,15 +2354,13 @@ tdsendsignal(struct proc *p, struct thread *td, int si
 		 * the PROCESS runnable, leave it stopped.
 		 * It may run a bit until it hits a thread_suspend_check().
 		 */
-		wakeup_swapper = 0;
 		PROC_SLOCK(p);
 		thread_lock(td);
-		if (TD_ON_SLEEPQ(td) && (td->td_flags & TDF_SINTR))
+		if (TD_CAN_ABORT(td))
 			wakeup_swapper = sleepq_abort(td, intrval);
-		thread_unlock(td);
+		else
+			thread_unlock(td);
 		PROC_SUNLOCK(p);
-		if (wakeup_swapper)
-			kick_proc0();
 		goto out;
 		/*
 		 * Mutexes are short lived. Threads waiting on them will
@@ -2394,8 +2394,6 @@ tdsendsignal(struct proc *p, struct thread *td, int si
 				sigqueue_delete_proc(p, p->p_xsig);
 			} else
 				PROC_SUNLOCK(p);
-			if (wakeup_swapper)
-				kick_proc0();
 			goto out;
 		}
 	} else {
@@ -2416,6 +2414,9 @@ runfast:
 out:
 	/* If we jump here, proc slock should not be owned. */
 	PROC_SLOCK_ASSERT(p, MA_NOTOWNED);
+	if (wakeup_swapper)
+		kick_proc0();
+
 	return (ret);
 }
 
@@ -2428,10 +2429,8 @@ static void
 tdsigwakeup(struct thread *td, int sig, sig_t action, int intrval)
 {
 	struct proc *p = td->td_proc;
-	int prop;
-	int wakeup_swapper;
+	int prop, wakeup_swapper;
 
-	wakeup_swapper = 0;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	prop = sigprop(sig);
 
@@ -2487,22 +2486,25 @@ tdsigwakeup(struct thread *td, int sig, sig_t action, 
 			sched_prio(td, PUSER);
 
 		wakeup_swapper = sleepq_abort(td, intrval);
-	} else {
-		/*
-		 * Other states do nothing with the signal immediately,
-		 * other than kicking ourselves if we are running.
-		 * It will either never be noticed, or noticed very soon.
-		 */
+		PROC_SUNLOCK(p);
+		if (wakeup_swapper)
+			kick_proc0();
+		return;
+	}
+
+	/*
+	 * Other states do nothing with the signal immediately,
+	 * other than kicking ourselves if we are running.
+	 * It will either never be noticed, or noticed very soon.
+	 */
 #ifdef SMP
-		if (TD_IS_RUNNING(td) && td != curthread)
-			forward_signal(td);
+	if (TD_IS_RUNNING(td) && td != curthread)
+		forward_signal(td);
 #endif
-	}
+
 out:
 	PROC_SUNLOCK(p);
 	thread_unlock(td);
-	if (wakeup_swapper)
-		kick_proc0();
 }
 
 static int
@@ -2530,12 +2532,13 @@ sig_suspend_threads(struct thread *td, struct proc *p,
 				 */
 				KASSERT(!TD_IS_SUSPENDED(td2),
 				    ("thread with deferred stops suspended"));
-				if (TD_SBDRY_INTR(td2))
+				if (TD_SBDRY_INTR(td2)) {
 					wakeup_swapper |= sleepq_abort(td2,
 					    TD_SBDRY_ERRNO(td2));
-			} else if (!TD_IS_SUSPENDED(td2)) {
+					continue;
+				}
+			} else if (!TD_IS_SUSPENDED(td2))
 				thread_suspend_one(td2);
-			}
 		} else if (!TD_IS_SUSPENDED(td2)) {
 			if (sending || td != td2)
 				td2->td_flags |= TDF_ASTPENDING;

Modified: head/sys/kern/kern_synch.c
==============================================================================
--- head/sys/kern/kern_synch.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_synch.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -538,40 +538,48 @@ mi_switch(int flags, struct thread *newtd)
  * Change thread state to be runnable, placing it on the run queue if
  * it is in memory.  If it is swapped out, return true so our caller
  * will know to awaken the swapper.
+ *
+ * Requires the thread lock on entry, drops on exit.
  */
 int
-setrunnable(struct thread *td)
+setrunnable(struct thread *td, int srqflags)
 {
+	int swapin;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
 	    ("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
+
+	swapin = 0;
 	switch (td->td_state) {
 	case TDS_RUNNING:
 	case TDS_RUNQ:
+		break;
+	case TDS_CAN_RUN:
+		KASSERT((td->td_flags & TDF_INMEM) != 0,
+		    ("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X",
+		    td, td->td_flags, td->td_inhibitors));
+		/* unlocks thread lock according to flags */
+		sched_wakeup(td, srqflags);
 		return (0);
 	case TDS_INHIBITED:
 		/*
 		 * If we are only inhibited because we are swapped out
-		 * then arange to swap in this process. Otherwise just return.
+		 * arrange to swap in this process.
 		 */
-		if (td->td_inhibitors != TDI_SWAPPED)
-			return (0);
-		/* FALLTHROUGH */
-	case TDS_CAN_RUN:
+		if (td->td_inhibitors == TDI_SWAPPED &&
+		    (td->td_flags & TDF_SWAPINREQ) == 0) {
+			td->td_flags |= TDF_SWAPINREQ;
+			swapin = 1;
+		}
 		break;
 	default:
-		printf("state is 0x%x", td->td_state);
-		panic("setrunnable(2)");
+		panic("setrunnable: state 0x%x", td->td_state);
 	}
-	if ((td->td_flags & TDF_INMEM) == 0) {
-		if ((td->td_flags & TDF_SWAPINREQ) == 0) {
-			td->td_flags |= TDF_SWAPINREQ;
-			return (1);
-		}
-	} else
-		sched_wakeup(td);
-	return (0);
+	if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0)
+		thread_unlock(td);
+
+	return (swapin);
 }
 
 /*
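
  The new setrunnable() contract above (thread lock held on entry, dropped on
  return unless SRQ_HOLD or SRQ_HOLDTD is passed, nonzero return when the
  swapper needs a kick) gives callers the pattern below (illustrative, not
  part of the committed diff):

	thread_lock(td);
	wakeup_swapper = setrunnable(td, 0);	/* thread lock dropped here */
	if (wakeup_swapper)
		kick_proc0();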

Modified: head/sys/kern/kern_thr.c
==============================================================================
--- head/sys/kern/kern_thr.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_thr.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -281,7 +281,6 @@ thread_create(struct thread *td, struct rtprio *rtp,
 	}
 	TD_SET_CAN_RUN(newtd);
 	sched_add(newtd, SRQ_BORING);
-	thread_unlock(newtd);
 
 	return (0);
 

Modified: head/sys/kern/kern_thread.c
==============================================================================
--- head/sys/kern/kern_thread.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/kern_thread.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -565,7 +565,6 @@ thread_exit(void)
 					thread_lock(p->p_singlethread);
 					wakeup_swapper = thread_unsuspend_one(
 						p->p_singlethread, p, false);
-					thread_unlock(p->p_singlethread);
 					if (wakeup_swapper)
 						kick_proc0();
 				}
@@ -606,7 +605,7 @@ thread_exit(void)
 
 	/* Save our resource usage in our process. */
 	td->td_ru.ru_nvcsw++;
-	ruxagg(p, td);
+	ruxagg_locked(p, td);
 	rucollect(&p->p_ru, &td->td_ru);
 	PROC_STATUNLOCK(p);
 
@@ -730,19 +729,36 @@ weed_inhib(int mode, struct thread *td2, struct proc *
 	THREAD_LOCK_ASSERT(td2, MA_OWNED);
 
 	wakeup_swapper = 0;
+
+	/*
+	 * Since the thread lock is dropped by the scheduler we have
+	 * to retry to check for races.
+	 */
+restart:
 	switch (mode) {
 	case SINGLE_EXIT:
-		if (TD_IS_SUSPENDED(td2))
+		if (TD_IS_SUSPENDED(td2)) {
 			wakeup_swapper |= thread_unsuspend_one(td2, p, true);
-		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
+			thread_lock(td2);
+			goto restart;
+		}
+		if (TD_CAN_ABORT(td2)) {
 			wakeup_swapper |= sleepq_abort(td2, EINTR);
+			return (wakeup_swapper);
+		}
 		break;
 	case SINGLE_BOUNDARY:
 	case SINGLE_NO_EXIT:
-		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & TDF_BOUNDARY) == 0)
+		if (TD_IS_SUSPENDED(td2) &&
+		    (td2->td_flags & TDF_BOUNDARY) == 0) {
 			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
-		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0)
+			thread_lock(td2);
+			goto restart;
+		}
+		if (TD_CAN_ABORT(td2)) {
 			wakeup_swapper |= sleepq_abort(td2, ERESTART);
+			return (wakeup_swapper);
+		}
 		break;
 	case SINGLE_ALLPROC:
 		/*
@@ -754,18 +770,25 @@ weed_inhib(int mode, struct thread *td2, struct proc *
 		 * is used to avoid immediate un-suspend.
 		 */
 		if (TD_IS_SUSPENDED(td2) && (td2->td_flags & (TDF_BOUNDARY |
-		    TDF_ALLPROCSUSP)) == 0)
+		    TDF_ALLPROCSUSP)) == 0) {
 			wakeup_swapper |= thread_unsuspend_one(td2, p, false);
-		if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR) != 0) {
+			thread_lock(td2);
+			goto restart;
+		}
+		if (TD_CAN_ABORT(td2)) {
 			if ((td2->td_flags & TDF_SBDRY) == 0) {
 				thread_suspend_one(td2);
 				td2->td_flags |= TDF_ALLPROCSUSP;
 			} else {
 				wakeup_swapper |= sleepq_abort(td2, ERESTART);
+				return (wakeup_swapper);
 			}
 		}
 		break;
+	default:
+		break;
 	}
+	thread_unlock(td2);
 	return (wakeup_swapper);
 }
 
@@ -842,9 +865,10 @@ thread_single(struct proc *p, int mode)
 #ifdef SMP
 			} else if (TD_IS_RUNNING(td2) && td != td2) {
 				forward_signal(td2);
+				thread_unlock(td2);
 #endif
-			}
-			thread_unlock(td2);
+			} else
+				thread_unlock(td2);
 		}
 		if (wakeup_swapper)
 			kick_proc0();
@@ -1028,7 +1052,6 @@ thread_suspend_check(int return_instead)
 				thread_lock(p->p_singlethread);
 				wakeup_swapper = thread_unsuspend_one(
 				    p->p_singlethread, p, false);
-				thread_unlock(p->p_singlethread);
 				if (wakeup_swapper)
 					kick_proc0();
 			}
@@ -1112,7 +1135,7 @@ thread_unsuspend_one(struct thread *td, struct proc *p
 			p->p_boundary_count--;
 		}
 	}
-	return (setrunnable(td));
+	return (setrunnable(td, 0));
 }
 
 /*
@@ -1133,8 +1156,8 @@ thread_unsuspend(struct proc *p)
 			if (TD_IS_SUSPENDED(td)) {
 				wakeup_swapper |= thread_unsuspend_one(td, p,
 				    true);
-			}
-			thread_unlock(td);
+			} else
+				thread_unlock(td);
 		}
 	} else if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
 	    p->p_numthreads == p->p_suspcount) {
@@ -1147,7 +1170,6 @@ thread_unsuspend(struct proc *p)
 			thread_lock(p->p_singlethread);
 			wakeup_swapper = thread_unsuspend_one(
 			    p->p_singlethread, p, false);
-			thread_unlock(p->p_singlethread);
 		}
 	}
 	if (wakeup_swapper)
@@ -1193,8 +1215,8 @@ thread_single_end(struct proc *p, int mode)
 			if (TD_IS_SUSPENDED(td)) {
 				wakeup_swapper |= thread_unsuspend_one(td, p,
 				    mode == SINGLE_BOUNDARY);
-			}
-			thread_unlock(td);
+			} else
+				thread_unlock(td);
 		}
 	}
 	KASSERT(mode != SINGLE_BOUNDARY || p->p_boundary_count == 0,

Modified: head/sys/kern/sched_4bsd.c
==============================================================================
--- head/sys/kern/sched_4bsd.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/sched_4bsd.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -846,7 +846,7 @@ sched_priority(struct thread *td, u_char prio)
 	td->td_priority = prio;
 	if (TD_ON_RUNQ(td) && td->td_rqindex != (prio / RQ_PPQ)) {
 		sched_rem(td);
-		sched_add(td, SRQ_BORING);
+		sched_add(td, SRQ_BORING | SRQ_HOLDTD);
 	}
 }
 
@@ -980,25 +980,12 @@ sched_switch(struct thread *td, struct thread *newtd, 
 	struct proc *p;
 	int preempted;
 
-	tmtx = NULL;
+	tmtx = &sched_lock;
 	ts = td_get_sched(td);
 	p = td->td_proc;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
-	/* 
-	 * Switch to the sched lock to fix things up and pick
-	 * a new thread.
-	 * Block the td_lock in order to avoid breaking the critical path.
-	 */
-	if (td->td_lock != &sched_lock) {
-		mtx_lock_spin(&sched_lock);
-		tmtx = thread_lock_block(td);
-	}
-
-	if ((td->td_flags & TDF_NOLOAD) == 0)
-		sched_load_rem();
-
 	td->td_lastcpu = td->td_oncpu;
 	preempted = (td->td_flags & TDF_SLICEEND) == 0 &&
 	    (flags & SW_PREEMPT) != 0;
@@ -1021,10 +1008,25 @@ sched_switch(struct thread *td, struct thread *newtd, 
 		if (TD_IS_RUNNING(td)) {
 			/* Put us back on the run queue. */
 			sched_add(td, preempted ?
-			    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
-			    SRQ_OURSELF|SRQ_YIELDING);
+			    SRQ_HOLDTD|SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
+			    SRQ_HOLDTD|SRQ_OURSELF|SRQ_YIELDING);
 		}
 	}
+
+	/* 
+	 * Switch to the sched lock to fix things up and pick
+	 * a new thread.  Block the td_lock in order to avoid
+	 * breaking the critical path.
+	 */
+	if (td->td_lock != &sched_lock) {
+		mtx_lock_spin(&sched_lock);
+		tmtx = thread_lock_block(td);
+		mtx_unlock_spin(tmtx);
+	}
+
+	if ((td->td_flags & TDF_NOLOAD) == 0)
+		sched_load_rem();
+
 	if (newtd) {
 		/*
 		 * The thread we are about to run needs to be counted
@@ -1042,9 +1044,10 @@ sched_switch(struct thread *td, struct thread *newtd, 
 			sched_load_add();
 	} else {
 		newtd = choosethread();
-		MPASS(newtd->td_lock == &sched_lock);
 	}
 
+	MPASS(newtd->td_lock == &sched_lock);
+
 #if (KTR_COMPILE & KTR_SCHED) != 0
 	if (TD_IS_IDLETHREAD(td))
 		KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle",
@@ -1075,7 +1078,7 @@ sched_switch(struct thread *td, struct thread *newtd, 
 			(*dtrace_vtime_switch_func)(newtd);
 #endif
 
-		cpu_switch(td, newtd, tmtx != NULL ? tmtx : td->td_lock);
+		cpu_switch(td, newtd, tmtx);
 		lock_profile_obtain_lock_success(&sched_lock.lock_object,
 		    0, 0, __FILE__, __LINE__);
 		/*
@@ -1100,8 +1103,10 @@ sched_switch(struct thread *td, struct thread *newtd, 
 		if (PMC_PROC_IS_USING_PMCS(td->td_proc))
 			PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
 #endif
-	} else
+	} else {
+		td->td_lock = &sched_lock;
 		SDT_PROBE0(sched, , , remain__cpu);
+	}
 
 	KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running",
 	    "prio:%d", td->td_priority);
@@ -1116,7 +1121,7 @@ sched_switch(struct thread *td, struct thread *newtd, 
 }
 
 void
-sched_wakeup(struct thread *td)
+sched_wakeup(struct thread *td, int srqflags)
 {
 	struct td_sched *ts;
 
@@ -1130,7 +1135,7 @@ sched_wakeup(struct thread *td)
 	td->td_slptick = 0;
 	ts->ts_slptime = 0;
 	ts->ts_slice = sched_slice;
-	sched_add(td, SRQ_BORING);
+	sched_add(td, srqflags);
 }
 
 #ifdef SMP
@@ -1316,7 +1321,11 @@ sched_add(struct thread *td, int flags)
 	 */
 	if (td->td_lock != &sched_lock) {
 		mtx_lock_spin(&sched_lock);
-		thread_lock_set(td, &sched_lock);
+		if ((flags & SRQ_HOLD) != 0)
+			td->td_lock = &sched_lock;
+		else
+			thread_lock_set(td, &sched_lock);
+
 	}
 	TD_SET_RUNQ(td);
 
@@ -1380,6 +1389,8 @@ sched_add(struct thread *td, int flags)
 				maybe_resched(td);
 		}
 	}
+	if ((flags & SRQ_HOLDTD) == 0)
+		thread_unlock(td);
 }
 #else /* SMP */
 {
@@ -1407,7 +1418,10 @@ sched_add(struct thread *td, int flags)
 	 */
 	if (td->td_lock != &sched_lock) {
 		mtx_lock_spin(&sched_lock);
-		thread_lock_set(td, &sched_lock);
+		if ((flags & SRQ_HOLD) != 0)
+			td->td_lock = &sched_lock;
+		else
+			thread_lock_set(td, &sched_lock);
 	}
 	TD_SET_RUNQ(td);
 	CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td);
@@ -1418,6 +1432,8 @@ sched_add(struct thread *td, int flags)
 	runq_add(ts->ts_runq, td, flags);
 	if (!maybe_preempt(td))
 		maybe_resched(td);
+	if ((flags & SRQ_HOLDTD) == 0)
+		thread_unlock(td);
 }
 #endif /* SMP */
 
@@ -1776,7 +1792,7 @@ sched_affinity(struct thread *td)
 
 		/* Put this thread on a valid per-CPU runqueue. */
 		sched_rem(td);
-		sched_add(td, SRQ_BORING);
+		sched_add(td, SRQ_HOLDTD | SRQ_BORING);
 		break;
 	case TDS_RUNNING:
 		/*
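
  Summarizing the flag handling in the sched_add() hunks above (a paraphrase
  of the diff; the SRQ_HOLD and SRQ_HOLDTD definitions live in sys/sched.h,
  outside the portion shown here):

	if ((flags & SRQ_HOLD) != 0)
		td->td_lock = &sched_lock;		/* old lock stays held */
	else
		thread_lock_set(td, &sched_lock);	/* old lock is released */
	/* ... enqueue the thread ... */
	if ((flags & SRQ_HOLDTD) == 0)
		thread_unlock(td);		/* default: drop the thread lock */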

Modified: head/sys/kern/sched_ule.c
==============================================================================
--- head/sys/kern/sched_ule.c	Sun Dec 15 21:08:40 2019	(r355778)
+++ head/sys/kern/sched_ule.c	Sun Dec 15 21:11:15 2019	(r355779)
@@ -464,7 +464,7 @@ tdq_runq_add(struct tdq *tdq, struct thread *td, int f
 	u_char pri;
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
-	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
 
 	pri = td->td_priority;
 	ts = td_get_sched(td);
@@ -515,6 +515,7 @@ tdq_runq_rem(struct tdq *tdq, struct thread *td)
 
 	ts = td_get_sched(td);
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+	THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
 	KASSERT(ts->ts_runq != NULL,
 	    ("tdq_runq_remove: thread %p null ts_runq", td));
 	if (ts->ts_flags & TSF_XFERABLE) {
@@ -539,7 +540,7 @@ tdq_load_add(struct tdq *tdq, struct thread *td)
 {
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
-	THREAD_LOCK_ASSERT(td, MA_OWNED);
+	THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
 
 	tdq->tdq_load++;
 	if ((td->td_flags & TDF_NOLOAD) == 0)
@@ -556,8 +557,8 @@ static void
 tdq_load_rem(struct tdq *tdq, struct thread *td)
 {
 
-	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+	THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
 	KASSERT(tdq->tdq_load != 0,
 	    ("tdq_load_rem: Removing with 0 load on queue %d", TDQ_ID(tdq)));
 
@@ -949,7 +950,6 @@ sched_balance_pair(struct tdq *high, struct tdq *low)
 static struct thread *
 tdq_move(struct tdq *from, struct tdq *to)
 {
-	struct td_sched *ts;
 	struct thread *td;
 	struct tdq *tdq;
 	int cpu;
@@ -962,18 +962,18 @@ tdq_move(struct tdq *from, struct tdq *to)
 	td = tdq_steal(tdq, cpu);
 	if (td == NULL)
 		return (NULL);
-	ts = td_get_sched(td);
+
 	/*
-	 * Although the run queue is locked the thread may be blocked.  Lock
-	 * it to clear this and acquire the run-queue lock.
+	 * Although the run queue is locked the thread may be
+	 * blocked.  We can not set the lock until it is unblocked.
 	 */
-	thread_lock(td);
-	/* Drop recursive lock on from acquired via thread_lock(). */
-	TDQ_UNLOCK(from);
+	thread_lock_block_wait(td);
 	sched_rem(td);
-	ts->ts_cpu = cpu;
+	THREAD_LOCKPTR_ASSERT(td, TDQ_LOCKPTR(from));
 	td->td_lock = TDQ_LOCKPTR(to);
+	td_get_sched(td)->ts_cpu = cpu;
 	tdq_add(to, td, SRQ_YIELDING);
+
 	return (td);
 }
 
@@ -1205,6 +1205,7 @@ sched_setcpu(struct thread *td, int cpu, int flags)
 {
 
 	struct tdq *tdq;
+	struct mtx *mtx;
 
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	tdq = TDQ_CPU(cpu);
@@ -1212,26 +1213,20 @@ sched_setcpu(struct thread *td, int cpu, int flags)
 	/*
 	 * If the lock matches just return the queue.
 	 */
-	if (td->td_lock == TDQ_LOCKPTR(tdq))
+	if (td->td_lock == TDQ_LOCKPTR(tdq)) {
+		KASSERT((flags & SRQ_HOLD) == 0,
+		    ("sched_setcpu: Invalid lock for SRQ_HOLD"));
 		return (tdq);
-#ifdef notyet
-	/*
-	 * If the thread isn't running its lockptr is a
-	 * turnstile or a sleepqueue.  We can just lock_set without
-	 * blocking.
-	 */
-	if (TD_CAN_RUN(td)) {
-		TDQ_LOCK(tdq);
-		thread_lock_set(td, TDQ_LOCKPTR(tdq));
-		return (tdq);
 	}
-#endif
+
 	/*
 	 * The hard case, migration, we need to block the thread first to
 	 * prevent order reversals with other cpus locks.
 	 */
 	spinlock_enter();
-	thread_lock_block(td);
+	mtx = thread_lock_block(td);
+	if ((flags & SRQ_HOLD) == 0)
+		mtx_unlock_spin(mtx);
 	TDQ_LOCK(tdq);
 	thread_lock_unblock(td, TDQ_LOCKPTR(tdq));
 	spinlock_exit();
@@ -1422,8 +1417,7 @@ tdq_setup(struct tdq *tdq, int id)
 	tdq->tdq_id = id;
 	snprintf(tdq->tdq_name, sizeof(tdq->tdq_name),
 	    "sched lock %d", (int)TDQ_ID(tdq));
-	mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock",
-	    MTX_SPIN | MTX_RECURSE);
+	mtx_init(&tdq->tdq_lock, tdq->tdq_name, "sched lock", MTX_SPIN);
 #ifdef KTR
 	snprintf(tdq->tdq_loadname, sizeof(tdq->tdq_loadname),
 	    "CPU %d load", (int)TDQ_ID(tdq));
@@ -1785,7 +1779,7 @@ sched_thread_priority(struct thread *td, u_char prio)
 	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
 		sched_rem(td);
 		td->td_priority = prio;
-		sched_add(td, SRQ_BORROWING);
+		sched_add(td, SRQ_BORROWING | SRQ_HOLDTD);
 		return;
 	}
 	/*
@@ -2011,6 +2005,7 @@ static struct mtx *
 sched_switch_migrate(struct tdq *tdq, struct thread *td, int flags)
 {
 	struct tdq *tdn;
+	struct mtx *mtx;
 
 	KASSERT(!CPU_ABSENT(td_get_sched(td)->ts_cpu), ("sched_switch_migrate: "
 	    "thread %s queued on absent CPU %d.", td->td_name,
@@ -2024,7 +2019,8 @@ sched_switch_migrate(struct tdq *tdq, struct thread *t
 	 * not holding either run-queue lock.
 	 */
 	spinlock_enter();
-	thread_lock_block(td);	/* This releases the lock on tdq. */
+	mtx = thread_lock_block(td);
+	mtx_unlock_spin(mtx);
 
 	/*
 	 * Acquire both run-queue locks before placing the thread on the new
@@ -2044,8 +2040,7 @@ sched_switch_migrate(struct tdq *tdq, struct thread *t
 }
 
 /*
- * Variadic version of thread_lock_unblock() that does not assume td_lock
- * is blocked.
+ * thread_lock_unblock() that does not assume td_lock is blocked.
  */
 static inline void
 thread_unblock_switch(struct thread *td, struct mtx *mtx)
@@ -2114,8 +2109,13 @@ sched_switch(struct thread *td, struct thread *newtd, 
 		}
 	} else {
 		/* This thread must be going to sleep. */
-		TDQ_LOCK(tdq);
 		mtx = thread_lock_block(td);
+		if (mtx != TDQ_LOCKPTR(tdq)) {
+			spinlock_enter();
+			mtx_unlock_spin(mtx);
+			TDQ_LOCK(tdq);
+			spinlock_exit();
+		}
 		tdq_load_rem(tdq, td);
 #ifdef SMP
 		if (tdq->tdq_load == 0)
@@ -2237,9 +2237,11 @@ sched_sleep(struct thread *td, int prio)
 /*
  * Schedule a thread to resume execution and record how long it voluntarily
  * slept.  We also update the pctcpu, interactivity, and priority.
+ *
+ * Requires the thread lock on entry, drops on exit.
  */
 void
-sched_wakeup(struct thread *td)
+sched_wakeup(struct thread *td, int srqflags)
 {
 	struct td_sched *ts;
 	int slptick;
@@ -2247,6 +2249,7 @@ sched_wakeup(struct thread *td)
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td_get_sched(td);
 	td->td_flags &= ~TDF_CANSWAP;
+
 	/*
 	 * If we slept for more than a tick update our interactivity and
 	 * priority.
@@ -2262,7 +2265,7 @@ sched_wakeup(struct thread *td)
 	 * Reset the slice value since we slept and advanced the round-robin.
 	 */
 	ts->ts_slice = 0;
-	sched_add(td, SRQ_BORING);
+	sched_add(td, SRQ_BORING | srqflags);
 }
 
 /*
@@ -2578,6 +2581,7 @@ tdq_add(struct tdq *tdq, struct thread *td, int flags)
 {
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
+	THREAD_LOCK_BLOCKED_ASSERT(td, MA_OWNED);
 	KASSERT((td->td_inhibitors == 0),
 	    ("sched_add: trying to run inhibited thread"));
 	KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
@@ -2594,6 +2598,8 @@ tdq_add(struct tdq *tdq, struct thread *td, int flags)
 /*
  * Select the target thread queue and add a thread to it.  Request
  * preemption or IPI a remote processor if required.
+ *
+ * Requires the thread lock on entry, drops on exit.
  */
 void
 sched_add(struct thread *td, int flags)
@@ -2625,10 +2631,10 @@ sched_add(struct thread *td, int flags)
 	cpu = sched_pickcpu(td, flags);
 	tdq = sched_setcpu(td, cpu, flags);
 	tdq_add(tdq, td, flags);
-	if (cpu != PCPU_GET(cpuid)) {
+	if (cpu != PCPU_GET(cpuid))
 		tdq_notify(tdq, td);
-		return;
-	}
+	else if (!(flags & SRQ_YIELDING))
+		sched_setpreempt(td);
 #else
 	tdq = TDQ_SELF();
 	TDQ_LOCK(tdq);
@@ -2636,11 +2642,16 @@ sched_add(struct thread *td, int flags)
 	 * Now that the thread is moving to the run-queue, set the lock

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


