Date:      Tue, 2 Oct 2007 16:53:33 -0700 (PDT)
From:      Jeff Roberson <jroberson@chesapeake.net>
To:        current@freebsd.org
Subject:   ULE/yielding patch for testing.
Message-ID:  <20071002165007.D587@10.0.0.1>


[-- Attachment #1 --]
Enclosed is a patch that does two things:

1)  Reduces UP context switch time by over 10%, making ULE faster than 4BSD 
on UP.  On SMP it's hard to compare, since ULE can do as many as 30x as 
many switches per second on my 8-way system.

2)  Restores the old sched_yield() behavior from 6.x.  This was changed in 
-current unintentionally, I think.

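If you want to poke at the sched_yield() side specifically, something along 
these lines is enough to hammer the syscall path (a throwaway harness I'm 
sketching here, not part of the patch); run two or more copies at once so 
the yields actually have someone to switch to:

/*
 * Throwaway sched_yield() exerciser: times N back-to-back yields.
 * Run several copies concurrently so each yield really switches.
 */
#include <sys/time.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char *argv[])
{
	struct timeval start, end;
	double secs;
	long i, n;

	n = (argc > 1) ? strtol(argv[1], NULL, 10) : 1000000;
	gettimeofday(&start, NULL);
	for (i = 0; i < n; i++)
		sched_yield();
	gettimeofday(&end, NULL);
	secs = (end.tv_sec - start.tv_sec) +
	    (end.tv_usec - start.tv_usec) / 1e6;
	printf("%ld yields in %.3f s (%.2f us each)\n", n, secs,
	    secs * 1e6 / n);
	return (0);
}
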
I'd appreciate any extra testing.  The ULE context switch time 
improvements required some changes to how often we recalculate 
priorities.  I'm mostly interested in hearing whether this causes any 
regressions in normal workloads.

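For the raw switch-time side, a crude pipe ping-pong like the one below 
(again, just a sketch for testers, nowhere near as careful as lmbench's 
lat_ctx) will show gross regressions; each round trip costs two switches:

/*
 * Crude context-switch ping-pong: parent and child bounce one byte
 * over a pair of pipes, so every round trip forces two switches.
 */
#include <sys/types.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	struct timeval start, end;
	int p2c[2], c2p[2];
	long i, n = 200000;
	double secs;
	pid_t pid;
	char b = 0;

	if (pipe(p2c) == -1 || pipe(c2p) == -1)
		exit(1);
	if ((pid = fork()) == -1)
		exit(1);
	if (pid == 0) {
		close(p2c[1]);
		close(c2p[0]);
		while (read(p2c[0], &b, 1) == 1)
			write(c2p[1], &b, 1);
		_exit(0);
	}
	close(p2c[0]);
	close(c2p[1]);
	gettimeofday(&start, NULL);
	for (i = 0; i < n; i++) {
		write(p2c[1], &b, 1);
		read(c2p[0], &b, 1);
	}
	gettimeofday(&end, NULL);
	secs = (end.tv_sec - start.tv_sec) +
	    (end.tv_usec - start.tv_usec) / 1e6;
	printf("%.2f us per round trip\n", secs * 1e6 / n);
	return (0);
}
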
Those of you still using 4BSD can also verify that the yield changes don't 
cause any problems there.

Thanks,
Jeff
[-- Attachment #2 --]
Index: kern_switch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_switch.c,v
retrieving revision 1.136
diff -p -u -r1.136 kern_switch.c
--- kern_switch.c	20 Sep 2007 20:38:43 -0000	1.136
+++ kern_switch.c	2 Oct 2007 21:41:10 -0000
@@ -133,16 +133,6 @@ choosethread(void)
 {
 	struct thread *td;
 
-#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
-	if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
-		/* Shutting down, run idlethread on AP's */
-		td = PCPU_GET(idlethread);
-		CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
-		TD_SET_RUNNING(td);
-		return (td);
-	}
-#endif
-
 retry:
 	td = sched_choose();
 
@@ -184,7 +174,7 @@ critical_exit(void)
 	td = curthread;
 	KASSERT(td->td_critnest != 0,
 	    ("critical_exit: td_critnest == 0"));
-#ifdef PREEMPTION
+
 	if (td->td_critnest == 1) {
 		td->td_critnest = 0;
 		if (td->td_owepreempt) {
@@ -196,7 +186,6 @@ critical_exit(void)
 			thread_unlock(td);
 		}
 	} else
-#endif
 		td->td_critnest--;
 
 	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
Index: kern_synch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.301
diff -p -u -r1.301 kern_synch.c
--- kern_synch.c	17 Sep 2007 05:27:20 -0000	1.301
+++ kern_synch.c	2 Oct 2007 08:18:19 -0000
@@ -553,8 +553,11 @@ synch_setup(dummy)
 int
 yield(struct thread *td, struct yield_args *uap)
 {
-	mtx_assert(&Giant, MA_NOTOWNED);
-	(void)uap;
-	sched_relinquish(td);
+
+	thread_lock(td);
+	sched_prio(td, PRI_MAX_TIMESHARE);
+	mi_switch(SW_VOL, NULL);
+	thread_unlock(td);
+	td->td_retval[0] = 0;
 	return (0);
 }
Index: p1003_1b.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/p1003_1b.c,v
retrieving revision 1.35
diff -p -u -r1.35 p1003_1b.c
--- p1003_1b.c	5 Mar 2007 13:10:57 -0000	1.35
+++ p1003_1b.c	2 Oct 2007 21:55:48 -0000
@@ -241,7 +241,8 @@ int
 sched_yield(struct thread *td, struct sched_yield_args *uap)
 {
 
-	return (ksched_yield(ksched));
+	sched_relinquish(curthread);
+	return 0;
 }
 
 int
Index: sched_4bsd.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sched_4bsd.c,v
retrieving revision 1.105
diff -p -u -r1.105 sched_4bsd.c
--- sched_4bsd.c	21 Sep 2007 04:10:23 -0000	1.105
+++ sched_4bsd.c	2 Oct 2007 08:08:36 -0000
@@ -1324,8 +1324,6 @@ void
 sched_relinquish(struct thread *td)
 {
 	thread_lock(td);
-	if (td->td_pri_class == PRI_TIMESHARE)
-		sched_prio(td, PRI_MAX_TIMESHARE);
 	SCHED_STAT_INC(switch_relinquish);
 	mi_switch(SW_VOL, NULL);
 	thread_unlock(td);
Index: sched_ule.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sched_ule.c,v
retrieving revision 1.209
diff -p -u -r1.209 sched_ule.c
--- sched_ule.c	24 Sep 2007 00:28:54 -0000	1.209
+++ sched_ule.c	2 Oct 2007 22:26:14 -0000
@@ -376,6 +376,7 @@ tdq_runq_add(struct tdq *tdq, struct td_
 {
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	THREAD_LOCK_ASSERT(ts->ts_thread, MA_OWNED);
+	TD_SET_RUNQ(ts->ts_thread);
 #ifdef SMP
 	if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
 		tdq->tdq_transferable++;
@@ -411,6 +412,23 @@ tdq_runq_add(struct tdq *tdq, struct td_
 		runq_add(ts->ts_runq, ts, flags);
 }
 
+/*
+ * Pick the run queue based on priority.
+ */
+static __inline void
+tdq_runq_pick(struct tdq *tdq, struct td_sched *ts)
+{
+	int pri;
+
+	pri = ts->ts_thread->td_priority;
+	if (pri <= PRI_MAX_REALTIME)
+		ts->ts_runq = &tdq->tdq_realtime;
+	else if (pri <= PRI_MAX_TIMESHARE)
+		ts->ts_runq = &tdq->tdq_timeshare;
+	else
+		ts->ts_runq = &tdq->tdq_idle;
+}
+
 /* 
  * Remove a thread from a run-queue.  This typically happens when a thread
  * is selected to run.  Running threads are not on the queue and the
@@ -434,13 +452,6 @@ tdq_runq_rem(struct tdq *tdq, struct td_
 			runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
 		else
 			runq_remove_idx(ts->ts_runq, ts, NULL);
-		/*
-		 * For timeshare threads we update the priority here so
-		 * the priority reflects the time we've been sleeping.
-		 */
-		ts->ts_ltick = ticks;
-		sched_pctcpu_update(ts);
-		sched_priority(ts->ts_thread);
 	} else
 		runq_remove(ts->ts_runq, ts);
 }
@@ -1099,6 +1110,15 @@ sched_pickcpu(struct td_sched *ts, int f
 
 #endif	/* SMP */
 
+#if RQB_LEN == 1
+#define	runq_empty(rq)	((rq)->rq_status.rqb_bits[0] == 0)
+#elif RQB_LEN == 2
+#define	runq_empty(rq)							\
+    (((rq)->rq_status.rqb_bits[0] | (rq)->rq_status.rqb_bits[1]) == 0)
+#else
+#error "Unsupported RQB_LEN"
+#endif
+
 /*
  * Pick the highest priority task we have and return it.
  */
@@ -1108,25 +1128,29 @@ tdq_choose(struct tdq *tdq)
 	struct td_sched *ts;
 
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
-	ts = runq_choose(&tdq->tdq_realtime);
-	if (ts != NULL)
-		return (ts);
-	ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
-	if (ts != NULL) {
-		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
-		    ("tdq_choose: Invalid priority on timeshare queue %d",
-		    ts->ts_thread->td_priority));
-		return (ts);
+	if (!runq_empty(&tdq->tdq_realtime)) {
+		ts = runq_choose(&tdq->tdq_realtime);
+		if (ts != NULL)
+			return (ts);
+	}
+	if (!runq_empty(&tdq->tdq_timeshare)) {
+		ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
+		if (ts != NULL) {
+			KASSERT(ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
+			    ("tdq_choose: Invalid timeshare priority %d",
+			    ts->ts_thread->td_priority));
+			return (ts);
+		}
 	}
-
-	ts = runq_choose(&tdq->tdq_idle);
-	if (ts != NULL) {
-		KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
-		    ("tdq_choose: Invalid priority on idle queue %d",
-		    ts->ts_thread->td_priority));
-		return (ts);
+	if (!runq_empty(&tdq->tdq_idle)) {
+		ts = runq_choose(&tdq->tdq_idle);
+		if (ts != NULL) {
+			KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
+			    ("tdq_choose: Invalid priority on idle queue %d",
+			    ts->ts_thread->td_priority));
+			return (ts);
+		}
 	}
-
 	return (NULL);
 }
 
@@ -1302,6 +1326,7 @@ sched_setup(void *dummy)
 	/* Add thread0's load since it's running. */
 	TDQ_LOCK(tdq);
 	thread0.td_lock = TDQ_LOCKPTR(TDQ_SELF());
+	tdq_runq_pick(tdq, &td_sched0);
 	tdq_load_add(tdq, &td_sched0);
 	TDQ_UNLOCK(tdq);
 }
@@ -1565,15 +1590,20 @@ static void
 sched_thread_priority(struct thread *td, u_char prio)
 {
 	struct td_sched *ts;
+	struct tdq *tdq;
 
 	CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
 	    td, td->td_proc->p_comm, td->td_priority, prio, curthread,
 	    curthread->td_proc->p_comm);
 	ts = td->td_sched;
+	tdq = TDQ_CPU(ts->ts_cpu);
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	if (td->td_priority == prio)
 		return;
-
+#ifdef SMP
+	if (prio < tdq->tdq_lowpri)
+		tdq->tdq_lowpri = prio;
+#endif
 	if (TD_ON_RUNQ(td) && prio < td->td_priority) {
 		/*
 		 * If the priority has been elevated due to priority
@@ -1585,14 +1615,8 @@ sched_thread_priority(struct thread *td,
 		td->td_priority = prio;
 		sched_add(td, SRQ_BORROWING);
 	} else {
-#ifdef SMP
-		struct tdq *tdq;
-
-		tdq = TDQ_CPU(ts->ts_cpu);
-		if (prio < tdq->tdq_lowpri)
-			tdq->tdq_lowpri = prio;
-#endif
 		td->td_priority = prio;
+		tdq_runq_pick(tdq, ts);
 	}
 }
 
@@ -1739,6 +1763,8 @@ sched_switch_migrate(struct tdq *tdq, st
 
 	tdn = TDQ_CPU(td->td_sched->ts_cpu);
 #ifdef SMP
+	/* The load is being removed from the current cpu. */
+	tdq_load_rem(tdq, td->td_sched);
 	/*
 	 * Do the lock dance required to avoid LOR.  We grab an extra
 	 * spinlock nesting to prevent preemption while we're
@@ -1830,12 +1856,11 @@ sched_switch(struct thread *td, struct t
 		TD_SET_CAN_RUN(td);
 	} else if (TD_IS_RUNNING(td)) {
 		MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
-		tdq_load_rem(tdq, ts);
 		srqflag = (flags & SW_PREEMPT) ?
 		    SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
 		    SRQ_OURSELF|SRQ_YIELDING;
 		if (ts->ts_cpu == cpuid)
-			tdq_add(tdq, td, srqflag);
+			tdq_runq_add(tdq, ts, srqflag);
 		else
 			mtx = sched_switch_migrate(tdq, td, srqflag);
 	} else {
@@ -1949,7 +1974,6 @@ sched_wakeup(struct thread *td)
 		ts->ts_slptime += hzticks;
 		sched_interact_update(td);
 		sched_pctcpu_update(ts);
-		sched_priority(td);
 	}
 	/* Reset the slice value after we sleep. */
 	ts->ts_slice = sched_slice;
@@ -2154,16 +2178,17 @@ sched_clock(struct thread *td)
 	 */
 	td->td_sched->ts_runtime += tickincr;
 	sched_interact_update(td);
+	sched_priority(td);
 	/*
 	 * We used up one time slice.
 	 */
 	if (--ts->ts_slice > 0)
 		return;
 	/*
-	 * We're out of time, recompute priorities and requeue.
+	 * We're out of time, force a requeue later.
 	 */
-	sched_priority(td);
 	td->td_flags |= TDF_NEEDRESCHED;
+	ts->ts_slice = sched_slice;
 }
 
 /*
@@ -2284,11 +2309,10 @@ void
 tdq_add(struct tdq *tdq, struct thread *td, int flags)
 {
 	struct td_sched *ts;
-	int class;
 #ifdef SMP
+	int class;
 	int cpumask;
 #endif
-
 	TDQ_LOCK_ASSERT(tdq, MA_OWNED);
 	KASSERT((td->td_inhibitors == 0),
 	    ("sched_add: trying to run inhibited thread"));
@@ -2298,20 +2322,11 @@ tdq_add(struct tdq *tdq, struct thread *
 	    ("sched_add: thread swapped out"));
 
 	ts = td->td_sched;
-	class = PRI_BASE(td->td_pri_class);
-        TD_SET_RUNQ(td);
-	if (ts->ts_slice == 0)
-		ts->ts_slice = sched_slice;
-	/*
-	 * Pick the run queue based on priority.
-	 */
-	if (td->td_priority <= PRI_MAX_REALTIME)
-		ts->ts_runq = &tdq->tdq_realtime;
-	else if (td->td_priority <= PRI_MAX_TIMESHARE)
-		ts->ts_runq = &tdq->tdq_timeshare;
-	else
-		ts->ts_runq = &tdq->tdq_idle;
+	tdq_runq_pick(tdq, ts);
+	tdq_runq_add(tdq, ts, flags);
+	tdq_load_add(tdq, ts);
 #ifdef SMP
+	class = PRI_BASE(td->td_pri_class);
 	cpumask = 1 << ts->ts_cpu;
 	/*
 	 * If we had been idle, clear our bit in the group and potentially
@@ -2334,8 +2349,6 @@ tdq_add(struct tdq *tdq, struct thread *
 	if (td->td_priority < tdq->tdq_lowpri)
 		tdq->tdq_lowpri = td->td_priority;
 #endif
-	tdq_runq_add(tdq, ts, flags);
-	tdq_load_add(tdq, ts);
 }
 
 /*
@@ -2502,8 +2515,6 @@ void
 sched_relinquish(struct thread *td)
 {
 	thread_lock(td);
-	if (td->td_pri_class == PRI_TIMESHARE)
-		sched_prio(td, PRI_MAX_TIMESHARE);
 	SCHED_STAT_INC(switch_relinquish);
 	mi_switch(SW_VOL, NULL);
 	thread_unlock(td);