Date: Tue, 2 Oct 2007 16:53:33 -0700 (PDT)
From: Jeff Roberson <jroberson@chesapeake.net>
To: current@freebsd.org
Subject: ULE/yielding patch for testing.
Message-ID: <20071002165007.D587@10.0.0.1>
[-- Attachment #1 --]
Enclosed is a patch that does two things:
1) Reduces UP context switch time by over 10%, making it faster than
4BSD on UP.  On SMP the comparison is harder, since ULE can do as many
as 30x as many switches per second on my 8-way system.
2) Restores the old sched_yield() behavior from 6.x, which I believe
was changed unintentionally in -current.  A small userland illustration
of the intended behavior follows.
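To make that concrete, here is a minimal userland sketch.  It is
illustrative only and not part of the patch; my reading of the change
(treat it as an assumption) is that a yield hands the CPU to other
runnable threads at the same priority rather than also demoting the
caller.

/*
 * sched_yield(2) ping: parent and child both spin calling sched_yield().
 * On a UP box the two should alternate and finish in roughly the same
 * wall time if yield behaves as described above.
 */
#include <sys/types.h>
#include <sys/wait.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	long i, iters = 1000000;
	pid_t pid;

	if ((pid = fork()) == -1) {
		perror("fork");
		exit(1);
	}
	for (i = 0; i < iters; i++)
		sched_yield();
	printf("%s done after %ld yields\n",
	    pid == 0 ? "child" : "parent", iters);
	if (pid != 0)
		wait(NULL);
	return (0);
}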
I'd appreciate any extra testing.  The ULE context switch time
improvements required some changes to how frequently we recalculate
priorities, and I'm mostly interested in hearing whether this causes
any regression in normal workloads.  A rough way to compare switch
times before and after is sketched below.
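This pipe ping-pong is one rough way to get a per-switch number on UP
(illustrative only; it is not the benchmark behind the 10% figure):

/*
 * Pipe ping-pong: each round trip forces two context switches on UP,
 * so the per-iteration time approximates twice the switch cost plus
 * pipe overhead.  Compare the output with and without the patch.
 */
#include <sys/types.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	int ptoc[2], ctop[2];
	long i, iters = 100000;
	char b = 0;
	pid_t pid;
	struct timeval start, end;
	double usec;

	if (pipe(ptoc) == -1 || pipe(ctop) == -1) {
		perror("pipe");
		exit(1);
	}
	if ((pid = fork()) == -1) {
		perror("fork");
		exit(1);
	}
	if (pid == 0) {
		for (i = 0; i < iters; i++) {
			read(ptoc[0], &b, 1);
			write(ctop[1], &b, 1);
		}
		_exit(0);
	}
	gettimeofday(&start, NULL);
	for (i = 0; i < iters; i++) {
		write(ptoc[1], &b, 1);
		read(ctop[0], &b, 1);
	}
	gettimeofday(&end, NULL);
	usec = (end.tv_sec - start.tv_sec) * 1e6 +
	    (end.tv_usec - start.tv_usec);
	printf("%.2f us per round trip\n", usec / iters);
	return (0);
}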
Those of you still using 4BSD can also verify that the yield changes don't
cause any problems there.
Thanks,
Jeff
[-- Attachment #2 --]
Index: kern_switch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_switch.c,v
retrieving revision 1.136
diff -p -u -r1.136 kern_switch.c
--- kern_switch.c 20 Sep 2007 20:38:43 -0000 1.136
+++ kern_switch.c 2 Oct 2007 21:41:10 -0000
@@ -133,16 +133,6 @@ choosethread(void)
{
struct thread *td;
-#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
- if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
- /* Shutting down, run idlethread on AP's */
- td = PCPU_GET(idlethread);
- CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
- TD_SET_RUNNING(td);
- return (td);
- }
-#endif
-
retry:
td = sched_choose();
@@ -184,7 +174,7 @@ critical_exit(void)
td = curthread;
KASSERT(td->td_critnest != 0,
("critical_exit: td_critnest == 0"));
-#ifdef PREEMPTION
+
if (td->td_critnest == 1) {
td->td_critnest = 0;
if (td->td_owepreempt) {
@@ -196,7 +186,6 @@ critical_exit(void)
thread_unlock(td);
}
} else
-#endif
td->td_critnest--;
CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
Index: kern_synch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.301
diff -p -u -r1.301 kern_synch.c
--- kern_synch.c 17 Sep 2007 05:27:20 -0000 1.301
+++ kern_synch.c 2 Oct 2007 08:18:19 -0000
@@ -553,8 +553,11 @@ synch_setup(dummy)
int
yield(struct thread *td, struct yield_args *uap)
{
- mtx_assert(&Giant, MA_NOTOWNED);
- (void)uap;
- sched_relinquish(td);
+
+ thread_lock(td);
+ sched_prio(td, PRI_MAX_TIMESHARE);
+ mi_switch(SW_VOL, NULL);
+ thread_unlock(td);
+ td->td_retval[0] = 0;
return (0);
}
Index: p1003_1b.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/p1003_1b.c,v
retrieving revision 1.35
diff -p -u -r1.35 p1003_1b.c
--- p1003_1b.c 5 Mar 2007 13:10:57 -0000 1.35
+++ p1003_1b.c 2 Oct 2007 21:55:48 -0000
@@ -241,7 +241,8 @@ int
sched_yield(struct thread *td, struct sched_yield_args *uap)
{
- return (ksched_yield(ksched));
+ sched_relinquish(curthread);
+ return 0;
}
int
Index: sched_4bsd.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sched_4bsd.c,v
retrieving revision 1.105
diff -p -u -r1.105 sched_4bsd.c
--- sched_4bsd.c 21 Sep 2007 04:10:23 -0000 1.105
+++ sched_4bsd.c 2 Oct 2007 08:08:36 -0000
@@ -1324,8 +1324,6 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- if (td->td_pri_class == PRI_TIMESHARE)
- sched_prio(td, PRI_MAX_TIMESHARE);
SCHED_STAT_INC(switch_relinquish);
mi_switch(SW_VOL, NULL);
thread_unlock(td);
Index: sched_ule.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/sched_ule.c,v
retrieving revision 1.209
diff -p -u -r1.209 sched_ule.c
--- sched_ule.c 24 Sep 2007 00:28:54 -0000 1.209
+++ sched_ule.c 2 Oct 2007 22:26:14 -0000
@@ -376,6 +376,7 @@ tdq_runq_add(struct tdq *tdq, struct td_
{
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
THREAD_LOCK_ASSERT(ts->ts_thread, MA_OWNED);
+ TD_SET_RUNQ(ts->ts_thread);
#ifdef SMP
if (THREAD_CAN_MIGRATE(ts->ts_thread)) {
tdq->tdq_transferable++;
@@ -411,6 +412,23 @@ tdq_runq_add(struct tdq *tdq, struct td_
runq_add(ts->ts_runq, ts, flags);
}
+/*
+ * Pick the run queue based on priority.
+ */
+static __inline void
+tdq_runq_pick(struct tdq *tdq, struct td_sched *ts)
+{
+ int pri;
+
+ pri = ts->ts_thread->td_priority;
+ if (pri <= PRI_MAX_REALTIME)
+ ts->ts_runq = &tdq->tdq_realtime;
+ else if (pri <= PRI_MAX_TIMESHARE)
+ ts->ts_runq = &tdq->tdq_timeshare;
+ else
+ ts->ts_runq = &tdq->tdq_idle;
+}
+
/*
* Remove a thread from a run-queue. This typically happens when a thread
* is selected to run. Running threads are not on the queue and the
@@ -434,13 +452,6 @@ tdq_runq_rem(struct tdq *tdq, struct td_
runq_remove_idx(ts->ts_runq, ts, &tdq->tdq_ridx);
else
runq_remove_idx(ts->ts_runq, ts, NULL);
- /*
- * For timeshare threads we update the priority here so
- * the priority reflects the time we've been sleeping.
- */
- ts->ts_ltick = ticks;
- sched_pctcpu_update(ts);
- sched_priority(ts->ts_thread);
} else
runq_remove(ts->ts_runq, ts);
}
@@ -1099,6 +1110,15 @@ sched_pickcpu(struct td_sched *ts, int f
#endif /* SMP */
+#if RQB_LEN == 1
+#define runq_empty(rq) ((rq)->rq_status.rqb_bits[0] == 0)
+#elif RQB_LEN == 2
+#define runq_empty(rq) \
+ (((rq)->rq_status.rqb_bits[0] | (rq)->rq_status.rqb_bits[1]) == 0)
+#else
+#error "Unsupported RQB_LEN"
+#endif
+
/*
* Pick the highest priority task we have and return it.
*/
@@ -1108,25 +1128,29 @@ tdq_choose(struct tdq *tdq)
struct td_sched *ts;
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
- ts = runq_choose(&tdq->tdq_realtime);
- if (ts != NULL)
- return (ts);
- ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
- if (ts != NULL) {
- KASSERT(ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
- ("tdq_choose: Invalid priority on timeshare queue %d",
- ts->ts_thread->td_priority));
- return (ts);
+ if (!runq_empty(&tdq->tdq_realtime)) {
+ ts = runq_choose(&tdq->tdq_realtime);
+ if (ts != NULL)
+ return (ts);
+ }
+ if (!runq_empty(&tdq->tdq_timeshare)) {
+ ts = runq_choose_from(&tdq->tdq_timeshare, tdq->tdq_ridx);
+ if (ts != NULL) {
+ KASSERT(ts->ts_thread->td_priority >= PRI_MIN_TIMESHARE,
+ ("tdq_choose: Invalid timeshare priority %d",
+ ts->ts_thread->td_priority));
+ return (ts);
+ }
}
-
- ts = runq_choose(&tdq->tdq_idle);
- if (ts != NULL) {
- KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
- ("tdq_choose: Invalid priority on idle queue %d",
- ts->ts_thread->td_priority));
- return (ts);
+ if (!runq_empty(&tdq->tdq_idle)) {
+ ts = runq_choose(&tdq->tdq_idle);
+ if (ts != NULL) {
+ KASSERT(ts->ts_thread->td_priority >= PRI_MIN_IDLE,
+ ("tdq_choose: Invalid priority on idle queue %d",
+ ts->ts_thread->td_priority));
+ return (ts);
+ }
}
-
return (NULL);
}
@@ -1302,6 +1326,7 @@ sched_setup(void *dummy)
/* Add thread0's load since it's running. */
TDQ_LOCK(tdq);
thread0.td_lock = TDQ_LOCKPTR(TDQ_SELF());
+ tdq_runq_pick(tdq, &td_sched0);
tdq_load_add(tdq, &td_sched0);
TDQ_UNLOCK(tdq);
}
@@ -1565,15 +1590,20 @@ static void
sched_thread_priority(struct thread *td, u_char prio)
{
struct td_sched *ts;
+ struct tdq *tdq;
CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)",
td, td->td_proc->p_comm, td->td_priority, prio, curthread,
curthread->td_proc->p_comm);
ts = td->td_sched;
+ tdq = TDQ_CPU(ts->ts_cpu);
THREAD_LOCK_ASSERT(td, MA_OWNED);
if (td->td_priority == prio)
return;
-
+#ifdef SMP
+ if (prio < tdq->tdq_lowpri)
+ tdq->tdq_lowpri = prio;
+#endif
if (TD_ON_RUNQ(td) && prio < td->td_priority) {
/*
* If the priority has been elevated due to priority
@@ -1585,14 +1615,8 @@ sched_thread_priority(struct thread *td,
td->td_priority = prio;
sched_add(td, SRQ_BORROWING);
} else {
-#ifdef SMP
- struct tdq *tdq;
-
- tdq = TDQ_CPU(ts->ts_cpu);
- if (prio < tdq->tdq_lowpri)
- tdq->tdq_lowpri = prio;
-#endif
td->td_priority = prio;
+ tdq_runq_pick(tdq, ts);
}
}
@@ -1739,6 +1763,8 @@ sched_switch_migrate(struct tdq *tdq, st
tdn = TDQ_CPU(td->td_sched->ts_cpu);
#ifdef SMP
+ /* The load is being removed from the current cpu. */
+ tdq_load_rem(tdq, td->td_sched);
/*
* Do the lock dance required to avoid LOR. We grab an extra
* spinlock nesting to prevent preemption while we're
@@ -1830,12 +1856,11 @@ sched_switch(struct thread *td, struct t
TD_SET_CAN_RUN(td);
} else if (TD_IS_RUNNING(td)) {
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
- tdq_load_rem(tdq, ts);
srqflag = (flags & SW_PREEMPT) ?
SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
SRQ_OURSELF|SRQ_YIELDING;
if (ts->ts_cpu == cpuid)
- tdq_add(tdq, td, srqflag);
+ tdq_runq_add(tdq, ts, srqflag);
else
mtx = sched_switch_migrate(tdq, td, srqflag);
} else {
@@ -1949,7 +1974,6 @@ sched_wakeup(struct thread *td)
ts->ts_slptime += hzticks;
sched_interact_update(td);
sched_pctcpu_update(ts);
- sched_priority(td);
}
/* Reset the slice value after we sleep. */
ts->ts_slice = sched_slice;
@@ -2154,16 +2178,17 @@ sched_clock(struct thread *td)
*/
td->td_sched->ts_runtime += tickincr;
sched_interact_update(td);
+ sched_priority(td);
/*
* We used up one time slice.
*/
if (--ts->ts_slice > 0)
return;
/*
- * We're out of time, recompute priorities and requeue.
+ * We're out of time, force a requeue later.
*/
- sched_priority(td);
td->td_flags |= TDF_NEEDRESCHED;
+ ts->ts_slice = sched_slice;
}
/*
@@ -2284,11 +2309,10 @@ void
tdq_add(struct tdq *tdq, struct thread *td, int flags)
{
struct td_sched *ts;
- int class;
#ifdef SMP
+ int class;
int cpumask;
#endif
-
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
KASSERT((td->td_inhibitors == 0),
("sched_add: trying to run inhibited thread"));
@@ -2298,20 +2322,11 @@ tdq_add(struct tdq *tdq, struct thread *
("sched_add: thread swapped out"));
ts = td->td_sched;
- class = PRI_BASE(td->td_pri_class);
- TD_SET_RUNQ(td);
- if (ts->ts_slice == 0)
- ts->ts_slice = sched_slice;
- /*
- * Pick the run queue based on priority.
- */
- if (td->td_priority <= PRI_MAX_REALTIME)
- ts->ts_runq = &tdq->tdq_realtime;
- else if (td->td_priority <= PRI_MAX_TIMESHARE)
- ts->ts_runq = &tdq->tdq_timeshare;
- else
- ts->ts_runq = &tdq->tdq_idle;
+ tdq_runq_pick(tdq, ts);
+ tdq_runq_add(tdq, ts, flags);
+ tdq_load_add(tdq, ts);
#ifdef SMP
+ class = PRI_BASE(td->td_pri_class);
cpumask = 1 << ts->ts_cpu;
/*
* If we had been idle, clear our bit in the group and potentially
@@ -2334,8 +2349,6 @@ tdq_add(struct tdq *tdq, struct thread *
if (td->td_priority < tdq->tdq_lowpri)
tdq->tdq_lowpri = td->td_priority;
#endif
- tdq_runq_add(tdq, ts, flags);
- tdq_load_add(tdq, ts);
}
/*
@@ -2502,8 +2515,6 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
- if (td->td_pri_class == PRI_TIMESHARE)
- sched_prio(td, PRI_MAX_TIMESHARE);
SCHED_STAT_INC(switch_relinquish);
mi_switch(SW_VOL, NULL);
thread_unlock(td);