From owner-p4-projects@FreeBSD.ORG Sat Dec 1 22:32:50 2007 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 92F0116A4DE; Sat, 1 Dec 2007 22:32:50 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 548D516A51B for ; Sat, 1 Dec 2007 22:32:50 +0000 (UTC) (envelope-from peter@freebsd.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 1D65A13C4CC for ; Sat, 1 Dec 2007 22:32:50 +0000 (UTC) (envelope-from peter@freebsd.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.1/8.14.1) with ESMTP id lB1MWoZW084766 for ; Sat, 1 Dec 2007 22:32:50 GMT (envelope-from peter@freebsd.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.1/8.14.1/Submit) id lB1MWn7h084762 for perforce@freebsd.org; Sat, 1 Dec 2007 22:32:49 GMT (envelope-from peter@freebsd.org) Date: Sat, 1 Dec 2007 22:32:49 GMT Message-Id: <200712012232.lB1MWn7h084762@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to peter@freebsd.org using -f From: Peter Wemm To: Perforce Change Reviews Cc: Subject: PERFORCE change 129909 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 01 Dec 2007 22:32:50 -0000 http://perforce.freebsd.org/chv.cgi?CH=129909 Change 129909 by peter@peter_daintree on 2007/12/01 22:30:49 Revert to vendor. too painful to merge, will redo. Affected files ... .. //depot/projects/bike_sched/sys/kern/sched_4bsd.c#8 integrate .. //depot/projects/bike_sched/sys/kern/sched_ule.c#6 integrate Differences ... ==== //depot/projects/bike_sched/sys/kern/sched_4bsd.c#8 (text+ko) ==== @@ -33,12 +33,10 @@ */ #include -__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.86 2006/07/02 20:53:52 maxim Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.110 2007/11/14 06:21:22 julian Exp $"); #include "opt_hwpmc_hooks.h" -#define kse td_sched - #include #include #include @@ -53,6 +51,7 @@ #include #include #include +#include #include #include @@ -76,56 +75,40 @@ /* * The schedulable entity that runs a context. - * A process may have several of these. Probably one per processor - * but possibly a few more. + * This is an extension to the thread structure and is tailored to + * the requirements of this scheduler */ -struct kse { - TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ - struct thread *ke_thread; /* (*) Active associated thread. */ - fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ - u_char ke_rqindex; /* (j) Run queue index. */ - enum { - KES_THREAD = 0x0, /* slaved to thread state */ - KES_ONRUNQ - } ke_state; /* (j) KSE status. */ - int ke_cpticks; /* (j) Ticks of cpu time. */ - struct runq *ke_runq; /* runq the kse is currently on */ +struct td_sched { + TAILQ_ENTRY(td_sched) ts_procq; /* (j/z) Run queue. */ + struct thread *ts_thread; /* (*) Active associated thread. */ + fixpt_t ts_pctcpu; /* (j) %cpu during p_swtime. */ + u_char ts_rqindex; /* (j) Run queue index. */ + int ts_cpticks; /* (j) Ticks of cpu time. */ + int ts_slptime; /* (j) Seconds !RUNNING. 
*/ + struct runq *ts_runq; /* runq the thread is currently on */ }; -#define td_kse td_sched - /* flags kept in td_flags */ -#define TDF_DIDRUN TDF_SCHED0 /* KSE actually ran. */ -#define TDF_EXIT TDF_SCHED1 /* KSE is being killed. */ +#define TDF_DIDRUN TDF_SCHED0 /* thread actually ran. */ +#define TDF_EXIT TDF_SCHED1 /* thread is being killed. */ #define TDF_BOUND TDF_SCHED2 -#define ke_flags ke_thread->td_flags -#define KEF_DIDRUN TDF_DIDRUN /* KSE actually ran. */ -#define KEF_EXIT TDF_EXIT /* KSE is being killed. */ -#define KEF_BOUND TDF_BOUND /* stuck to one CPU */ +#define ts_flags ts_thread->td_flags +#define TSF_DIDRUN TDF_DIDRUN /* thread actually ran. */ +#define TSF_EXIT TDF_EXIT /* thread is being killed. */ +#define TSF_BOUND TDF_BOUND /* stuck to one CPU */ -#define SKE_RUNQ_PCPU(ke) \ - ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq) +#define SKE_RUNQ_PCPU(ts) \ + ((ts)->ts_runq != 0 && (ts)->ts_runq != &runq) -/* - * KSE_CAN_MIGRATE macro returns true if the kse can migrate between - * cpus. - */ -#define KSE_CAN_MIGRATE(ke) \ - ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) - -static struct kse kse0; +static struct td_sched td_sched0; +struct mtx sched_lock; static int sched_tdcnt; /* Total runnable threads in the system. */ static int sched_quantum; /* Roundrobin scheduling quantum in ticks. */ #define SCHED_QUANTUM (hz / 10) /* Default sched quantum */ -static struct callout roundrobin_callout; - -static struct thread *sched_choose(void); - static void setup_runqs(void); -static void roundrobin(void *arg); static void schedcpu(void); static void schedcpu_thread(void); static void sched_priority(struct thread *td, u_char prio); @@ -236,6 +219,12 @@ "account for htt"); #endif +#if 0 +static int sched_followon = 0; +SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW, + &sched_followon, 0, + "allow threads to share a quantum"); +#endif static __inline void sched_load_add(void) @@ -258,36 +247,15 @@ maybe_resched(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority < curthread->td_priority) curthread->td_flags |= TDF_NEEDRESCHED; } /* - * Force switch among equal priority processes every 100ms. - * We don't actually need to force a context switch of the current process. - * The act of firing the event triggers a context switch to softclock() and - * then switching back out again which is equivalent to a preemption, thus - * no further work is needed on the local CPU. - */ -/* ARGSUSED */ -static void -roundrobin(void *arg) -{ - -#ifdef SMP - mtx_lock_spin(&sched_lock); - forward_roundrobin(); - mtx_unlock_spin(&sched_lock); -#endif - - callout_reset(&roundrobin_callout, sched_quantum, roundrobin, NULL); -} - -/* * Constants for digital decay and forget: * 90% of (td_estcpu) usage in 5 * loadav time - * 95% of (ke_pctcpu) usage in 60 seconds (load insensitive) + * 95% of (ts_pctcpu) usage in 60 seconds (load insensitive) * Note that, as ps(1) mentions, this can let percentages * total over 100% (I've seen 137.9% for 3 processes). 
* @@ -352,7 +320,7 @@ #define loadfactor(loadav) (2 * (loadav)) #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) -/* decay 95% of `ke_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ +/* decay 95% of `ts_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, ""); @@ -381,77 +349,70 @@ register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); struct thread *td; struct proc *p; - struct kse *ke; + struct td_sched *ts; int awake, realstathz; realstathz = stathz ? stathz : hz; sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { - /* - * Prevent state changes and protect run queue. - */ - mtx_lock_spin(&sched_lock); - /* - * Increment time in/out of memory. We ignore overflow; with - * 16-bit int's (remember them?) overflow takes 45 days. - */ - p->p_swtime++; + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { awake = 0; - ke = td->td_kse; + thread_lock(td); + ts = td->td_sched; /* * Increment sleep time (if sleeping). We * ignore overflow, as above. */ /* - * The kse slptimes are not touched in wakeup - * because the thread may not HAVE a KSE. + * The td_sched slptimes are not touched in wakeup + * because the thread may not HAVE everything in + * memory? XXX I think this is out of date. */ - if (ke->ke_state == KES_ONRUNQ) { + if (TD_ON_RUNQ(td)) { awake = 1; - ke->ke_flags &= ~KEF_DIDRUN; - } else if ((ke->ke_state == KES_THREAD) && - (TD_IS_RUNNING(td))) { + ts->ts_flags &= ~TSF_DIDRUN; + } else if (TD_IS_RUNNING(td)) { awake = 1; - /* Do not clear KEF_DIDRUN */ - } else if (ke->ke_flags & KEF_DIDRUN) { + /* Do not clear TSF_DIDRUN */ + } else if (ts->ts_flags & TSF_DIDRUN) { awake = 1; - ke->ke_flags &= ~KEF_DIDRUN; + ts->ts_flags &= ~TSF_DIDRUN; } /* - * ke_pctcpu is only for ps and ttyinfo(). - * Do it per kse, and add them up at the end? + * ts_pctcpu is only for ps and ttyinfo(). + * Do it per td_sched, and add them up at the end? * XXXKSE */ - ke->ke_pctcpu = (ke->ke_pctcpu * ccpu) >> - FSHIFT; + ts->ts_pctcpu = (ts->ts_pctcpu * ccpu) >> FSHIFT; /* - * If the kse has been idle the entire second, + * If the td_sched has been idle the entire second, * stop recalculating its priority until * it wakes up. */ - if (ke->ke_cpticks == 0) - continue; + if (ts->ts_cpticks != 0) { #if (FSHIFT >= CCPU_SHIFT) - ke->ke_pctcpu += (realstathz == 100) - ? ((fixpt_t) ke->ke_cpticks) << - (FSHIFT - CCPU_SHIFT) : - 100 * (((fixpt_t) ke->ke_cpticks) - << (FSHIFT - CCPU_SHIFT)) / realstathz; + ts->ts_pctcpu += (realstathz == 100) + ? ((fixpt_t) ts->ts_cpticks) << + (FSHIFT - CCPU_SHIFT) : + 100 * (((fixpt_t) ts->ts_cpticks) + << (FSHIFT - CCPU_SHIFT)) / realstathz; #else - ke->ke_pctcpu += ((FSCALE - ccpu) * - (ke->ke_cpticks * - FSCALE / realstathz)) >> FSHIFT; + ts->ts_pctcpu += ((FSCALE - ccpu) * + (ts->ts_cpticks * + FSCALE / realstathz)) >> FSHIFT; #endif - ke->ke_cpticks = 0; - + ts->ts_cpticks = 0; + } /* * If there are ANY running threads in this process, * then don't count it as sleeping. 
+XXX this is broken + */ if (awake) { - if (td->td_slptime > 1) { + if (ts->ts_slptime > 1) { /* * In an ideal world, this should not * happen, because whoever woke us @@ -463,16 +424,19 @@ */ updatepri(td); } - td->td_slptime = 0; + ts->ts_slptime = 0; } else - td->td_slptime++; - if (td->td_slptime > 1) + ts->ts_slptime++; + if (ts->ts_slptime > 1) { + thread_unlock(td); continue; + } td->td_estcpu = decay_cpu(loadfac, td->td_estcpu); resetpriority(td); resetpriority_thread(td); + thread_unlock(td); } /* end of thread loop */ - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } /* end of process loop */ sx_sunlock(&allproc_lock); } @@ -483,11 +447,10 @@ static void schedcpu_thread(void) { - int nowake; for (;;) { schedcpu(); - tsleep(&nowake, 0, "-", hz); + pause("-", hz); } } @@ -499,16 +462,18 @@ static void updatepri(struct thread *td) { - register fixpt_t loadfac; - register unsigned int newcpu; + struct td_sched *ts; + fixpt_t loadfac; + unsigned int newcpu; + ts = td->td_sched; loadfac = loadfactor(averunnable.ldavg[0]); - if (td->td_slptime > 5 * loadfac) + if (ts->ts_slptime > 5 * loadfac) td->td_estcpu = 0; else { newcpu = td->td_estcpu; - td->td_slptime--; /* was incremented in schedcpu() */ - while (newcpu && --td->td_slptime) + ts->ts_slptime--; /* was incremented in schedcpu() */ + while (newcpu && --ts->ts_slptime) newcpu = decay_cpu(loadfac, newcpu); td->td_estcpu = newcpu; } @@ -529,12 +494,12 @@ NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN); newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), PRI_MAX_TIMESHARE); - td->td_user_pri = newpriority; + sched_user_prio(td, newpriority); } } /* - * Update the thread's priority when the associated ksegroup's user + * Update the thread's priority when the associated process's user * priority changes. */ static void @@ -562,11 +527,6 @@ sched_quantum = SCHED_QUANTUM; hogticks = 2 * sched_quantum; - callout_init(&roundrobin_callout, CALLOUT_MPSAFE); - - /* Kick off timeout driven events by calling first time. */ - roundrobin(NULL); - /* Account for thread0. */ sched_load_add(); } @@ -585,9 +545,10 @@ * Set up the scheduler specific parts of proc0. */ proc0.p_sched = NULL; /* XXX */ - thread0.td_sched = &kse0; - kse0.ke_thread = &thread0; - kse0.ke_state = KES_THREAD; + thread0.td_sched = &td_sched0; + thread0.td_lock = &sched_lock; + td_sched0.ts_thread = &thread0; + mtx_init(&sched_lock, "sched lock", NULL, MTX_SPIN | MTX_RECURSE); } int @@ -625,17 +586,25 @@ void sched_clock(struct thread *td) { - struct kse *ke; + struct td_sched *ts; - mtx_assert(&sched_lock, MA_OWNED); - ke = td->td_kse; + THREAD_LOCK_ASSERT(td, MA_OWNED); + ts = td->td_sched; - ke->ke_cpticks++; + ts->ts_cpticks++; td->td_estcpu = ESTCPULIM(td->td_estcpu + 1); if ((td->td_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { resetpriority(td); resetpriority_thread(td); } + + /* + * Force a context switch if the current thread has used up a full + * quantum (default quantum is 100ms). 
+ */ + if (!TD_IS_IDLETHREAD(td) && + ticks - PCPU_GET(switchticks) >= sched_quantum) + td->td_flags |= TDF_NEEDRESCHED; } /* @@ -644,20 +613,39 @@ void sched_exit(struct proc *p, struct thread *td) { - struct thread *parent = FIRST_THREAD_IN_PROC(p); CTR3(KTR_SCHED, "sched_exit: %p(%s) prio %d", - td, td->td_proc->p_comm, td->td_priority); + td, td->td_name, td->td_priority); + PROC_SLOCK_ASSERT(p, MA_OWNED); + sched_exit_thread(FIRST_THREAD_IN_PROC(p), td); +} + +void +sched_exit_thread(struct thread *td, struct thread *child) +{ - parent->td_estcpu = ESTCPULIM(parent->td_estcpu + td->td_estcpu); - if ((td->td_proc->p_flag & P_NOLOAD) == 0) + CTR3(KTR_SCHED, "sched_exit_thread: %p(%s) prio %d", + child, child->td_name, child->td_priority); + thread_lock(td); + td->td_estcpu = ESTCPULIM(td->td_estcpu + child->td_estcpu); + thread_unlock(td); + mtx_lock_spin(&sched_lock); + if ((child->td_proc->p_flag & P_NOLOAD) == 0) sched_load_rem(); + mtx_unlock_spin(&sched_lock); } void sched_fork(struct thread *td, struct thread *childtd) { + sched_fork_thread(td, childtd); +} + +void +sched_fork_thread(struct thread *td, struct thread *childtd) +{ childtd->td_estcpu = td->td_estcpu; + childtd->td_lock = &sched_lock; sched_newthread(childtd); } @@ -667,18 +655,20 @@ struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_OWNED); + PROC_SLOCK_ASSERT(p, MA_OWNED); p->p_nice = nice; FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); resetpriority(td); resetpriority_thread(td); + thread_unlock(td); } } void sched_class(struct thread *td, int class) { - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_pri_class = class; } @@ -689,16 +679,17 @@ sched_priority(struct thread *td, u_char prio) { CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", - td, td->td_proc->p_comm, td->td_priority, prio, curthread, - curthread->td_proc->p_comm); + td, td->td_name, td->td_priority, prio, curthread, + curthread->td_name); - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority == prio) return; - if (TD_ON_RUNQ(td)) { - adjustrunqueue(td, prio); - } else { - td->td_priority = prio; + td->td_priority = prio; + if (TD_ON_RUNQ(td) && + td->td_sched->ts_rqindex != (prio / RQ_PPQ)) { + sched_rem(td); + sched_add(td, SRQ_BORING); } } @@ -767,26 +758,78 @@ } void +sched_user_prio(struct thread *td, u_char prio) +{ + u_char oldprio; + + td->td_base_user_pri = prio; + if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) + return; + oldprio = td->td_user_pri; + td->td_user_pri = prio; + + if (TD_ON_UPILOCK(td) && oldprio != prio) + umtx_pi_adjust(td, oldprio); +} + +void +sched_lend_user_prio(struct thread *td, u_char prio) +{ + u_char oldprio; + + td->td_flags |= TDF_UBORROWING; + + oldprio = td->td_user_pri; + td->td_user_pri = prio; + + if (TD_ON_UPILOCK(td) && oldprio != prio) + umtx_pi_adjust(td, oldprio); +} + +void +sched_unlend_user_prio(struct thread *td, u_char prio) +{ + u_char base_pri; + + base_pri = td->td_base_user_pri; + if (prio >= base_pri) { + td->td_flags &= ~TDF_UBORROWING; + sched_user_prio(td, base_pri); + } else + sched_lend_user_prio(td, prio); +} + +void sched_sleep(struct thread *td) { - mtx_assert(&sched_lock, MA_OWNED); - td->td_slptime = 0; + THREAD_LOCK_ASSERT(td, MA_OWNED); + td->td_slptick = ticks; + td->td_sched->ts_slptime = 0; } void sched_switch(struct thread *td, struct thread *newtd, int flags) { - struct kse *ke; + struct td_sched *ts; struct proc *p; - ke = td->td_kse; + ts = 
td->td_sched; p = td->td_proc; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); + /* + * Switch to the sched lock to fix things up and pick + * a new thread. + */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_unlock(td); + } if ((p->p_flag & P_NOLOAD) == 0) sched_load_rem(); + if (newtd) newtd->td_flags |= (td->td_flags & TDF_NEEDRESCHED); @@ -800,12 +843,15 @@ * or stopped or any thing else similar. We never put the idle * threads on the run queue, however. */ - if (td == PCPU_GET(idlethread)) + if (td->td_flags & TDF_IDLETD) { TD_SET_CAN_RUN(td); - else { +#ifdef SMP + idle_cpus_mask &= ~PCPU_GET(cpumask); +#endif + } else { if (TD_IS_RUNNING(td)) { - /* Put us back on the run queue (kse and all). */ - setrunqueue(td, (flags & SW_PREEMPT) ? + /* Put us back on the run queue. */ + sched_add(td, (flags & SW_PREEMPT) ? SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED : SRQ_OURSELF|SRQ_YIELDING); } @@ -816,45 +862,72 @@ * as if it had been added to the run queue and selected. * It came from: * * A preemption + * * An upcall * * A followon */ KASSERT((newtd->td_inhibitors == 0), - ("trying to run inhibitted thread")); - newtd->td_kse->ke_flags |= KEF_DIDRUN; + ("trying to run inhibited thread")); + newtd->td_sched->ts_flags |= TSF_DIDRUN; TD_SET_RUNNING(newtd); if ((newtd->td_proc->p_flag & P_NOLOAD) == 0) sched_load_add(); } else { newtd = choosethread(); } + MPASS(newtd->td_lock == &sched_lock); if (td != newtd) { #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif - cpu_switch(td, newtd); + + /* I feel sleepy */ + cpu_switch(td, newtd, td->td_lock); + /* + * Where am I? What year is it? + * We are in the same thread that went to sleep above, + * but any amount of time may have passed. All out context + * will still be available as will local variables. + * PCPU values however may have changed as we may have + * changed CPU so don't trust cached values of them. + * New threads will go to fork_exit() instead of here + * so if you change things here you may need to change + * things there too. + * If the thread above was exiting it will never wake + * up again here, so either it has saved everything it + * needed to, or the thread_wait() or wait() will + * need to reap it. 
+ */ #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); #endif } +#ifdef SMP + if (td->td_flags & TDF_IDLETD) + idle_cpus_mask |= PCPU_GET(cpumask); +#endif sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); + MPASS(td->td_lock == &sched_lock); } void sched_wakeup(struct thread *td) { + struct td_sched *ts; - mtx_assert(&sched_lock, MA_OWNED); - if (td->td_slptime > 1) { + THREAD_LOCK_ASSERT(td, MA_OWNED); + ts = td->td_sched; + if (ts->ts_slptime > 1) { updatepri(td); resetpriority(td); } - td->td_slptime = 0; - setrunqueue(td, SRQ_BORING); + td->td_slptick = ticks; + ts->ts_slptime = 0; + sched_add(td, SRQ_BORING); } #ifdef SMP @@ -984,45 +1057,56 @@ sched_add(struct thread *td, int flags) #ifdef SMP { - struct kse *ke; + struct td_sched *ts; int forwarded = 0; int cpu; int single_cpu = 0; - ke = td->td_kse; - mtx_assert(&sched_lock, MA_OWNED); - KASSERT(ke->ke_state != KES_ONRUNQ, - ("sched_add: kse %p (%s) already in run queue", ke, - td->td_proc->p_comm)); - KASSERT(td->td_proc->p_sflag & PS_INMEM, - ("sched_add: process swapped out")); + ts = td->td_sched; + THREAD_LOCK_ASSERT(td, MA_OWNED); + KASSERT((td->td_inhibitors == 0), + ("sched_add: trying to run inhibited thread")); + KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), + ("sched_add: bad thread state")); + KASSERT(td->td_flags & TDF_INMEM, + ("sched_add: thread swapped out")); CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", - td, td->td_proc->p_comm, td->td_priority, curthread, - curthread->td_proc->p_comm); + td, td->td_name, td->td_priority, curthread, + curthread->td_name); + /* + * Now that the thread is moving to the run-queue, set the lock + * to the scheduler's lock. + */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_lock_set(td, &sched_lock); + } + TD_SET_RUNQ(td); if (td->td_pinned != 0) { cpu = td->td_lastcpu; - ke->ke_runq = &runq_pcpu[cpu]; + ts->ts_runq = &runq_pcpu[cpu]; single_cpu = 1; CTR3(KTR_RUNQ, - "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu); - } else if ((ke)->ke_flags & KEF_BOUND) { + "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td, cpu); + } else if ((ts)->ts_flags & TSF_BOUND) { /* Find CPU from bound runq */ - KASSERT(SKE_RUNQ_PCPU(ke),("sched_add: bound kse not on cpu runq")); - cpu = ke->ke_runq - &runq_pcpu[0]; + KASSERT(SKE_RUNQ_PCPU(ts),("sched_add: bound td_sched not on cpu runq")); + cpu = ts->ts_runq - &runq_pcpu[0]; single_cpu = 1; CTR3(KTR_RUNQ, - "sched_add: Put kse:%p(td:%p) on cpu%d runq", ke, td, cpu); + "sched_add: Put td_sched:%p(td:%p) on cpu%d runq", ts, td, cpu); } else { CTR2(KTR_RUNQ, - "sched_add: adding kse:%p (td:%p) to gbl runq", ke, td); + "sched_add: adding td_sched:%p (td:%p) to gbl runq", ts, td); cpu = NOCPU; - ke->ke_runq = &runq; + ts->ts_runq = &runq; } if (single_cpu && (cpu != PCPU_GET(cpuid))) { kick_other_cpu(td->td_priority,cpu); } else { + if (!single_cpu) { cpumask_t me = PCPU_GET(cpumask); int idle = idle_cpus_mask & me; @@ -1031,6 +1115,7 @@ (idle_cpus_mask & ~(hlt_cpus_mask | me))) forwarded = forward_wakeup(cpu); } + if (!forwarded) { if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td)) return; @@ -1041,24 +1126,33 @@ if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_add(); - runq_add(ke->ke_runq, ke, flags); - ke->ke_state = KES_ONRUNQ; + runq_add(ts->ts_runq, ts, flags); } #else /* SMP */ { - struct kse *ke; - ke = td->td_kse; - mtx_assert(&sched_lock, MA_OWNED); - KASSERT(ke->ke_state != KES_ONRUNQ, - ("sched_add: kse %p (%s) 
already in run queue", ke, - td->td_proc->p_comm)); - KASSERT(td->td_proc->p_sflag & PS_INMEM, - ("sched_add: process swapped out")); + struct td_sched *ts; + ts = td->td_sched; + THREAD_LOCK_ASSERT(td, MA_OWNED); + KASSERT((td->td_inhibitors == 0), + ("sched_add: trying to run inhibited thread")); + KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), + ("sched_add: bad thread state")); + KASSERT(td->td_flags & TDF_INMEM, + ("sched_add: thread swapped out")); CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)", - td, td->td_proc->p_comm, td->td_priority, curthread, - curthread->td_proc->p_comm); - CTR2(KTR_RUNQ, "sched_add: adding kse:%p (td:%p) to runq", ke, td); - ke->ke_runq = &runq; + td, td->td_name, td->td_priority, curthread, + curthread->td_name); + /* + * Now that the thread is moving to the run-queue, set the lock + * to the scheduler's lock. + */ + if (td->td_lock != &sched_lock) { + mtx_lock_spin(&sched_lock); + thread_lock_set(td, &sched_lock); + } + TD_SET_RUNQ(td); + CTR2(KTR_RUNQ, "sched_add: adding td_sched:%p (td:%p) to runq", ts, td); + ts->ts_runq = &runq; /* * If we are yielding (on the way out anyhow) @@ -1077,70 +1171,30 @@ } if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_add(); - runq_add(ke->ke_runq, ke, flags); - ke->ke_state = KES_ONRUNQ; + runq_add(ts->ts_runq, ts, flags); maybe_resched(td); } #endif /* SMP */ void -sched_run_ithread(struct thread *td) -{ - struct kse *ke = td->td_kse; - - /* Inline of setrunqueue */ - CTR2(KTR_RUNQ, "sched_run_ithread: td:%p pid:%d", - td, td->td_proc->p_pid); - CTR5(KTR_SCHED, "sched_run_ithread: %p(%s) prio %d by %p(%s)", - td, td->td_proc->p_comm, td->td_priority, ctd, - ctd->td_proc->p_comm); - mtx_assert(&sched_lock, MA_OWNED); - KASSERT((td->td_inhibitors == 0), - ("sched_run_ithread: trying to run inhibitted thread")); - KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)), - ("sched_run_ithread: bad thread state")); - KASSERT(ke->ke_state != KES_ONRUNQ, - ("sched_run_ithread: kse %p (%s) already in run queue", ke, - td->td_proc->p_comm)); - KASSERT(td->td_proc->p_sflag & PS_INMEM, - ("sched_run_ithread: process swapped out")); - CTR5(KTR_SCHED, "sched_run_ithread: %p(%s) prio %d by %p(%s)", - td, td->td_proc->p_comm, td->td_priority, curthread, - curthread->td_proc->p_comm); - CTR2(KTR_RUNQ, "sched_run_ithread: adding kse:%p (td:%p) to runq", ke, td); - - TD_SET_RUNQ(td); - ke->ke_runq = &runq; - /* Preempt if we can. If we did, we're finished */ - if (maybe_preempt(td)) - return; - /* We didn't preempt. 
Place on runq */ - if ((td->td_proc->p_flag & P_NOLOAD) == 0) - sched_load_add(); - runq_add(ke->ke_runq, ke, SRQ_INTR); - ke->ke_state = KES_ONRUNQ; - maybe_resched(td); -} - -void sched_rem(struct thread *td) { - struct kse *ke; + struct td_sched *ts; - ke = td->td_kse; - KASSERT(td->td_proc->p_sflag & PS_INMEM, - ("sched_rem: process swapped out")); - KASSERT((ke->ke_state == KES_ONRUNQ), - ("sched_rem: KSE not on run queue")); + ts = td->td_sched; + KASSERT(td->td_flags & TDF_INMEM, + ("sched_rem: thread swapped out")); + KASSERT(TD_ON_RUNQ(td), + ("sched_rem: thread not on run queue")); mtx_assert(&sched_lock, MA_OWNED); CTR5(KTR_SCHED, "sched_rem: %p(%s) prio %d by %p(%s)", - td, td->td_proc->p_comm, td->td_priority, curthread, - curthread->td_proc->p_comm); + td, td->td_name, td->td_priority, curthread, + curthread->td_name); if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_load_rem(); - runq_remove(ke->ke_runq, ke); - ke->ke_state = KES_THREAD; + runq_remove(ts->ts_runq, ts); + TD_SET_CAN_RUN(td); } /* @@ -1150,58 +1204,83 @@ struct thread * sched_choose(void) { - struct kse *ke; + struct td_sched *ts; struct runq *rq; + mtx_assert(&sched_lock, MA_OWNED); #ifdef SMP - struct kse *kecpu; + struct td_sched *kecpu; rq = &runq; - ke = runq_choose(&runq); + ts = runq_choose(&runq); kecpu = runq_choose(&runq_pcpu[PCPU_GET(cpuid)]); - if (ke == NULL || + if (ts == NULL || (kecpu != NULL && - kecpu->ke_thread->td_priority < ke->ke_thread->td_priority)) { - CTR2(KTR_RUNQ, "choosing kse %p from pcpu runq %d", kecpu, + kecpu->ts_thread->td_priority < ts->ts_thread->td_priority)) { + CTR2(KTR_RUNQ, "choosing td_sched %p from pcpu runq %d", kecpu, PCPU_GET(cpuid)); - ke = kecpu; + ts = kecpu; rq = &runq_pcpu[PCPU_GET(cpuid)]; } else { - CTR1(KTR_RUNQ, "choosing kse %p from main runq", ke); + CTR1(KTR_RUNQ, "choosing td_sched %p from main runq", ts); } #else rq = &runq; - ke = runq_choose(&runq); + ts = runq_choose(&runq); #endif - if (ke) { - runq_remove(rq, ke); - ke->ke_state = KES_THREAD; + if (ts) { + runq_remove(rq, ts); + ts->ts_flags |= TSF_DIDRUN; + + KASSERT(ts->ts_thread->td_flags & TDF_INMEM, + ("sched_choose: thread swapped out")); + return (ts->ts_thread); + } + return (PCPU_GET(idlethread)); +} - KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM, - ("sched_choose: process swapped out")); - return (ke->ke_thread); +void +sched_userret(struct thread *td) +{ + /* + * XXX we cheat slightly on the locking here to avoid locking in + * the usual case. Setting td_priority here is essentially an + * incomplete workaround for not setting it properly elsewhere. + * Now that some interrupt handlers are threads, not setting it + * properly elsewhere can clobber it in the window between setting + * it here and returning to user mode, so don't waste time setting + * it perfectly here. + */ + KASSERT((td->td_flags & TDF_BORROWING) == 0, + ("thread with borrowed priority returning to userland")); + if (td->td_priority != td->td_user_pri) { + thread_lock(td); + td->td_priority = td->td_user_pri; + td->td_base_pri = td->td_user_pri; + thread_unlock(td); } - return (NULL); } void sched_bind(struct thread *td, int cpu) { - struct kse *ke; + struct td_sched *ts; - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("sched_bind: cannot bind non-running thread")); - ke = td->td_kse; - ke->ke_flags |= KEF_BOUND; + + ts = td->td_sched; + + ts->ts_flags |= TSF_BOUND; #ifdef SMP >>> TRUNCATED FOR MAIL (1000 lines) <<<
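For reference, the %cpu bookkeeping that the schedcpu() hunks above touch is a simple fixed-point exponential decay: once per second ts_pctcpu is multiplied by ccpu = exp(-1/20) expressed in fixed point, so after 60 seconds only exp(-3), roughly 5%, of the original value survives, which is the "decay 95% of ts_pctcpu in 60 seconds" noted in the driver comments. Below is a minimal standalone userland sketch of that arithmetic, not code from the change itself; the FSHIFT/FSCALE values are assumed to mirror the stock fixed-point scale in sys/param.h, and unsigned int stands in for the kernel's fixpt_t.

/*
 * Standalone sketch (not kernel code) of the per-second %cpu decay
 * applied by schedcpu() in sched_4bsd.c.  FSHIFT/FSCALE are assumed
 * to match the kernel's fixed-point scale; exact values illustrative.
 */
#include <stdio.h>
#include <math.h>

#define	FSHIFT	11		/* bits of fraction in the fixed-point format */
#define	FSCALE	(1 << FSHIFT)	/* 1.0 in fixed point */

typedef unsigned int fixpt_t;

int
main(void)
{
	/* ccpu = exp(-1/20) in fixed point, as in sched_4bsd.c. */
	fixpt_t ccpu = (fixpt_t)(0.95122942450071400909 * FSCALE);
	fixpt_t pctcpu = FSCALE;	/* start at 100% cpu */
	int sec;

	/*
	 * Apply ts_pctcpu = (ts_pctcpu * ccpu) >> FSHIFT once per
	 * simulated second for one minute; roughly 5% should remain,
	 * i.e. about 95% of the original value has decayed.
	 */
	for (sec = 1; sec <= 60; sec++)
		pctcpu = (pctcpu * ccpu) >> FSHIFT;

	printf("after 60s: %.1f%% of the original %%cpu remains "
	    "(exp(-3) is about %.1f%%)\n",
	    100.0 * pctcpu / FSCALE, 100.0 * exp(-3.0));
	return (0);
}

Running the sketch prints a remaining fraction just under 5%, slightly below exp(-3) because the right shift truncates on every step, which is the same behaviour the kernel's integer-only loop exhibits.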