Date: Thu, 30 Dec 2004 20:59:52 GMT From: John Baldwin <jhb@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 67928 for review Message-ID: <200412302059.iBUKxqHh045296@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=67928 Change 67928 by jhb@jhb_slimer on 2004/12/30 20:59:50 IFC @67927. Loop back priority inversion fixes. Affected files ... .. //depot/projects/smpng/sys/compat/ndis/subr_hal.c#9 integrate .. //depot/projects/smpng/sys/dev/md/md.c#61 integrate .. //depot/projects/smpng/sys/geom/geom_kern.c#19 integrate .. //depot/projects/smpng/sys/geom/mirror/g_mirror.c#15 integrate .. //depot/projects/smpng/sys/geom/raid3/g_raid3.c#12 integrate .. //depot/projects/smpng/sys/kern/kern_intr.c#64 integrate .. //depot/projects/smpng/sys/kern/kern_resource.c#53 integrate .. //depot/projects/smpng/sys/kern/kern_synch.c#88 integrate .. //depot/projects/smpng/sys/kern/sched_4bsd.c#45 integrate .. //depot/projects/smpng/sys/kern/sched_ule.c#49 integrate .. //depot/projects/smpng/sys/kern/subr_trap.c#75 integrate .. //depot/projects/smpng/sys/kern/subr_turnstile.c#18 integrate .. //depot/projects/smpng/sys/sys/proc.h#137 integrate .. //depot/projects/smpng/sys/sys/sched.h#18 integrate .. //depot/projects/smpng/sys/sys/turnstile.h#5 integrate Differences ... ==== //depot/projects/smpng/sys/compat/ndis/subr_hal.c#9 (text+ko) ==== @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/compat/ndis/subr_hal.c,v 1.13 2004/08/01 20:04:30 wpaul Exp $"); +__FBSDID("$FreeBSD: src/sys/compat/ndis/subr_hal.c,v 1.14 2004/12/30 20:29:58 jhb Exp $"); #include <sys/param.h> #include <sys/types.h> @@ -308,7 +308,6 @@ mtx_lock_spin(&sched_lock); oldirql = curthread->td_base_pri; sched_prio(curthread, PI_REALTIME); - curthread->td_base_pri = PI_REALTIME; mtx_unlock_spin(&sched_lock); return(oldirql); @@ -324,7 +323,6 @@ panic("IRQL_NOT_GREATER_THAN"); mtx_lock_spin(&sched_lock); - curthread->td_base_pri = oldirql; sched_prio(curthread, oldirql); mtx_unlock_spin(&sched_lock); ==== //depot/projects/smpng/sys/dev/md/md.c#61 (text+ko) ==== @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. 
Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $FreeBSD: src/sys/dev/md/md.c,v 1.146 2004/12/27 17:20:06 pjd Exp $ + * $FreeBSD: src/sys/dev/md/md.c,v 1.147 2004/12/30 20:29:58 jhb Exp $ * */ @@ -71,6 +71,7 @@ #include <sys/namei.h> #include <sys/proc.h> #include <sys/queue.h> +#include <sys/sched.h> #include <sys/sf_buf.h> #include <sys/sysctl.h> #include <sys/vnode.h> @@ -618,7 +619,9 @@ int error, hasgiant; sc = arg; - curthread->td_base_pri = PRIBIO; + mtx_lock_spin(&sched_lock); + sched_prio(curthread, PRIBIO); + mtx_unlock_spin(&sched_lock); switch (sc->type) { case MD_VNODE: ==== //depot/projects/smpng/sys/geom/geom_kern.c#19 (text+ko) ==== @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/geom/geom_kern.c,v 1.35 2004/09/13 14:58:27 pjd Exp $"); +__FBSDID("$FreeBSD: src/sys/geom/geom_kern.c,v 1.36 2004/12/30 20:29:58 jhb Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -47,8 +47,9 @@ #include <sys/kthread.h> #include <sys/lock.h> #include <sys/mutex.h> +#include <sys/sbuf.h> +#include <sys/sched.h> #include <sys/sx.h> -#include <sys/sbuf.h> #include <geom/geom.h> #include <geom/geom_int.h> @@ -87,7 +88,9 @@ struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - tp->td_base_pri = PRIBIO; + mtx_lock_spin(&sched_lock); + sched_prio(tp, PRIBIO); + mtx_unlock_spin(&sched_lock); for(;;) { g_io_schedule_up(tp); } @@ -108,7 +111,9 @@ struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - tp->td_base_pri = PRIBIO; + mtx_lock_spin(&sched_lock); + sched_prio(tp, PRIBIO); + mtx_unlock_spin(&sched_lock); for(;;) { g_io_schedule_down(tp); } @@ -129,7 +134,9 @@ struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - tp->td_base_pri = PRIBIO; + mtx_lock_spin(&sched_lock); + sched_prio(tp, PRIBIO); + mtx_unlock_spin(&sched_lock); for(;;) { g_run_events(); tsleep(&g_wait_event, PRIBIO, "-", hz/10); ==== 
//depot/projects/smpng/sys/geom/mirror/g_mirror.c#15 (text+ko) ==== @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror.c,v 1.54 2004/12/23 21:15:15 pjd Exp $"); +__FBSDID("$FreeBSD: src/sys/geom/mirror/g_mirror.c,v 1.55 2004/12/30 20:29:58 jhb Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -42,6 +42,7 @@ #include <geom/geom.h> #include <sys/proc.h> #include <sys/kthread.h> +#include <sys/sched.h> #include <geom/mirror/g_mirror.h> @@ -1478,7 +1479,9 @@ u_int nreqs; sc = arg; - curthread->td_base_pri = PRIBIO; + mtx_lock_spin(&sched_lock); + sched_prio(curthread, PRIBIO); + mtx_unlock_spin(&sched_lock); nreqs = 0; for (;;) { ==== //depot/projects/smpng/sys/geom/raid3/g_raid3.c#12 (text+ko) ==== @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3.c,v 1.33 2004/12/28 21:52:45 pjd Exp $"); +__FBSDID("$FreeBSD: src/sys/geom/raid3/g_raid3.c,v 1.34 2004/12/30 20:29:58 jhb Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -43,6 +43,7 @@ #include <geom/geom.h> #include <sys/proc.h> #include <sys/kthread.h> +#include <sys/sched.h> #include <geom/raid3/g_raid3.h> @@ -1729,7 +1730,9 @@ u_int nreqs; sc = arg; - curthread->td_base_pri = PRIBIO; + mtx_lock_spin(&sched_lock); + sched_prio(curthread, PRIBIO); + mtx_unlock_spin(&sched_lock); nreqs = 0; for (;;) { ==== //depot/projects/smpng/sys/kern/kern_intr.c#64 (text+ko) ==== @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/kern_intr.c,v 1.120 2004/11/17 14:39:41 jhb Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/kern_intr.c,v 1.121 2004/12/30 20:29:58 jhb Exp $"); #include "opt_ddb.h" @@ -45,6 +45,7 @@ #include <sys/proc.h> #include <sys/random.h> #include <sys/resourcevar.h> +#include <sys/sched.h> #include <sys/sysctl.h> #include <sys/unistd.h> #include <sys/vmmeter.h> @@ -143,14 +144,12 @@ ih = TAILQ_FIRST(&ithd->it_handlers); if (ih == NULL) { mtx_lock_spin(&sched_lock); - td->td_priority = 
PRI_MAX_ITHD; - td->td_base_pri = PRI_MAX_ITHD; + sched_prio(td, PRI_MAX_ITHD); mtx_unlock_spin(&sched_lock); return; } mtx_lock_spin(&sched_lock); - td->td_priority = ih->ih_pri; - td->td_base_pri = ih->ih_pri; + sched_prio(td, ih->ih_pri); mtx_unlock_spin(&sched_lock); missed = 0; TAILQ_FOREACH(ih, &ithd->it_handlers, ih_next) { ==== //depot/projects/smpng/sys/kern/kern_resource.c#53 (text+ko) ==== @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/kern_resource.c,v 1.146 2004/10/05 18:51:11 jhb Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/kern_resource.c,v 1.147 2004/12/30 20:29:58 jhb Exp $"); #include "opt_compat.h" @@ -423,7 +423,6 @@ } sched_class(kg, rtp->type); if (curthread->td_ksegrp == kg) { - curthread->td_base_pri = kg->kg_user_pri; sched_prio(curthread, kg->kg_user_pri); /* XXX dubious */ } return (0); ==== //depot/projects/smpng/sys/kern/kern_synch.c#88 (text+ko) ==== @@ -35,7 +35,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/kern_synch.c,v 1.266 2004/12/26 00:14:21 jeff Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/kern_synch.c,v 1.267 2004/12/30 20:29:58 jhb Exp $"); #include "opt_ktrace.h" @@ -212,8 +212,6 @@ /* * Adjust this thread's priority. - * - * XXX: do we need to save priority in td_base_pri? 
*/ mtx_lock_spin(&sched_lock); sched_prio(td, priority & PRIMASK); ==== //depot/projects/smpng/sys/kern/sched_4bsd.c#45 (text+ko) ==== @@ -33,7 +33,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.70 2004/12/26 00:16:24 jeff Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.71 2004/12/30 20:52:44 jhb Exp $"); #define kse td_sched @@ -50,6 +50,7 @@ #include <sys/smp.h> #include <sys/sysctl.h> #include <sys/sx.h> +#include <sys/turnstile.h> #include <machine/smp.h> /* @@ -159,10 +160,12 @@ static void roundrobin(void *arg); static void schedcpu(void); static void schedcpu_thread(void); +static void sched_priority(struct thread *td, u_char prio); static void sched_setup(void *dummy); static void maybe_resched(struct thread *td); static void updatepri(struct ksegrp *kg); static void resetpriority(struct ksegrp *kg); +static void resetpriority_thread(struct thread *td, struct ksegrp *kg); #ifdef SMP static int forward_wakeup(int cpunum); #endif @@ -516,9 +519,7 @@ kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu); resetpriority(kg); FOREACH_THREAD_IN_GROUP(kg, td) { - if (td->td_priority >= PUSER) { - sched_prio(td, kg->kg_user_pri); - } + resetpriority_thread(td, kg); } } /* end of ksegrp loop */ mtx_unlock_spin(&sched_lock); @@ -561,7 +562,6 @@ newcpu = decay_cpu(loadfac, newcpu); kg->kg_estcpu = newcpu; } - resetpriority(kg); } /* @@ -573,7 +573,6 @@ resetpriority(struct ksegrp *kg) { register unsigned int newpriority; - struct thread *td; if (kg->kg_pri_class == PRI_TIMESHARE) { newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT + @@ -582,9 +581,25 @@ PRI_MAX_TIMESHARE); kg->kg_user_pri = newpriority; } - FOREACH_THREAD_IN_GROUP(kg, td) { - maybe_resched(td); /* XXXKSE silly */ - } +} + +/* + * Update the thread's priority when the associated ksegroup's user + * priority changes. 
+ */ +static void +resetpriority_thread(struct thread *td, struct ksegrp *kg) +{ + + /* Only change threads with a time sharing user priority. */ + if (td->td_priority < PRI_MIN_TIMESHARE || + td->td_priority > PRI_MAX_TIMESHARE) + return; + + /* XXX the whole needresched thing is broken, but not silly. */ + maybe_resched(td); + + sched_prio(td, kg->kg_user_pri); } /* ARGSUSED */ @@ -674,8 +689,7 @@ kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1); if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { resetpriority(kg); - if (td->td_priority >= PUSER) - td->td_priority = kg->kg_user_pri; + resetpriority_thread(td, kg); } } @@ -735,12 +749,16 @@ sched_nice(struct proc *p, int nice) { struct ksegrp *kg; + struct thread *td; PROC_LOCK_ASSERT(p, MA_OWNED); mtx_assert(&sched_lock, MA_OWNED); p->p_nice = nice; FOREACH_KSEGRP_IN_PROC(p, kg) { resetpriority(kg); + FOREACH_THREAD_IN_GROUP(kg, td) { + resetpriority_thread(td, kg); + } } } @@ -757,14 +775,16 @@ * changing the assignment of a kse to the thread, * and moving a KSE in the system run queue. */ -void -sched_prio(struct thread *td, u_char prio) +static void +sched_priority(struct thread *td, u_char prio) { CTR6(KTR_SCHED, "sched_prio: %p(%s) prio %d newprio %d by %p(%s)", td, td->td_proc->p_comm, td->td_priority, prio, curthread, curthread->td_proc->p_comm); mtx_assert(&sched_lock, MA_OWNED); + if (td->td_priority == prio) + return; if (TD_ON_RUNQ(td)) { adjustrunqueue(td, prio); } else { @@ -772,13 +792,76 @@ } } +/* + * Update a thread's priority when it is lent another thread's + * priority. + */ +void +sched_lend_prio(struct thread *td, u_char prio) +{ + + td->td_flags |= TDF_BORROWING; + sched_priority(td, prio); +} + +/* + * Restore a thread's priority when priority propagation is + * over. The prio argument is the minimum priority the thread + * needs to have to satisfy other possible priority lending + * requests. 
If the thread's regular priority is less + * important than prio the thread will keep a priority boost + * of prio. + */ +void +sched_unlend_prio(struct thread *td, u_char prio) +{ + u_char base_pri; + + if (td->td_base_pri >= PRI_MIN_TIMESHARE && + td->td_base_pri <= PRI_MAX_TIMESHARE) + base_pri = td->td_ksegrp->kg_user_pri; + else + base_pri = td->td_base_pri; + if (prio >= base_pri) { + td->td_flags &= ~TDF_BORROWING; + sched_prio(td, base_pri); + } else + sched_lend_prio(td, prio); +} + void +sched_prio(struct thread *td, u_char prio) +{ + u_char oldprio; + + /* First, update the base priority. */ + td->td_base_pri = prio; + + /* + * If the thread is borrowing another thread's priority, don't ever + * lower the priority. + */ + if (td->td_flags & TDF_BORROWING && td->td_priority < prio) + return; + + /* Change the real priority. */ + oldprio = td->td_priority; + sched_priority(td, prio); + + /* + * If the thread is on a turnstile, then let the turnstile update + * its state. + */ + if (TD_ON_LOCK(td) && oldprio != prio) + turnstile_adjust(td, oldprio); +} + +void sched_sleep(struct thread *td) { mtx_assert(&sched_lock, MA_OWNED); td->td_ksegrp->kg_slptime = 0; - td->td_base_pri = td->td_priority; } static void remrunqueue(struct thread *td); @@ -889,8 +972,10 @@ mtx_assert(&sched_lock, MA_OWNED); kg = td->td_ksegrp; - if (kg->kg_slptime > 1) + if (kg->kg_slptime > 1) { updatepri(kg); + resetpriority(kg); + } kg->kg_slptime = 0; setrunqueue(td, SRQ_BORING); } @@ -1157,10 +1242,13 @@ * it here and returning to user mode, so don't waste time setting * it perfectly here. 
*/ + KASSERT((td->td_flags & TDF_BORROWING) == 0, + ("thread with borrowed priority returning to userland")); kg = td->td_ksegrp; if (td->td_priority != kg->kg_user_pri) { mtx_lock_spin(&sched_lock); td->td_priority = kg->kg_user_pri; + td->td_base_pri = kg->kg_user_pri; mtx_unlock_spin(&sched_lock); } } ==== //depot/projects/smpng/sys/kern/sched_ule.c#49 (text+ko) ==== @@ -25,7 +25,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/sched_ule.c,v 1.143 2004/12/26 23:21:48 jeff Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/sched_ule.c,v 1.144 2004/12/30 20:52:44 jhb Exp $"); #include <opt_sched.h> @@ -46,6 +46,7 @@ #include <sys/sx.h> #include <sys/sysctl.h> #include <sys/sysproto.h> +#include <sys/turnstile.h> #include <sys/vmmeter.h> #ifdef KTRACE #include <sys/uio.h> @@ -133,8 +134,7 @@ #define KEF_XFERABLE 0x0004 /* Thread was added as transferable. */ #define KEF_HOLD 0x0008 /* Thread is temporarily bound. */ #define KEF_REMOVED 0x0010 /* Thread was removed while ASSIGNED */ -#define KEF_PRIOELEV 0x0020 /* Thread has had its prio elevated. */ -#define KEF_INTERNAL 0x0040 +#define KEF_INTERNAL 0x0020 struct kg_sched { struct thread *skg_last_assigned; /* (j) Last thread assigned to */ @@ -234,7 +234,7 @@ #define SCHED_INTERACTIVE(kg) \ (sched_interact_score(kg) < SCHED_INTERACT_THRESH) #define SCHED_CURR(kg, ke) \ - ((ke->ke_flags & KEF_PRIOELEV) || SCHED_INTERACTIVE(kg)) + ((ke->ke_thread->td_flags & TDF_BORROWING) || SCHED_INTERACTIVE(kg)) /* * Cpu percentage computation macros and defines. 
@@ -315,6 +315,7 @@ static struct kse *sched_choose(void); /* XXX Should be thread * */ static void sched_slice(struct kse *ke); static void sched_priority(struct ksegrp *kg); +static void sched_thread_priority(struct thread *td, u_char prio); static int sched_interact_score(struct ksegrp *kg); static void sched_interact_update(struct ksegrp *kg); static void sched_interact_fork(struct ksegrp *kg); @@ -1066,7 +1067,7 @@ kg = ke->ke_ksegrp; kseq = KSEQ_CPU(ke->ke_cpu); - if (ke->ke_flags & KEF_PRIOELEV) { + if (ke->ke_thread->td_flags & TDF_BORROWING) { ke->ke_slice = SCHED_SLICE_MIN; return; } @@ -1230,7 +1231,7 @@ } void -sched_prio(struct thread *td, u_char prio) +sched_thread_priority(struct thread *td, u_char prio) { struct kse *ke; @@ -1239,6 +1240,8 @@ curthread->td_proc->p_comm); ke = td->td_kse; mtx_assert(&sched_lock, MA_OWNED); + if (td->td_priority == prio) + return; if (TD_ON_RUNQ(td)) { /* * If the priority has been elevated due to priority @@ -1253,8 +1256,6 @@ ke->ke_runq = KSEQ_CPU(ke->ke_cpu)->ksq_curr; runq_add(ke->ke_runq, ke, 0); } - if (prio < td->td_priority) - ke->ke_flags |= KEF_PRIOELEV; /* * Hold this kse on this cpu so that sched_prio() doesn't * cause excessive migration. We only want migration to @@ -1267,7 +1268,71 @@ td->td_priority = prio; } +/* + * Update a thread's priority when it is lent another thread's + * priority. + */ +void +sched_lend_prio(struct thread *td, u_char prio) +{ + + td->td_flags |= TDF_BORROWING; + sched_thread_priority(td, prio); +} + +/* + * Restore a thread's priority when priority propagation is + * over. The prio argument is the minimum priority the thread + * needs to have to satisfy other possible priority lending + * requests. If the thread's regular priority is less + * important than prio, the thread will keep a priority boost + * of prio. 
+ */ void +sched_unlend_prio(struct thread *td, u_char prio) +{ + u_char base_pri; + + if (td->td_base_pri >= PRI_MIN_TIMESHARE && + td->td_base_pri <= PRI_MAX_TIMESHARE) + base_pri = td->td_ksegrp->kg_user_pri; + else + base_pri = td->td_base_pri; + if (prio >= base_pri) { + td->td_flags &= ~ TDF_BORROWING; + sched_thread_priority(td, base_pri); + } else + sched_lend_prio(td, prio); +} + +void +sched_prio(struct thread *td, u_char prio) +{ + u_char oldprio; + + /* First, update the base priority. */ + td->td_base_pri = prio; + + /* + * If the thread is borrowing another thread's priority, don't + * ever lower the priority. + */ + if (td->td_flags & TDF_BORROWING && td->td_priority < prio) + return; + + /* Change the real priority. */ + oldprio = td->td_priority; + sched_thread_priority(td, prio); + + /* + * If the thread is on a turnstile, then let the turnstile update + * its state. + */ + if (TD_ON_LOCK(td) && oldprio != prio) + turnstile_adjust(td, oldprio); +} + +void sched_switch(struct thread *td, struct thread *newtd, int flags) { struct kseq *ksq; @@ -1374,7 +1439,6 @@ mtx_assert(&sched_lock, MA_OWNED); td->td_slptime = ticks; - td->td_base_pri = td->td_priority; } void @@ -1644,21 +1708,14 @@ sched_userret(struct thread *td) { struct ksegrp *kg; - struct kse *ke; - kg = td->td_ksegrp; - ke = td->td_kse; - - if (td->td_priority != kg->kg_user_pri || - ke->ke_flags & KEF_PRIOELEV) { + KASSERT((td->td_flags & TDF_BORROWING) == 0, + ("thread with borrowed priority returning to userland")); + kg = td->td_ksegrp; + if (td->td_priority != kg->kg_user_pri) { mtx_lock_spin(&sched_lock); td->td_priority = kg->kg_user_pri; - if (ke->ke_flags & KEF_PRIOELEV) { - ke->ke_flags &= ~KEF_PRIOELEV; - sched_slice(ke); - if (ke->ke_slice == 0) - mi_switch(SW_INVOL, NULL); - } + td->td_base_pri = kg->kg_user_pri; mtx_unlock_spin(&sched_lock); } } ==== //depot/projects/smpng/sys/kern/subr_trap.c#75 (text+ko) ==== @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: 
src/sys/kern/subr_trap.c,v 1.277 2004/12/26 07:30:35 jeff Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/subr_trap.c,v 1.278 2004/12/30 20:30:58 jhb Exp $"); #include "opt_ktrace.h" #include "opt_mac.h" @@ -128,6 +128,7 @@ ticks = td->td_sticks - oticks; addupc_task(td, TRAPF_PC(frame), (u_int)ticks * psratio); } + /* * Let the scheduler adjust our priority etc. */ ==== //depot/projects/smpng/sys/kern/subr_turnstile.c#18 (text+ko) ==== @@ -59,7 +59,7 @@ #include "opt_turnstile_profiling.h" #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/subr_turnstile.c,v 1.150 2004/10/12 18:36:20 jhb Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/subr_turnstile.c,v 1.151 2004/12/30 20:52:44 jhb Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -145,7 +145,9 @@ #ifdef TURNSTILE_PROFILING static void init_turnstile_profiling(void *arg); #endif -static void propagate_priority(struct thread *); +static void propagate_priority(struct thread *td); +static int turnstile_adjust_thread(struct turnstile *ts, + struct thread *td); static void turnstile_setowner(struct turnstile *ts, struct thread *owner); /* @@ -158,7 +160,6 @@ { struct turnstile_chain *tc; struct turnstile *ts; - struct thread *td1; int pri; mtx_assert(&sched_lock, MA_OWNED); @@ -187,8 +188,8 @@ * isn't SRUN or SLOCK. */ KASSERT(!TD_IS_SLEEPING(td), - ("sleeping thread (pid %d) owns a non-sleepable lock", - td->td_proc->p_pid)); + ("sleeping thread (tid %d) owns a non-sleepable lock", + td->td_tid)); /* * If this thread already has higher priority than the @@ -198,10 +199,16 @@ return; /* - * If lock holder is actually running, just bump priority. + * Bump this thread's priority. + */ + sched_lend_prio(td, pri); + + /* + * If lock holder is actually running or on the run queue + * then we are done. 
*/ - if (TD_IS_RUNNING(td)) { - td->td_priority = pri; + if (TD_IS_RUNNING(td) || TD_ON_RUNQ(td)) { + MPASS(td->td_blocked == NULL); return; } @@ -214,27 +221,11 @@ #endif /* - * If on run queue move to new run queue, and quit. - * XXXKSE this gets a lot more complicated under threads - * but try anyhow. - */ - if (TD_ON_RUNQ(td)) { - MPASS(td->td_blocked == NULL); - sched_prio(td, pri); - return; - } - - /* - * Bump this thread's priority. - */ - td->td_priority = pri; - - /* * If we aren't blocked on a lock, we should be. */ KASSERT(TD_ON_LOCK(td), ( - "process %d(%s):%d holds %s but isn't blocked on a lock\n", - td->td_proc->p_pid, td->td_proc->p_comm, td->td_state, + "thread %d(%s):%d holds %s but isn't blocked on a lock\n", + td->td_tid, td->td_proc->p_comm, td->td_state, ts->ts_lockobj->lo_name)); /* @@ -245,61 +236,81 @@ tc = TC_LOOKUP(ts->ts_lockobj); mtx_lock_spin(&tc->tc_lock); - /* - * This thread may not be blocked on this turnstile anymore - * but instead might already be woken up on another CPU - * that is waiting on sched_lock in turnstile_unpend() to - * finish waking this thread up. We can detect this case - * by checking to see if this thread has been given a - * turnstile by either turnstile_signal() or - * turnstile_broadcast(). In this case, treat the thread as - * if it was already running. - */ - if (td->td_turnstile != NULL) { + /* Resort td on the list if needed. */ + if (!turnstile_adjust_thread(ts, td)) { mtx_unlock_spin(&tc->tc_lock); return; } + mtx_unlock_spin(&tc->tc_lock); + } +} + +/* + * Adjust the thread's position on a turnstile after its priority has been + * changed. + */ +static int +turnstile_adjust_thread(struct turnstile *ts, struct thread *td) +{ + struct turnstile_chain *tc; + struct thread *td1, *td2; + + mtx_assert(&sched_lock, MA_OWNED); + MPASS(TD_ON_LOCK(td)); - /* - * Check if the thread needs to be moved up on - * the blocked chain. 
It doesn't need to be moved - * if it is already at the head of the list or if - * the item in front of it still has a higher priority. - */ - if (td == TAILQ_FIRST(&ts->ts_blocked)) { - mtx_unlock_spin(&tc->tc_lock); - continue; - } + /* + * This thread may not be blocked on this turnstile anymore + * but instead might already be woken up on another CPU + * that is waiting on sched_lock in turnstile_unpend() to + * finish waking this thread up. We can detect this case + * by checking to see if this thread has been given a + * turnstile by either turnstile_signal() or + * turnstile_broadcast(). In this case, treat the thread as + * if it was already running. + */ + if (td->td_turnstile != NULL) + return (0); - td1 = TAILQ_PREV(td, threadqueue, td_lockq); - if (td1->td_priority <= pri) { - mtx_unlock_spin(&tc->tc_lock); - continue; - } + /* + * Check if the thread needs to be moved on the blocked chain. + * It needs to be moved if either its priority is lower than + * the previous thread or higher than the next thread. + */ + tc = TC_LOOKUP(ts->ts_lockobj); + mtx_assert(&tc->tc_lock, MA_OWNED); + td1 = TAILQ_PREV(td, threadqueue, td_lockq); + td2 = TAILQ_NEXT(td, td_lockq); + if ((td1 != NULL && td->td_priority < td1->td_priority) || + (td2 != NULL && td->td_priority > td2->td_priority)) { /* * Remove thread from blocked chain and determine where - * it should be moved up to. Since we know that td1 has - * a lower priority than td, we know that at least one - * thread in the chain has a lower priority and that - * td1 will thus not be NULL after the loop. + * it should be moved to. 
*/ mtx_lock_spin(&td_contested_lock); TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq); TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq) { MPASS(td1->td_proc->p_magic == P_MAGIC); - if (td1->td_priority > pri) + if (td1->td_priority > td->td_priority) break; } - MPASS(td1 != NULL); - TAILQ_INSERT_BEFORE(td1, td, td_lockq); + if (td1 == NULL) + TAILQ_INSERT_TAIL(&ts->ts_blocked, td, td_lockq); + else + TAILQ_INSERT_BEFORE(td1, td, td_lockq); mtx_unlock_spin(&td_contested_lock); - CTR4(KTR_LOCK, - "propagate_priority: td %p moved before %p on [%p] %s", - td, td1, ts->ts_lockobj, ts->ts_lockobj->lo_name); - mtx_unlock_spin(&tc->tc_lock); + if (td1 == NULL) + CTR3(KTR_LOCK, + "turnstile_adjust_thread: td %d put at tail on [%p] %s", + td->td_tid, ts->ts_lockobj, ts->ts_lockobj->lo_name); + else + CTR4(KTR_LOCK, + "turnstile_adjust_thread: td %d moved before %d on [%p] %s", + td->td_tid, td1->td_tid, ts->ts_lockobj, + ts->ts_lockobj->lo_name); } + return (1); } /* @@ -355,6 +366,46 @@ SYSINIT(turnstile0, SI_SUB_LOCK, SI_ORDER_ANY, init_turnstile0, NULL); /* + * Update a thread on the turnstile list after its priority has been changed. + * The old priority is passed in as an argument. + */ +void +turnstile_adjust(struct thread *td, u_char oldpri) +{ + struct turnstile_chain *tc; + struct turnstile *ts; + + mtx_assert(&sched_lock, MA_OWNED); + MPASS(TD_ON_LOCK(td)); + + /* + * Pick up the lock that td is blocked on. + */ + ts = td->td_blocked; + MPASS(ts != NULL); + tc = TC_LOOKUP(ts->ts_lockobj); + mtx_lock_spin(&tc->tc_lock); + + /* Resort the turnstile on the list. */ + if (!turnstile_adjust_thread(ts, td)) { + mtx_unlock_spin(&tc->tc_lock); + return; + } + + /* + * If our priority was lowered and we are at the head of the + * turnstile, then propagate our new priority up the chain. + * Note that we currently don't try to revoke lent priorities + * when our priority goes up. 
+ */ + if (td == TAILQ_FIRST(&ts->ts_blocked) && td->td_priority < oldpri) { + mtx_unlock_spin(&tc->tc_lock); + propagate_priority(td); + } else + mtx_unlock_spin(&tc->tc_lock); +} + +/* * Set the owner of the lock this turnstile is attached to. */ static void @@ -470,7 +521,7 @@ */ mtx_lock_spin(&sched_lock); if (td->td_priority < owner->td_priority) - owner->td_priority = td->td_priority; + sched_lend_prio(owner, td->td_priority); mtx_unlock_spin(&sched_lock); } @@ -578,14 +629,14 @@ propagate_priority(td); if (LOCK_LOG_TEST(lock, 0)) - CTR4(KTR_LOCK, "%s: td %p blocked on [%p] %s", __func__, td, - lock, lock->lo_name); >>> TRUNCATED FOR MAIL (1000 lines) <<<
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200412302059.iBUKxqHh045296>