Date: Sat, 10 Apr 2004 18:34:45 -0700 (PDT)
From: Julian Elischer <julian@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 50807 for review
Message-ID: <200404110134.i3B1YjoA099508@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=50807 Change 50807 by julian@julian_jules1 on 2004/04/10 18:34:01 Having thought about it, put everything into sched_4bsd.c This will still need cleaning but it cleans up stuff. Now to make it not crash. Affected files ... .. //depot/projects/nsched/sys/conf/files#5 edit .. //depot/projects/nsched/sys/kern/sched_4bsd.c#3 edit Differences ... ==== //depot/projects/nsched/sys/conf/files#5 (text+ko) ==== @@ -1080,9 +1080,7 @@ kern/link_elf.c standard kern/md4c.c optional netsmb kern/md5c.c standard -kern/scheduler/4bsd/sched_4bsd.c optional sched_4bsd -kern/scheduler/4bsd/sched_4bsd_kse.c optional sched_4bsd -kern/scheduler/4bsd/sched_4bsd_runq.c optional sched_4bsd +kern/sched_4bsd.c optional sched_4bsd kern/scheduler/ule/sched_ule.c optional sched_ule kern/scheduler/ule/sched_ule_kse.c optional sched_ule kern/scheduler/ule/sched_ule_runq.c optional sched_ule ==== //depot/projects/nsched/sys/kern/sched_4bsd.c#3 (text+ko) ==== @@ -15,6 +15,7 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. + * 3. [Condition rescinded by the Regents of the University of California.] * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. @@ -33,7 +34,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.37 2004/04/05 21:03:35 imp Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/sched_4bsd.c,v 1.35 2004/03/05 19:27:04 rwatson Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -48,7 +49,122 @@ #include <sys/smp.h> #include <sys/sysctl.h> #include <sys/sx.h> +#include <sys/queue.h> +#include <machine/critical.h> +#include <sys/thr.h> /* XXXKSE */ +#if 0 +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#endif +#include <vm/uma.h> +#include <machine/critical.h> +/* + * This is a scheduler private structure that it uses (for now) + * to implement the thread fairness algorythm. + * The threads are made runnable by the rest of the system, but + * only NKSE of them are actually put on the run queues to compete with + * threads from other processes. For normal processes there is only one KSE + * but for threaded processes we allocate only upto NKSE kses per ksegrp. + * KSEs are what is put on the system run queue, so multithreaded + * processes must multiplex their threads onto their KSEs, making them compete + * on a similar basis to nonthreaded processes. + * In pictures: + * With a single run queue used by all processors: + * + * RUNQ: --->KSE---KSE--... + * | / + * KSEG---THREAD--THREAD--THREAD + * + * (processors run THREADs from the KSEG until they are exhausted or + * the KSEG exhausts its quantum) + * + * XXX This structure is stictly speaking, not needed any more + * and its fields will move to the thread structure. The first 'N' + * threads can be kept trak of with a simple count. Do this soon. + */ + +struct kse { + struct proc *ke_proc; /* (*) Associated process. */ + struct ksegrp *ke_ksegrp; /* (*) Associated KSEG. */ + TAILQ_ENTRY(kse) ke_kglist; /* (*) Queue of KSEs in ke_ksegrp. */ + TAILQ_ENTRY(kse) ke_kgrlist; /* (*) Queue of KSEs in this state. */ + TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ + + struct thread *ke_thread; /* (*) Active associated thread. 
*/ +#define ke_startzero ke_flags + int ke_flags; /* (j) KEF_* flags. */ + fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ + u_char ke_oncpu; /* (j) Which cpu we are on. */ + char ke_rqindex; /* (j) Run queue index. */ + enum { + KES_UNUSED = 0x0, + KES_IDLE, + KES_ONRUNQ, + KES_UNQUEUED, /* in transit */ + KES_THREAD /* slaved to thread state */ + } ke_state; /* (j) KSE status. */ +#define ke_endzero ke_cpticks + int ke_cpticks; /* (j) Ticks of cpu time. */ + struct runq *ke_runq; /* runq the kse is currently on */ +}; + +/* flags kept in ke_flags */ +#define KEF_BOUND 0x00001 /* Stuck on a thread.. (as per normal procs) */ +#define KEF_EXIT 0x00002 /* KSE is being killed. */ +#define KEF_DIDRUN 0x00004 /* KSE actually ran. */ + +/* + * Scheduler private extensions to thread, ksegrp and proc structures. + * These are invisile outside the scheduler. + * They are usually allocated beyond the end of the proc, thread or ksegrp + * structure and always accessed via an indirection. + * This allows proc0 and friends to be allocated statically + * without knowing the sise of the extension. This allows different + * schedulers to have different extensions. + * Proc0 case: thread0:[... ptr] [td_sched0] + * | ^ + * \------/ + * + * Usual case: td-> [... ptr|td_sched] ( allocated together ) + * | ^ + * \---/ + */ + +struct td_sched { + struct kse *std_last_kse; /* (j) Previous value of td_kse. */ + struct kse *std_kse; /* (j) Current KSE if running. */ +}; +#define td_last_kse td_sched->std_last_kse +#define td_kse td_sched->std_kse + +struct kg_sched { + TAILQ_HEAD(, kse) skg_kseq; /* (ke_kglist) All KSEs. */ + TAILQ_HEAD(, kse) skg_iq; /* (ke_kgrlist) All idle KSEs. */ + struct thread *skg_last_assigned; /* (j) Last thread assigned */ + /* ( to a KSE). */ + int skg_runq_kses; /* (j) Num KSEs on runq. */ + int skg_idle_kses; /* (j) Num KSEs on iq. */ + int skg_kses; /* (j) Num KSEs in group. */ +}; +#define kg_kseq kg_sched->skg_kseq +#define kg_iq kg_sched->skg_iq +#define kg_last_assigned kg_sched->skg_last_assigned +#define kg_runq_kses kg_sched->skg_runq_kses +#define kg_idle_kses kg_sched->skg_idle_kses +#define kg_kses kg_sched->skg_kses + +#define FIRST_KSE_IN_KSEGRP(kg) TAILQ_FIRST(&(kg)->kg_kseq) +#define FIRST_KSE_IN_PROC(p) FIRST_KSE_IN_KSEGRP(FIRST_KSEGRP_IN_PROC(p)) + +void kse_free(struct kse *ke); +void kse_stash(struct kse *ke); +void kse_reassign(struct kse *ke); +void sched_fork_kse(struct thread *parenttd, struct kse *newke); +void sched_exit_kse(struct proc *parent, struct thread *childtd); + #define KTR_4BSD 0x0 /* @@ -65,14 +181,7 @@ #endif #define NICE_WEIGHT 1 /* Priorities per nice level. */ -struct ke_sched { - int ske_cpticks; /* (j) Ticks of cpu time. */ - struct runq *ske_runq; /* runq the kse is currently on */ -}; -#define ke_runq ke_sched->ske_runq -#define KEF_BOUND KEF_SCHED1 - -#define SKE_RUNQ_PCPU(ke) \ +#define KE_RUNQ_PCPU(ke) \ ((ke)->ke_runq != 0 && (ke)->ke_runq != &runq) /* @@ -81,13 +190,7 @@ */ #define KSE_CAN_MIGRATE(ke) \ ((ke)->ke_thread->td_pinned == 0 && ((ke)->ke_flags & KEF_BOUND) == 0) -static struct ke_sched ke_sched; -struct ke_sched *kse0_sched = &ke_sched; -struct kg_sched *ksegrp0_sched = NULL; -struct p_sched *proc0_sched = NULL; -struct td_sched *thread0_sched = NULL; - static int sched_tdcnt; /* Total runnable threads in the system. */ static int sched_quantum; /* Roundrobin scheduling quantum in ticks. 
*/ #define SCHED_QUANTUM (hz / 10) /* Default sched quantum */ @@ -169,6 +272,10 @@ curthread->td_flags |= TDF_NEEDRESCHED; } + +/**************************************************************** + * BSD 4 based scheduling policy engine. * + ***************************************************************/ /* * Force switch among equal priority processes every 100ms. * We don't actually need to force a context switch of the current process. @@ -338,20 +445,20 @@ * stop recalculating its priority until * it wakes up. */ - if (ke->ke_sched->ske_cpticks == 0) + if (ke->ke_cpticks == 0) continue; #if (FSHIFT >= CCPU_SHIFT) ke->ke_pctcpu += (realstathz == 100) - ? ((fixpt_t) ke->ke_sched->ske_cpticks) << + ? ((fixpt_t) ke->ke_cpticks) << (FSHIFT - CCPU_SHIFT) : - 100 * (((fixpt_t) ke->ke_sched->ske_cpticks) + 100 * (((fixpt_t) ke->ke_cpticks) << (FSHIFT - CCPU_SHIFT)) / realstathz; #else ke->ke_pctcpu += ((FSCALE - ccpu) * - (ke->ke_sched->ske_cpticks * + (ke->ke_cpticks * FSCALE / realstathz)) >> FSHIFT; #endif - ke->ke_sched->ske_cpticks = 0; + ke->ke_cpticks = 0; } /* end of kse loop */ /* * If there are ANY running threads in this KSEGRP, @@ -468,7 +575,18 @@ sched_tdcnt++; } -/* External interfaces start here */ + + + + +/******************************************************************* + * These functions represent entrypoints that a scheduler needs to * + * Supply in order to give the rest of the system opportunities to * + * notify it of important events and to let it do housekeeping * + * in a timely and efficient manner. Events include the creation, * + * allocation and death of various entities, timer events, and * + * direct calls for scheduler services. * + ******************************************************************/ int sched_runnable(void) { @@ -511,7 +629,7 @@ kg = td->td_ksegrp; ke = td->td_kse; - ke->ke_sched->ske_cpticks++; + ke->ke_cpticks++; kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + 1); if ((kg->kg_estcpu % INVERSE_ESTCPU_WEIGHT) == 0) { resetpriority(kg); @@ -529,56 +647,62 @@ * aggregated all the estcpu into the 'built-in' ksegrp. 
*/ void -sched_exit(struct proc *p, struct proc *p1) +sched_exit(struct proc *parent, struct thread *childtd) { - sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); - sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); - sched_exit_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); + sched_exit_ksegrp(parent, childtd); + sched_exit_thread(parent, childtd); } +/* This PROBABLY gives the estcpu to the wrong kseg */ void -sched_exit_kse(struct kse *ke, struct kse *child) +sched_exit_ksegrp(struct proc *parent, struct thread *child) { -} + struct ksegrp *kg; -void -sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child) -{ - mtx_assert(&sched_lock, MA_OWNED); - kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + child->kg_estcpu); + kg = FIRST_KSEGRP_IN_PROC(parent); /* XXXKSE */ + child->td_kse->ke_flags |= KEF_EXIT; + kg->kg_estcpu = ESTCPULIM(kg->kg_estcpu + child->td_ksegrp->kg_estcpu); } void -sched_exit_thread(struct thread *td, struct thread *child) +sched_exit_thread(struct proc *parent, struct thread *childtd) { - if ((child->td_proc->p_flag & P_NOLOAD) == 0) + if ((childtd->td_proc->p_flag & P_NOLOAD) == 0) sched_tdcnt--; + if (childtd->td_kse) + sched_exit_kse(parent, childtd); } void -sched_fork(struct proc *p, struct proc *p1) +sched_fork(struct thread *parenttd, struct proc *child) { - sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); - sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); - sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); + struct thread *td2; + struct kse *ke2; + + td2 = FIRST_THREAD_IN_PROC(child); + ke2 = FIRST_KSE_IN_PROC(child); + sched_fork_kse(parenttd, ke2); + sched_fork_ksegrp(parenttd, FIRST_KSEGRP_IN_PROC(child)); + ke2->ke_thread = td2; + td2->td_kse = ke2; + /* sched_fork_thread(parenttd, td2);*/ } void -sched_fork_kse(struct kse *ke, struct kse *child) +sched_fork_ksegrp(struct thread *parent, struct ksegrp *child) { - child->ke_sched->ske_cpticks = 0; -} + struct ksegrp *kg; -void -sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child) -{ mtx_assert(&sched_lock, MA_OWNED); + kg = parent->td_ksegrp; child->kg_estcpu = kg->kg_estcpu; } + + void -sched_fork_thread(struct thread *td, struct thread *child) +sched_fork_thread(struct thread *parent, struct thread *child) { } @@ -706,7 +830,7 @@ ke->ke_runq = &runq; } else { CTR1(KTR_4BSD, "adding kse:%p to pcpu runq", ke); - if (!SKE_RUNQ_PCPU(ke)) + if (!KE_RUNQ_PCPU(ke)) ke->ke_runq = &runq_pcpu[PCPU_GET(cpuid)]; } #else @@ -731,7 +855,7 @@ if ((td->td_proc->p_flag & P_NOLOAD) == 0) sched_tdcnt--; - runq_remove(ke->ke_sched->ske_runq, ke); + runq_remove(ke->ke_runq, ke); ke->ke_state = KES_THREAD; ke->ke_ksegrp->kg_runq_kses--; @@ -838,24 +962,21 @@ } int -sched_sizeof_kse(void) -{ - return (sizeof(struct kse) + sizeof(struct ke_sched)); -} -int sched_sizeof_ksegrp(void) { - return (sizeof(struct ksegrp)); + return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); } + int sched_sizeof_proc(void) { return (sizeof(struct proc)); } + int sched_sizeof_thread(void) { - return (sizeof(struct thread)); + return (sizeof(struct thread) + sizeof(struct td_sched)); } fixpt_t @@ -871,3 +992,926 @@ return (0); } + + + +static uma_zone_t kse_zone; + +struct kse kse0; +static struct kg_sched kg_sched0; +static struct td_sched td_sched0; + +static struct kse * kse_alloc(void); +static void kse_link(struct proc *p, struct kse *ke, struct ksegrp *kg); +static void kse_unlink(struct kse *ke); + +extern struct mtx kse_zombie_lock; 
+TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses); + +void +sched_GC(void) +{ + struct kse *ke_first, *ke_next; + + if (!TAILQ_EMPTY(&zombie_kses)) { + mtx_lock_spin(&kse_zombie_lock); + ke_first = TAILQ_FIRST(&zombie_kses); + if (ke_first) + TAILQ_INIT(&zombie_kses); + while (ke_first) { + ke_next = TAILQ_NEXT(ke_first, ke_procq); + kse_free(ke_first); + ke_first = ke_next; + } + mtx_unlock_spin(&kse_zombie_lock); + } +} + +void +schedinit(void) +{ + /* + * Set up the scheduler specific parts of proc0. + */ + ksegrp0.kg_sched = &kg_sched0; + proc0.p_sched = NULL; /* XXX */ + thread0.td_sched = &td_sched0; + + proc_linkup(&proc0, &ksegrp0, &thread0); + kse_link(&proc0, &kse0, &ksegrp0); + kse0.ke_thread = &thread0; + thread0.td_kse = &kse0; /* we are running */ + kse0.ke_state = KES_THREAD; + + kse_zone = uma_zcreate("KSE", sched_sizeof_kse(), + NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); +} + +/* + * for now have special thr code + * later on, clean these up into common code. + */ +#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) +int +sched_thr_newthread(struct thread *td, struct thread *td0, int flags) +{ + struct kse *ke0; + /* Initialize our kse structure. */ + ke0 = kse_alloc(); + bzero(&ke0->ke_startzero, + RANGEOF(struct kse, ke_startzero, ke_endzero)); + + /* Link the thread and kse into the ksegrp and make it runnable. */ + mtx_lock_spin(&sched_lock); + + thread_link(td0, td->td_ksegrp); + kse_link(td->td_proc, ke0, td->td_ksegrp); + + /* Bind this thread and kse together. */ + td0->td_kse = ke0; + ke0->ke_thread = td0; + + sched_fork_kse(td, td->td_kse); + sched_fork_thread(td, td0); + + TD_SET_CAN_RUN(td0); + if ((flags & THR_SUSPENDED) == 0) + setrunqueue(td0); + + mtx_unlock_spin(&sched_lock); + return (0); /* the API could fail but not in this case */ +} +#undef RANGEOF + +void +sched_thr_exit(struct thread *td) +{ + struct kse *ke; + + ke = td->td_kse; + + ke->ke_state = KES_UNQUEUED; + ke->ke_thread = NULL; + kse_unlink(ke); + sched_exit_kse(td->td_proc->p_pptr, td); + td->td_kse = NULL; + td->td_last_kse = NULL; +} + +/* + * Allocate a kse. + */ +static struct kse * +kse_alloc(void) +{ + return (uma_zalloc(kse_zone, M_WAITOK)); +} + +/* + * Deallocate a kse. + */ +void +kse_free(struct kse *td) +{ + uma_zfree(kse_zone, td); +} + +/* + * Stash an embarasingly extra kse into the zombie kse queue. + */ +void +kse_stash(struct kse *ke) +{ + mtx_lock_spin(&kse_zombie_lock); + TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq); + mtx_unlock_spin(&kse_zombie_lock); +} + +/* + * KSE is linked into kse group. 
+ */ +void +kse_link(struct proc *p, struct kse *ke, struct ksegrp *kg) +{ + TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist); + kg->kg_kses++; + ke->ke_state = KES_UNQUEUED; + ke->ke_proc = p; + ke->ke_ksegrp = kg; + ke->ke_thread = NULL; + ke->ke_oncpu = NOCPU; + ke->ke_flags = 0; +} + +int +sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td) +{ + struct kse *ke; + + ke = kse_alloc(); + if (ke) { + kse_link(p, ke, kg); + td->td_kse = ke; + ke->ke_thread = td; + return (0); + } + return (ENOMEM ); +} + +/* Assumes kg->kg_sched is already set up */ +void +sched_newkseg(struct ksegrp *kg) +{ + + TAILQ_INIT(&kg->kg_kseq); /* all kses in ksegrp */ + TAILQ_INIT(&kg->kg_iq); /* all idle kses in ksegrp */ + kg->kg_kses = 0; + kg->kg_runq_kses = 0; /* XXXKSE change name */ + kg->kg_idle_kses = 0; +} + +/* Assumes td->td_sched is already set up */ +void +sched_newthread(struct thread *td) +{ + td->td_last_kse = NULL; + td->td_kse = NULL; +} + +void +kse_unlink(struct kse *ke) +{ + struct ksegrp *kg; + + mtx_assert(&sched_lock, MA_OWNED); + kg = ke->ke_ksegrp; + TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist); + if (ke->ke_state == KES_IDLE) { + TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist); + kg->kg_idle_kses--; + } + if (--kg->kg_kses == 0) + ksegrp_unlink(kg); + /* + * Aggregate stats from the KSE + */ + kse_stash(ke); +} + +/* need the thread to know if we are on that ksegrp or not. */ +void +sched_clean_ksegrp(struct ksegrp *kg, struct thread *td) +{ + struct kse *ke; + + while ((ke = TAILQ_FIRST(&kg->kg_iq)) != NULL) { + KASSERT(ke->ke_state == KES_IDLE, + ("%s: wrong idle KSE state", __func__)); + kse_unlink(ke); + } + KASSERT((kg->kg_kses == 1), + ("%s: ksegrp still has %d KSEs", __func__, kg->kg_kses)); + KASSERT(((kg->kg_kses == 0) && (kg != td->td_ksegrp)) || + ((kg->kg_kses == 1) && (kg == td->td_ksegrp)), + ("ksegrp has wrong kg_kses: %d", kg->kg_kses)); +} + + +#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) + +/* new version of sched-fork() */ +void +sched_fork_kse(struct thread *parenttd, struct kse *ke2) +{ + struct ksegrp *kg2; + + kg2 = ke2->ke_ksegrp; + bzero(&ke2->ke_startzero, + (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero)); + ke2->ke_state = KES_THREAD; + ke2->ke_cpticks = 0; + kg2->kg_estcpu = parenttd->td_ksegrp->kg_estcpu; +} + +/* + * Try handle the generic case where there may be > 1 kseg even + * though that should never happen. It should be cheap to do so. + */ +void +sched_destroyproc(struct proc *p) +{ + struct ksegrp *kg; + struct kse *ke; + + /* remove all the kses we can find and free them */ + FOREACH_KSEGRP_IN_PROC(p, kg) { + while (!TAILQ_EMPTY(&kg->kg_kseq)) { + ke = TAILQ_FIRST(&kg->kg_kseq); + TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist); + if (ke->ke_thread) + ke->ke_thread->td_kse = NULL; + kse_free(ke); + } + } +} + +void +sched_exit_kse(struct proc *parent, struct thread *childtd) +{ + struct ksegrp *kg; + struct kse *ke; + + kg = childtd->td_ksegrp; + ke = childtd->td_kse; + KASSERT((ke),("unexpected null KSE ptr in sched_exit_kse()")); + ke->ke_state = KES_UNQUEUED; + ke->ke_thread = NULL; + /* + * Decide what to do with the KSE attached to this thread. 
+ */ + if (ke->ke_flags & KEF_EXIT) { + kse_unlink(ke); + if (kg->kg_kses == 0) { + sched_exit_ksegrp(parent, childtd); /* XXXKSE */ + ksegrp_unlink(kg); + } + } else { + kse_reassign(ke); + } + childtd->td_kse = NULL; + childtd->td_last_kse = NULL; +} + +void +sched_set_concurrancy(struct ksegrp *kg, int concurrancy) +{ + struct kse *newke; + + while (kg->kg_kses < concurrancy) { + newke = kse_alloc(); + bzero(&newke->ke_startzero, RANGEOF(struct kse, + ke_startzero, ke_endzero)); +#if 0 + mtx_lock_spin(&sched_lock); + bcopy(&ke->ke_startcopy, &newke->ke_startcopy, + RANGEOF(struct kse, ke_startcopy, ke_endcopy)); + mtx_unlock_spin(&sched_lock); +#endif + mtx_lock_spin(&sched_lock); + kse_link(kg->kg_proc, newke, kg); + sched_fork_kse(curthread, newke); + /* Add engine */ + kse_reassign(newke); + mtx_unlock_spin(&sched_lock); + } +} + +/* From kern_switch.c + * Parts Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/************************************************************************ + * Functions that manipulate runnability from a thread perspective. * + * These are a 'threading fairness' front-end to the BSD scheduler * + ************************************************************************/ +/*** +Here is the logic.. + +If there are N processors, then there are at most N KSEs (kernel +schedulable entities) working to process threads that belong to a +KSEGROUP (kg). If there are X of these KSEs actually running at the +moment in question, then there are at most M (N-X) of these KSEs on +the run queue, as running KSEs are not on the queue. + +Runnable threads are queued off the KSEGROUP in priority order. +If there are M or more threads runnable, the top M threads +(by priority) are 'preassigned' to the M KSEs not running. The KSEs take +their priority from those threads and are put on the system run queue. + +The last thread that had a priority high enough to have a KSE associated +with it, AND IS ON THE RUN QUEUE is pointed to by +kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs +assigned as all the available KSEs are activly running, or because there +are no threads queued, that pointer is NULL. 
+ +When a KSE is removed from the run queue to become runnable, we know +it was associated with the highest priority thread in the queue (at the head +of the queue). If it is also the last assigned we know M was 1 and must +now be 0. Since the thread is no longer queued that pointer must be +removed from it. Since we know there were no more KSEs available, +(M was 1 and is now 0) and since we are not FREEING our KSE +but using it, we know there are STILL no more KSEs available, we can prove +that the next thread in the ksegrp list will not have a KSE to assign to +it, so we can show that the pointer must be made 'invalid' (NULL). + +The pointer exists so that when a new thread is made runnable, it can +have its priority compared with the last assigned thread to see if +it should 'steal' its KSE or not.. i.e. is it 'earlier' +on the list than that thread or later.. If it's earlier, then the KSE is +removed from the last assigned (which is now not assigned a KSE) +and reassigned to the new thread, which is placed earlier in the list. +The pointer is then backed up to the previous thread (which may or may not +be the new thread). + +When a thread sleeps or is removed, the KSE becomes available and if there +are queued threads that are not assigned KSEs, the highest priority one of +them is assigned the KSE, which is then placed back on the run queue at +the appropriate place, and the kg->kg_last_assigned pointer is adjusted down +to point to it. + +The following diagram shows 2 KSEs and 3 threads from a single process. + + RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads) + \ \____ + \ \ + KSEGROUP---thread--thread--thread (queued in priority order) + \ / + \_______________/ + (last_assigned) + +The result of this scheme is that the M available KSEs are always +queued at the priorities they have inherrited from the M highest priority +threads for that KSEGROUP. If this situation changes, the KSEs are +reassigned to keep this true. +***/ + + +CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); + +#if 0 +static void runq_readjust(struct runq *rq, struct kse *ke); +#endif +/* + * Select the KSE that will be run next. From that find the thread, and + * remove it from the KSEGRP's run queue. If there is thread clustering, + * this will be what does it. + * XXX Change to take an argument indicating + * if the switch is voluntary or involuntary. 
+ */ +struct thread * +choosethread(void) +{ + struct kse *ke; + struct thread *td; + struct ksegrp *kg; + +#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) + if (smp_active == 0 && PCPU_GET(cpuid) != 0) { + /* Shutting down, run idlethread on AP's */ + td = PCPU_GET(idlethread); + ke = td->td_kse; + CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); + ke->ke_flags |= KEF_DIDRUN; + TD_SET_RUNNING(td); + return (td); + } +#endif + +retry: + ke = sched_choose(); + if (ke) { + td = ke->ke_thread; + KASSERT((td->td_kse == ke), ("kse/thread mismatch")); + kg = ke->ke_ksegrp; + if (td->td_proc->p_flag & P_SA) { + if (kg->kg_last_assigned == td) { + kg->kg_last_assigned = TAILQ_PREV(td, + threadqueue, td_runq); + } + TAILQ_REMOVE(&kg->kg_runq, td, td_runq); + } + kg->kg_runnable--; + CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d", + td, td->td_priority); + } else { + /* Simulate runq_choose() having returned the idle thread */ + td = PCPU_GET(idlethread); + ke = td->td_kse; + CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); + } + ke->ke_flags |= KEF_DIDRUN; + + /* + * If we are in panic, only allow system threads, + * plus the one we are running in, to be run. + */ + if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 && + (td->td_flags & TDF_INPANIC) == 0)) { + /* note that it is no longer on the run queue */ + TD_SET_CAN_RUN(td); + goto retry; + } + + TD_SET_RUNNING(td); + return (td); +} + +/* + * Given a surplus KSE, either assign a new runable thread to it + * (and put it in the run queue) or put it in the ksegrp's idle KSE list. + * Assumes that the original thread is not runnable. + */ +void +kse_reassign(struct kse *ke) +{ + struct ksegrp *kg; + struct thread *td; + struct thread *original; + + mtx_assert(&sched_lock, MA_OWNED); + original = ke->ke_thread; + KASSERT(original == NULL || TD_IS_INHIBITED(original), + ("reassigning KSE with runnable thread")); + kg = ke->ke_ksegrp; + if (original) + original->td_kse = NULL; + + /* + * Find the first unassigned thread + */ + if ((td = kg->kg_last_assigned) != NULL) + td = TAILQ_NEXT(td, td_runq); + else + td = TAILQ_FIRST(&kg->kg_runq); + + /* + * If we found one, assign it the kse, otherwise idle the kse. + */ + if (td) { + kg->kg_last_assigned = td; + td->td_kse = ke; + ke->ke_thread = td; + sched_add(td); + CTR2(KTR_RUNQ, "kse_reassign: ke%p -> td%p", ke, td); + return; + } + + ke->ke_state = KES_IDLE; + ke->ke_thread = NULL; + TAILQ_INSERT_TAIL(&kg->kg_iq, ke, ke_kgrlist); + kg->kg_idle_kses++; + CTR1(KTR_RUNQ, "kse_reassign: ke%p on idle queue", ke); + return; +} + +#if 0 +/* + * Remove a thread from its KSEGRP's run queue. + * This in turn may remove it from a KSE if it was already assigned + * to one, possibly causing a new thread to be assigned to the KSE + * and the KSE getting a new priority. + */ +static void +remrunqueue(struct thread *td) +{ + struct thread *td2, *td3; + struct ksegrp *kg; + struct kse *ke; + + mtx_assert(&sched_lock, MA_OWNED); + KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue")); + kg = td->td_ksegrp; + ke = td->td_kse; + CTR1(KTR_RUNQ, "remrunqueue: td%p", td); + kg->kg_runnable--; + TD_SET_CAN_RUN(td); + /* + * If it is not a threaded process, take the shortcut. + */ + if ((td->td_proc->p_flag & P_SA) == 0) { + /* Bring its kse with it, leave the thread attached */ + sched_rem(td); + ke->ke_state = KES_THREAD; + return; + } + td3 = TAILQ_PREV(td, threadqueue, td_runq); + TAILQ_REMOVE(&kg->kg_runq, td, td_runq); + if (ke) { + /* + * This thread has been assigned to a KSE. 
+ * We need to dissociate it and try assign the + * KSE to the next available thread. Then, we should + * see if we need to move the KSE in the run queues. + */ + sched_rem(td); + ke->ke_state = KES_THREAD; + td2 = kg->kg_last_assigned; + KASSERT((td2 != NULL), ("last assigned has wrong value")); + if (td2 == td) + kg->kg_last_assigned = td3; + kse_reassign(ke); + } +} +#endif + +/* + * Change the priority of a thread that is on the run queue. + */ +void +adjustrunqueue( struct thread *td, int newpri) +{ + struct ksegrp *kg; + struct kse *ke; + + mtx_assert(&sched_lock, MA_OWNED); + KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue")); + + ke = td->td_kse; + CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td); + /* + * If it is not a threaded process, take the shortcut. + */ + if ((td->td_proc->p_flag & P_SA) == 0) { + /* We only care about the kse in the run queue. */ + td->td_priority = newpri; + if (ke->ke_rqindex != (newpri / RQ_PPQ)) { + sched_rem(td); + sched_add(td); >>> TRUNCATED FOR MAIL (1000 lines) <<<
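
The scheduler-private extension scheme described in the td_sched/kg_sched comment above (and reflected in the new sched_sizeof_thread()/sched_sizeof_ksegrp() return values and in schedinit()) can be illustrated with a small standalone sketch. This is a userspace model under assumed stand-in names (sk_thread, sk_td_sched, sk_thread_alloc), not the kernel's allocator; it only shows the layout trick: allocate the public structure and its private extension in one chunk, always reach the extension through the embedded pointer, and wire statically allocated objects such as thread0 to a separate static extension by hand.

/*
 * Illustrative userspace sketch only -- not kernel code.  It models the
 * "private extension" layout described in the td_sched comment: one
 * allocation holds both the public structure and the scheduler-private
 * part, the private part is always reached through the embedded pointer,
 * and a statically allocated thread0 points at a separate static extension.
 */
#include <stdio.h>
#include <stdlib.h>

struct sk_td_sched {                            /* scheduler-private part */
        int std_dummy;
};

struct sk_thread {                              /* stand-in for struct thread */
        int td_priority;
        struct sk_td_sched *td_sched;           /* indirection to extension */
};

static size_t
sk_sched_sizeof_thread(void)
{
        /* The allocator never needs to know the extension's layout. */
        return (sizeof(struct sk_thread) + sizeof(struct sk_td_sched));
}

static struct sk_thread *
sk_thread_alloc(void)
{
        struct sk_thread *td;

        td = malloc(sk_sched_sizeof_thread());  /* one chunk for both parts */
        if (td == NULL)
                return (NULL);
        td->td_sched = (struct sk_td_sched *)(td + 1);  /* extension follows */
        return (td);
}

/* The proc0/thread0 case: both parts are static and linked by hand. */
static struct sk_td_sched sk_td_sched0;
static struct sk_thread sk_thread0 = { 0, &sk_td_sched0 };

int
main(void)
{
        struct sk_thread *td;

        td = sk_thread_alloc();
        if (td != NULL)
                printf("dynamic thread %p, extension %p\n",
                    (void *)td, (void *)td->td_sched);
        printf("static thread0 extension %p\n", (void *)sk_thread0.td_sched);
        free(td);
        return (0);
}

Because callers only ever go through the embedded pointer, a different scheduler can supply a differently sized extension without the rest of the system noticing.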
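
The kg_last_assigned bookkeeping spelled out in the "Here is the logic.." comment can be modelled the same way. The sketch below is a simplified stand-in (m_* names, a plain has_kse flag and an idle counter instead of real KSE structures, no sched_lock), not the kernel's setrunqueue()/kse_reassign(); it only exercises the rules the comment states: runnable threads queue in priority order, the first few of them own the group's KSEs, a freed KSE goes to the first unassigned thread, and a newly runnable thread that sorts ahead of the last assigned one steals its KSE while the pointer backs up one place.

/*
 * Illustrative userspace model only -- not kernel code.  It tracks which
 * queued threads "own" a KSE, following the kg_last_assigned description
 * above, using the same sys/queue.h tail-queue macros as the kernel.
 */
#include <stdio.h>
#include <sys/queue.h>

struct mthread {
        int pri;                        /* lower value runs earlier */
        int has_kse;                    /* 1 if a KSE is assigned */
        TAILQ_ENTRY(mthread) runq;
};
TAILQ_HEAD(mthreadq, mthread);

struct mksegrp {
        struct mthreadq runq;           /* runnable threads, best first */
        struct mthread *last_assigned;  /* last queued thread with a KSE */
        int idle_kses;                  /* KSEs with no thread to run */
};

/* A KSE became free: hand it to the first unassigned thread, if any. */
static void
m_reassign(struct mksegrp *kg)
{
        struct mthread *td;

        td = (kg->last_assigned != NULL) ?
            TAILQ_NEXT(kg->last_assigned, runq) : TAILQ_FIRST(&kg->runq);
        if (td != NULL) {
                td->has_kse = 1;
                kg->last_assigned = td;
        } else
                kg->idle_kses++;
}

/* A thread became runnable: queue it in priority order, maybe steal a KSE. */
static void
m_setrun(struct mksegrp *kg, struct mthread *td)
{
        struct mthread *i, *after;

        after = NULL;                   /* last thread that still beats td */
        TAILQ_FOREACH(i, &kg->runq, runq) {
                if (i->pri > td->pri)
                        break;
                after = i;
        }
        if (after != NULL)
                TAILQ_INSERT_AFTER(&kg->runq, after, td, runq);
        else
                TAILQ_INSERT_HEAD(&kg->runq, td, runq);

        if (kg->idle_kses > 0) {
                /* A KSE was idle, so every queued thread already had one. */
                kg->idle_kses--;
                td->has_kse = 1;
                kg->last_assigned = TAILQ_LAST(&kg->runq, mthreadq);
        } else if (kg->last_assigned != NULL &&
            td->pri < kg->last_assigned->pri) {
                /* td runs earlier: steal the KSE and back the pointer up. */
                kg->last_assigned->has_kse = 0;
                td->has_kse = 1;
                kg->last_assigned = TAILQ_PREV(kg->last_assigned,
                    mthreadq, runq);
        }
        /* Otherwise td simply waits on the queue without a KSE. */
}

int
main(void)
{
        struct mksegrp kg = { .last_assigned = NULL, .idle_kses = 2 };
        struct mthread a = { .pri = 10 }, b = { .pri = 20 }, c = { .pri = 5 };
        struct mthread *td;

        TAILQ_INIT(&kg.runq);
        m_setrun(&kg, &a);              /* takes the first idle KSE */
        m_setrun(&kg, &b);              /* takes the second idle KSE */
        m_setrun(&kg, &c);              /* no KSE left: steals b's */
        TAILQ_FOREACH(td, &kg.runq, runq)
                printf("pri %d has_kse %d\n", td->pri, td->has_kse);
        m_reassign(&kg);                /* a freed KSE goes back to b */
        return (0);
}

In the kernel these paths run under sched_lock and also move the KSE between the system run queues; the model leaves all of that out.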