Date: Fri, 25 Jun 2004 07:32:10 GMT
From: Julian Elischer <julian@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 55771 for review
Message-ID: <200406250732.i5P7WAXp009027@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=55771 Change 55771 by julian@julian_jules1 on 2004/06/25 07:32:03 safety safe for churn nowhere near finished (or even compiles) Affected files ... .. //depot/projects/nsched/sys/kern/sched_4bsd.c#18 edit .. //depot/projects/nsched/sys/kern/sched_ule.c#7 edit Differences ... ==== //depot/projects/nsched/sys/kern/sched_4bsd.c#18 (text+ko) ==== @@ -52,12 +52,6 @@ #include <sys/queue.h> #include <machine/critical.h> #include <sys/thr.h> /* XXXKSE */ -#if 0 -#include <vm/vm.h> -#include <vm/vm_extern.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#endif #include <vm/uma.h> #include <machine/critical.h> ==== //depot/projects/nsched/sys/kern/sched_ule.c#7 (text+ko) ==== @@ -1,3 +1,4 @@ + /*- * Copyright (c) 2002-2003, Jeffrey Roberson <jeff@freebsd.org> * All rights reserved. @@ -34,6 +35,7 @@ #include <sys/lock.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/queue.h> #include <sys/resource.h> #include <sys/resourcevar.h> #include <sys/sched.h> @@ -55,6 +57,10 @@ #define KTR_ULE KTR_NFS +#include <vm/uma.h> +#include <machine/critical.h> + + /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ /* XXX This is bogus compatability crap for ps */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ @@ -78,45 +84,221 @@ * These datastructures are allocated within their parent datastructure but * are scheduler specific. */ +/*- + * Description of a process. + * + * Below is a key of locks used to protect each member of struct proc. The + * lock is indicated by a reference to a specific character in parens in the + * associated comment. + * * - not yet protected + * a - only touched by curproc or parent during fork/wait + * b - created at fork, never changes + * (exception aiods switch vmspaces, but they are also + * marked 'P_SYSTEM' so hopefully it will be left alone) + * c - locked by proc mtx + * d - locked by allproc_lock lock + * e - locked by proctree_lock lock + * f - session mtx + * g - process group mtx + * h - callout_lock mtx + * i - by curproc or the master session mtx + * j - locked by sched_lock mtx + * k - only accessed by curthread + * l - the attaching proc or attaching proc parent + * m - Giant + * n - not locked, lazy + * o - ktrace lock + * p - select lock (sellock) + * q - td_contested lock + * r - p_peers lock + * x - created at fork, only changes during single threading in exec + * z - zombie threads/kse/ksegroup lock + * + */ +/*************** + * In pictures: + With a single run queue used by all processors: + + RUNQ: --->KSE---KSE--... SLEEPQ:[]---THREAD---THREAD---THREAD + | / []---THREAD + KSEG---THREAD--THREAD--THREAD [] + []---THREAD---THREAD + + (processors run THREADs from the KSEG until they are exhausted or + the KSEG exhausts its quantum) + +With PER-CPU run queues: +KSEs on the separate run queues directly +They would be given priorities calculated from the KSEG. + + * + *****************/ +/************************************************************************ + * Definitions of the run queues we use here. + */ + +/* + * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD: src/sys/sys/runq.h,v 1.4 2002/05/25 01:12:23 jake Exp $ + */ + +#ifndef _RUNQ_H_ +#define _RUNQ_H_ + +#include <machine/runq.h> + +struct kse; + +/* + * Run queue parameters. + */ + +#define RQ_NQS (64) /* Number of run queues. */ +#define RQ_PPQ (4) /* Priorities per queue. */ -struct ke_sched { - int ske_slice; - struct runq *ske_runq; +/* + * Head of run queues. + */ +TAILQ_HEAD(rqhead, kse); + +/* + * Bit array which maintains the status of a run queue. When a queue is + * non-empty the bit corresponding to the queue number will be set. + */ +struct rqbits { + rqb_word_t rqb_bits[RQB_LEN]; +}; + +/* + * Run queue structure. Contains an array of run queues on which processes + * are placed, and a structure to maintain the status of each queue. + */ +struct runq { + struct rqbits rq_status; + struct rqhead rq_queues[RQ_NQS]; +}; + +#endif /* end of Jake copyright file */ + + +/* + * The schedulable entity that can be given a context to run. + * A process may have several of these. Probably one per processor + * but posibly a few more. In this universe they are grouped + * with a KSEG that contains the priority and niceness + * for the group. + */ +struct kse { + struct proc *ke_proc; /* (*) Associated process. */ + struct ksegrp *ke_ksegrp; /* (*) Associated KSEG. */ + TAILQ_ENTRY(kse) ke_kglist; /* (*) Queue of KSEs in ke_ksegrp. */ + TAILQ_ENTRY(kse) ke_kgrlist; /* (*) Queue of KSEs in this state. */ + TAILQ_ENTRY(kse) ke_procq; /* (j/z) Run queue. */ + +#define ke_startzero ke_flags + int ke_flags; /* (j) KEF_* flags. */ + struct thread *ke_thread; /* (*) Active associated thread. */ + fixpt_t ke_pctcpu; /* (j) %cpu during p_swtime. */ + u_char ke_oncpu; /* (j) Which cpu we are on. */ + char ke_rqindex; /* (j) Run queue index. */ + enum { + KES_UNUSED = 0x0, + KES_IDLE, + KES_ONRUNQ, + KES_UNQUEUED, /* in transit */ + KES_THREAD /* slaved to thread state */ + } ke_state; /* (j) KSE status. */ +#define ke_endzero ke_dummy + u_char ke_dummy; + int ke_slice; + struct runq *ke_runq; /* The following variables are only used for pctcpu calculation */ - int ske_ltick; /* Last tick that we were running on */ - int ske_ftick; /* First tick that we were running on */ - int ske_ticks; /* Tick count */ + int ke_ltick; /* Last tick that we were running on */ + int ke_ftick; /* First tick that we were running on */ + int ke_ticks; /* Tick count */ /* CPU that we have affinity for. 
*/ - u_char ske_cpu; + u_char ke_cpu; }; -#define ke_slice ke_sched->ske_slice -#define ke_runq ke_sched->ske_runq -#define ke_ltick ke_sched->ske_ltick -#define ke_ftick ke_sched->ske_ftick -#define ke_ticks ke_sched->ske_ticks -#define ke_cpu ke_sched->ske_cpu -#define ke_assign ke_procq.tqe_next + +/* flags kept in ke_flags */ +#define KEF_ASSIGNED 0x00001 /* KSE is being migrated. */ +#define KEF_BOUND 0x00002 /* KSE can not migrate. */ +#define KEF_DIDRUN 0x02000 /* KSE actually ran. */ +#define KEF_EXIT 0x04000 /* KSE is being killed. */ + +#define FIRST_KSE_IN_KSEGRP(kg) TAILQ_FIRST(&(kg)->kg_kseq) +#define FIRST_KSE_IN_PROC(p) FIRST_KSE_IN_KSEGRP(FIRST_KSEGRP_IN_PROC(p)) + +static struct kse kse0; /* Primary kse in proc0. */ +static struct kse *kse_alloc(void); +static void kse_free(struct kse *ke); +static void kse_stash(struct kse *ke); +static void kse_unlink(struct kse *ke); +static void kse_reassign(struct kse *ke); +static void kse_link(struct kse *ke, struct ksegrp *kg); -#define KEF_ASSIGNED KEF_SCHED0 /* KSE is being migrated. */ -#define KEF_BOUND KEF_SCHED1 /* KSE can not migrate. */ +/* + * Scheduler specific extensions to various structures. + */ struct kg_sched { int skg_slptime; /* Number of ticks we vol. slept */ int skg_runtime; /* Number of ticks we were running */ + TAILQ_HEAD(, kse) skg_kseq; /* (ke_kglist) All KSEs. */ + TAILQ_HEAD(, kse) skg_iq; /* (ke_kgrlist) All idle KSEs. */ + struct thread *skg_last_assigned; /* (j) Last thread assigned */ + /* ( to a KSE). */ + int skg_runq_kses; /* (j) Num KSEs on runq. */ + int skg_idle_kses; /* (j) Num KSEs on iq. */ + int skg_kses; /* (j) Num KSEs in group. */ + int skg_concurrancy; /* (j) desired concurrancy */ + }; #define kg_slptime kg_sched->skg_slptime #define kg_runtime kg_sched->skg_runtime +#define kg_kseq kg_sched->skg_kseq +#define kg_iq kg_sched->skg_iq +#define kg_last_assigned kg_sched->skg_last_assigned +#define kg_runq_kses kg_sched->skg_runq_kses +#define kg_idle_kses kg_sched->skg_idle_kses +#define kg_kses kg_sched->skg_kses + struct td_sched { int std_slptime; + struct kse *std_last_kse; /* (j) Previous value of td_kse. */ + struct kse *std_kse; /* (j) Current KSE if running. 
*/ }; #define td_slptime td_sched->std_slptime +#define td_last_kse td_sched->std_last_kse +#define td_kse td_sched->std_kse struct td_sched td_sched; -struct ke_sched ke_sched; struct kg_sched kg_sched; -struct ke_sched *kse0_sched = &ke_sched; struct kg_sched *ksegrp0_sched = &kg_sched; struct p_sched *proc0_sched = NULL; struct td_sched *thread0_sched = &td_sched; @@ -286,6 +468,31 @@ static void kseq_notify(struct kse *ke, int cpu); static void kseq_assign(struct kseq *); static struct kse *kseq_steal(struct kseq *kseq, int stealidle); +#endif + +static void runq_add(struct runq *, struct kse *); +static int runq_check(struct runq *); +static struct kse *runq_choose(struct runq *); +static void runq_init(struct runq *); +static void runq_remove(struct runq *, struct kse *); + + +static void setup_runqs(void); +static void roundrobin(void *arg); +static void schedcpu(void); +static void schedcpu_thread(void); +static void maybe_resched(struct thread *td); +static void updatepri(struct ksegrp *kg); +static void resetpriority(struct ksegrp *kg); +static void sched_add(struct thread *td); +static void sched_rem(struct thread *td); +static struct kse * sched_choose(void); +static void adjustrunqueue( struct thread *td, int newpri) ; + +static void sched_fork_kse(struct thread *td, struct kse *child); +static void sched_exit_kse(struct kse *ke, struct thread *td); + +#ifdef SMP /* * On P4 Xeons the round-robin interrupt delivery is broken. As a result of * this, we can't pin interrupts to the cpu that they were delivered to, @@ -301,6 +508,7 @@ #endif /* !__i386__ */ #endif +#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start)) void kseq_print(int cpu) { @@ -1123,7 +1331,7 @@ } void -sched_switch(struct thread *td) +sched_switch(struct thread *td , int flags) { struct thread *newtd; struct kse *ke; @@ -1158,7 +1366,7 @@ kse_reassign(ke); } } - newtd = choosethread(); + newtd = choosethread(flags); if (td != newtd) cpu_switch(td, newtd); sched_lock.mtx_lock = (uintptr_t)td; @@ -1247,20 +1455,25 @@ * priority. */ void -sched_fork(struct proc *p, struct proc *p1) +sched_fork(struct thread *td, struct proc *p1) { + struct proc *p; + + p = td->td_proc; + mtx_assert(&sched_lock, MA_OWNED); p1->p_nice = p->p_nice; - sched_fork_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(p1)); - sched_fork_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(p1)); - sched_fork_thread(FIRST_THREAD_IN_PROC(p), FIRST_THREAD_IN_PROC(p1)); + sched_fork_ksegrp(td, FIRST_KSEGRP_IN_PROC(p1)); + sched_fork_kse(td, FIRST_KSE_IN_PROC(p1)); + sched_fork_thread(td, FIRST_THREAD_IN_PROC(p1)); } -void -sched_fork_kse(struct kse *ke, struct kse *child) +static void +sched_fork_kse(struct thread *td, struct kse *child) { + struct kse *ke = td->td_kse; child->ke_slice = 1; /* Attempt to quickly learn interactivity. */ child->ke_cpu = ke->ke_cpu; @@ -1273,8 +1486,10 @@ } void -sched_fork_ksegrp(struct ksegrp *kg, struct ksegrp *child) +sched_fork_ksegrp(struct thread *td, struct ksegrp *child) { + struct ksegrp *kg = td->td_ksegrp; + PROC_LOCK_ASSERT(child->kg_proc, MA_OWNED); child->kg_slptime = kg->kg_slptime; @@ -1290,11 +1505,6 @@ } void -sched_fork_thread(struct thread *td, struct thread *child) -{ -} - -void sched_class(struct ksegrp *kg, int class) { struct kseq *kseq; @@ -1348,29 +1558,31 @@ * Return some of the child's priority and interactivity to the parent. 
*/ void -sched_exit(struct proc *p, struct proc *child) +sched_exit(struct proc *p, struct thread *td ) { mtx_assert(&sched_lock, MA_OWNED); - sched_exit_kse(FIRST_KSE_IN_PROC(p), FIRST_KSE_IN_PROC(child)); - sched_exit_ksegrp(FIRST_KSEGRP_IN_PROC(p), FIRST_KSEGRP_IN_PROC(child)); + sched_exit_kse(FIRST_KSE_IN_PROC(p), td); + sched_exit_ksegrp(p, td); } void -sched_exit_kse(struct kse *ke, struct kse *child) +sched_exit_kse(struct kse *ke, struct thread *td) { - kseq_load_rem(KSEQ_CPU(child->ke_cpu), child); + kseq_load_rem(KSEQ_CPU(child->ke_cpu), td->td_kse); } void -sched_exit_ksegrp(struct ksegrp *kg, struct ksegrp *child) +sched_exit_ksegrp(struct proc *p, struct thread *td) { - /* kg->kg_slptime += child->kg_slptime; */ - kg->kg_runtime += child->kg_runtime; + struct ksegrp *kg = FIRST_KSEGRP_IN_PROC(p); + + /* kg->kg_slptime += td->td_ksegrp->kg_slptime; */ + kg->kg_runtime += td->td_ksegrp->kg_runtime; sched_interact_update(kg); } void -sched_exit_thread(struct thread *td, struct thread *child) +sched_exit_thread(struct proc *p, struct thread *child) { } @@ -1726,12 +1938,6 @@ } int -sched_sizeof_kse(void) -{ - return (sizeof(struct kse) + sizeof(struct ke_sched)); -} - -int sched_sizeof_ksegrp(void) { return (sizeof(struct ksegrp) + sizeof(struct kg_sched)); @@ -1748,3 +1954,1062 @@ { return (sizeof(struct thread) + sizeof(struct td_sched)); } + +/* + * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*** +Here is the logic.. + +If there are N processors, then there are at most N KSEs (kernel +schedulable entities) working to process threads that belong to a +KSEGROUP (kg). If there are X of these KSEs actually running at the +moment in question, then there are at most M (N-X) of these KSEs on +the run queue, as running KSEs are not on the queue. + +Runnable threads are queued off the KSEGROUP in priority order. +If there are M or more threads runnable, the top M threads +(by priority) are 'preassigned' to the M KSEs not running. The KSEs take +their priority from those threads and are put on the run queue. + +The last thread that had a priority high enough to have a KSE associated +with it, AND IS ON THE RUN QUEUE is pointed to by +kg->kg_last_assigned. 
If no threads queued off the KSEGROUP have KSEs +assigned as all the available KSEs are activly running, or because there +are no threads queued, that pointer is NULL. + +When a KSE is removed from the run queue to become runnable, we know +it was associated with the highest priority thread in the queue (at the head +of the queue). If it is also the last assigned we know M was 1 and must +now be 0. Since the thread is no longer queued that pointer must be +removed from it. Since we know there were no more KSEs available, +(M was 1 and is now 0) and since we are not FREEING our KSE +but using it, we know there are STILL no more KSEs available, we can prove +that the next thread in the ksegrp list will not have a KSE to assign to +it, so we can show that the pointer must be made 'invalid' (NULL). + +The pointer exists so that when a new thread is made runnable, it can +have its priority compared with the last assigned thread to see if +it should 'steal' its KSE or not.. i.e. is it 'earlier' +on the list than that thread or later.. If it's earlier, then the KSE is +removed from the last assigned (which is now not assigned a KSE) +and reassigned to the new thread, which is placed earlier in the list. +The pointer is then backed up to the previous thread (which may or may not +be the new thread). + +When a thread sleeps or is removed, the KSE becomes available and if there +are queued threads that are not assigned KSEs, the highest priority one of +them is assigned the KSE, which is then placed back on the run queue at +the approipriate place, and the kg->kg_last_assigned pointer is adjusted down +to point to it. + +The following diagram shows 2 KSEs and 3 threads from a single process. + + RUNQ: --->KSE---KSE--... (KSEs queued at priorities from threads) + \ \____ + \ \ + KSEGROUP---thread--thread--thread (queued in priority order) + \ / + \_______________/ + (last_assigned) + +The result of this scheme is that the M available KSEs are always +queued at the priorities they have inherrited from the M highest priority +threads for that KSEGROUP. If this situation changes, the KSEs are +reassigned to keep this true. +***/ + + +CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); + + +/* END */ + +void +sched_thread_exit(struct thread *td) +{ + struct kse *ke; + + ke = td->td_kse; + + if ((td->td_proc->p_flag & P_SA) && ke != NULL) { + ke->ke_thread = NULL; + td->td_kse = NULL; + kse_reassign(ke); + } + if ((td->td_proc->p_flag & P_NOLOAD) == 0) + sched_tdcnt--; + +} + +/* + * special version of the above for thr.. + * work towards merging them. + * Special code for thr library thread + * Called from: + * thr_exit1() + */ +void +sched_thr_exit(struct thread *td) +{ + struct kse *ke; + + ke = td->td_kse; + + if ((td->td_proc->p_flag & P_NOLOAD) == 0) + sched_tdcnt--; + + /* td is about to be freed, but keep it clean */ + td->td_kse = NULL; + td->td_last_kse = NULL; + kse_unlink(ke); /* also frees it */ +} + + +/* + * Allocate any resources the scheduler needs to allocate or set up + * on a new process at fork() time. 
+ * Called from: + * fork1() + */ +void +sched_fork(struct thread *td, struct proc *child) +{ + struct thread *newtd; + struct kse *newke; + + newtd = FIRST_THREAD_IN_PROC(child); + newke = FIRST_KSE_IN_PROC(child); + bzero(&newke->ke_startzero, + (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero)); + newke->ke_state = KES_THREAD; + newke->ke_cpticks = 0; + sched_fork_ksegrp(td, FIRST_KSEGRP_IN_PROC(child)); + newke->ke_thread = newtd; + newtd->td_kse = newke; +} + +static uma_zone_t kse_zone; + +struct kse kse0; +static struct kg_sched kg_sched0; +static struct td_sched td_sched0; + + +extern struct mtx kse_zombie_lock; +TAILQ_HEAD(, kse) zombie_kses = TAILQ_HEAD_INITIALIZER(zombie_kses); + +/* + * Occasionally the scheduler may need to do some GC.. + * Called from: + * thread_reap() + */ +void +sched_GC(void) +{ + struct kse *ke_first, *ke_next; + + if (!TAILQ_EMPTY(&zombie_kses)) { + mtx_lock_spin(&kse_zombie_lock); + ke_first = TAILQ_FIRST(&zombie_kses); + if (ke_first) + TAILQ_INIT(&zombie_kses); + mtx_unlock_spin(&kse_zombie_lock); + while (ke_first) { + ke_next = TAILQ_NEXT(ke_first, ke_procq); + kse_free(ke_first); + ke_first = ke_next; + } + } +} + +/* + * Very early in the boot some setup of scheduler-specific + * parts of proc0 and of soem scheduler resources needs to be done. + * Called from: + * proc0_init() + */ +void +schedinit(void) +{ + /* + * Set up the scheduler specific parts of proc0. + */ + ksegrp0.kg_sched = &kg_sched0; + proc0.p_sched = NULL; /* XXX */ + thread0.td_sched = &td_sched0; + + /* + * and link in our own per scheduler struct + */ + kse_link(&kse0, &ksegrp0); + /* + * and set it up as if BOUND and running + */ + kse0.ke_thread = &thread0; + thread0.td_kse = &kse0; /* we are running */ + kse0.ke_state = KES_THREAD; + + kse_zone = uma_zcreate("KSE", sizeof (struct kse), + NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); +} + +/* + * for now have special thr code + * later on, clean these up into common code. + * Called from: + * thr_create() + */ +int +sched_thr_newthread(struct thread *td, struct thread *newtd, int flags) +{ + struct kse *newke; + /* Initialize our kse structure. */ + newke = kse_alloc(); + bzero(&newke->ke_startzero, + RANGEOF(struct kse, ke_startzero, ke_endzero)); + + /* Link the thread and kse into the ksegrp and make it runnable. */ + mtx_lock_spin(&sched_lock); + + thread_link(newtd, td->td_ksegrp); + kse_link(newke, td->td_ksegrp); + + /* Bind this thread and kse together. */ + newtd->td_kse = newke; + newke->ke_thread = newtd; + bzero(&newke->ke_startzero, + (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero)); + newke->ke_state = KES_THREAD; + newke->ke_cpticks = 0; + sched_fork_kse(td->td_kse, newke); + + TD_SET_CAN_RUN(newtd); + if ((flags & THR_SUSPENDED) == 0) + setrunqueue(newtd); + + mtx_unlock_spin(&sched_lock); + return (0); /* the API could fail but not in this case */ +} + +/***************************** + * KSE zone/allocation methods. + */ +/* + * Allocate a kse. + */ +static struct kse * +kse_alloc(void) +{ + return (uma_zalloc(kse_zone, M_WAITOK)); +} + +/* + * Deallocate a kse. + */ +static void +kse_free(struct kse *td) +{ + uma_zfree(kse_zone, td); +} + +/* + * Stash an embarasingly extra kse into the zombie kse queue. + * Called from: + * kse_unlink() (local) + */ +static void +kse_stash(struct kse *ke) +{ + mtx_lock_spin(&kse_zombie_lock); + TAILQ_INSERT_HEAD(&zombie_kses, ke, ke_procq); + mtx_unlock_spin(&kse_zombie_lock); +} + +/* + * KSE is linked into kse group. 
+ * Called from: + * sched_newproc() (local) + * sched_thr_newthread() (local) + * schedinit() (local) + * sched_set_concurrancy() (local) + * + */ +static void +kse_link( struct kse *ke, struct ksegrp *kg) +{ + TAILQ_INSERT_HEAD(&kg->kg_kseq, ke, ke_kglist); + kg->kg_kses++; + ke->ke_state = KES_UNQUEUED; + ke->ke_proc = kg->kg_proc; /* really just a shortcut */ + ke->ke_ksegrp = kg; + ke->ke_thread = NULL; + ke->ke_oncpu = NOCPU; + ke->ke_flags = 0; +} + +/* + * Allocate scheduler specific per-process resources. + * The thread and ksegrp have already been linked in. + * Called from: + * proc_init() (UMA init method) + */ +int +sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td) +{ + struct kse *ke; + + /* + * For a new process, allocate a single KSE to the ksegrp. + */ + ke = kse_alloc(); + if (ke) { + kse_link(ke, kg); + td->td_kse = ke; + ke->ke_thread = td; + return (0); + } + return (ENOMEM ); +} + +/* + * Ksegrp is being either created or recycled. + * Fix up the per-scheduler resources associated with it. + * Called from: + * ksegrp_dtor() + * ksegrp_initi() + */ +void +sched_init_ksegrp(struct ksegrp *kg) +{ + + TAILQ_INIT(&kg->kg_kseq); /* all kses in ksegrp */ + TAILQ_INIT(&kg->kg_iq); /* all idle kses in ksegrp */ + kg->kg_kses = 0; + kg->kg_runq_kses = 0; /* XXXKSE change name */ + kg->kg_idle_kses = 0; +} + +/* + * thread is being either created or recycled. + * Fix up the per-scheduler resources associated with it. + * Called from: + * thread_dtor() + * thread_initi() + */ +/* Assumes td->td_sched is already set up */ +void +sched_init_thread(struct thread *td) +{ + td->td_last_kse = NULL; + td->td_kse = NULL; +} + + +/* + * code to take the per-scheduler KSE structure + * off the ksegrp it is hanging off and free it + * Called from: + * sched_destroyproc() + * sched_thr_exit() + * sched_set_concurrancy() via REDUCE_KSES() + * kse_reassign() via REDUCE_KSES() + */ +static void +kse_unlink(struct kse *ke) +{ + struct ksegrp *kg; + + mtx_assert(&sched_lock, MA_OWNED); + kg = ke->ke_ksegrp; + TAILQ_REMOVE(&kg->kg_kseq, ke, ke_kglist); + if (ke->ke_state == KES_IDLE) { + TAILQ_REMOVE(&kg->kg_iq, ke, ke_kgrlist); + kg->kg_idle_kses--; + } + /* + * Aggregate stats from the KSE + * ## none yet ## + */ + + kse_stash(ke); +} + +/* + * Whenever we have idle KSEs and there are too many for the concurrancy, + * then free as many as we can. Don't free too many if we have threads + * to run/kill. + */ +#define REDUCE_KSES(kg, skg) \ +do { \ + while ((skg->skg_concurrancy < skg->skg_kses) && \ + (skg->skg_idle_kses > 0) && \ + (skg->skg_kses > kg->kg_numthreads)) { \ + kse_unlink(TAILQ_FIRST(&skg->skg_iq)); \ + } \ +} while (0) + +/* + * Called by the uma process fini routine.. + * undo anything we may have done in the uma_init method. + * Panic if it's not all 1:1:1:1 + * Called from: + * proc_fini() (UMA method) + */ +void +sched_destroyproc(struct proc *p) +{ + struct ksegrp *kg; + struct kg_sched *skg; + + KASSERT((p->p_numthreads == 1), ("Cached proc with > 1 thread ")); + KASSERT((p->p_numksegrps == 1), ("Cached proc with > 1 ksegrp ")); + + kg = FIRST_KSEGRP_IN_PROC(p); + + KASSERT((kg->kg_kses == 1), ("Cached proc with > 1 kse ")); + + skg = kg->kg_sched; + kse_unlink(TAILQ_FIRST(&skg->skg_iq)); \ +} + +/* + * (Re) assign resources to allow the ksegrp to implement + * teh requested concurrancy. At this time it means allocating + * or freeing KSE structures. 
+ * Called from: + * kern_execve() (reverting to non threaded) + * kern_exit() (reverting to non threaded) + * thread_exit() (during removal of ksegrp) + * sched_exit_ksegrp() (local) + * kse_exit() (decreasing) + * kse_create() (increasing) + */ +void +sched_set_concurrancy(struct ksegrp *kg, int concurrancy) +{ + struct kse *newke; + struct kg_sched *skg; + + skg = kg->kg_sched; + skg->skg_concurrancy = concurrancy; + REDUCE_KSES(kg, skg); + while (skg->skg_kses < skg->skg_concurrancy) { + newke = kse_alloc(); + bzero(&newke->ke_startzero, RANGEOF(struct kse, + ke_startzero, ke_endzero)); +#if 0 + mtx_lock_spin(&sched_lock); + bcopy(&ke->ke_startcopy, &newke->ke_startcopy, + RANGEOF(struct kse, ke_startcopy, ke_endcopy)); + mtx_unlock_spin(&sched_lock); +#endif + mtx_lock_spin(&sched_lock); + kse_link(newke, kg); + bzero(&newke->ke_startzero, + (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero)); + newke->ke_state = KES_THREAD; + newke->ke_cpticks = 0; + /* Add engine */ + kse_reassign(newke); + mtx_unlock_spin(&sched_lock); + } +} + + +CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS); + +#if 0 +static void runq_readjust(struct runq *rq, struct kse *ke); +#endif +/* + * Select the KSE that will be run next. From that find the thread, and + * remove it from the KSEGRP's run queue. If there is thread clustering, + * this will be what does it. + * XXX Change to take an argument indicating + * if the switch is voluntary or involuntary. + * Called from: + * thr_exit1() + * thread_exit() + * sched_switch() (local) + * init_secondary() (start up 2ndary processors) + */ +struct thread * +choosethread(int flags) +{ + struct kse *ke; + struct thread *td; + struct ksegrp *kg; + +#if defined(SMP) && (defined(__i386__) || defined(__amd64__)) + if (smp_active == 0 && PCPU_GET(cpuid) != 0) { + /* Shutting down, run idlethread on AP's */ + td = PCPU_GET(idlethread); + ke = td->td_kse; + CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td); + ke->ke_flags |= KEF_DIDRUN; + TD_SET_RUNNING(td); + return (td); + } +#endif + +retry: + kg = curthread->td_ksegrp; +#if 0 + if (flags & SW_VOL) { + if (kg->kg_runnable) { + td = TAILQ_FIRST(&kg->kg_runq); + } + } + if (ke == NULL) +#endif + ke = sched_choose(); + if (ke) { >>> TRUNCATED FOR MAIL (1000 lines) <<<
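
[Editor's note: the diff above copies the run-queue definitions from sys/sys/runq.h (RQ_NQS, RQ_PPQ, struct rqbits, struct runq) straight into sched_ule.c, and its comments describe KSEs being queued at priorities inherited from threads. Below is a minimal userland sketch, not the kernel code, of how those structures fit together: each queue covers RQ_PPQ priorities, and a per-queue status bit lets the scheduler find the highest-priority non-empty queue with a word scan plus ffs() instead of walking all 64 queues. RQB_BPW is fixed at 32 here (in FreeBSD it comes from machine/runq.h), and the ke_pri field, the simplified runq_add()/runq_choose() bodies, and main() are invented for illustration only.]

/* Compile with: cc -std=c99 runq_sketch.c */
#include <stdio.h>
#include <strings.h>            /* ffs() */
#include <sys/queue.h>          /* TAILQ macros, as used by the diff */

#define RQ_NQS  64              /* Number of run queues. */
#define RQ_PPQ   4              /* Priorities per queue. */
#define RQB_BPW 32              /* Bits per status word (assumed here). */
#define RQB_LEN (RQ_NQS / RQB_BPW)

struct kse {                            /* stand-in for the kernel struct kse */
	int              ke_pri;        /* priority inherited from its thread */
	TAILQ_ENTRY(kse) ke_procq;      /* run queue linkage */
};
TAILQ_HEAD(rqhead, kse);

struct rqbits { unsigned int rqb_bits[RQB_LEN]; };

struct runq {
	struct rqbits rq_status;        /* one bit per non-empty queue */
	struct rqhead rq_queues[RQ_NQS];
};

static void
runq_init(struct runq *rq)
{
	int i;

	for (i = 0; i < RQB_LEN; i++)
		rq->rq_status.rqb_bits[i] = 0;
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/* Queue a KSE on the queue covering its priority band and mark it non-empty. */
static void
runq_add(struct runq *rq, struct kse *ke)
{
	int qi = ke->ke_pri / RQ_PPQ;

	TAILQ_INSERT_TAIL(&rq->rq_queues[qi], ke, ke_procq);
	rq->rq_status.rqb_bits[qi / RQB_BPW] |= 1u << (qi % RQB_BPW);
}

/* Remove and return the head of the highest-priority (lowest-index) non-empty queue. */
static struct kse *
runq_choose(struct runq *rq)
{
	int w, qi;
	struct kse *ke;

	for (w = 0; w < RQB_LEN; w++) {
		if (rq->rq_status.rqb_bits[w] == 0)
			continue;
		qi = w * RQB_BPW + ffs(rq->rq_status.rqb_bits[w]) - 1;
		ke = TAILQ_FIRST(&rq->rq_queues[qi]);
		TAILQ_REMOVE(&rq->rq_queues[qi], ke, ke_procq);
		if (TAILQ_EMPTY(&rq->rq_queues[qi]))
			rq->rq_status.rqb_bits[w] &= ~(1u << (qi % RQB_BPW));
		return (ke);
	}
	return (NULL);                  /* nothing runnable */
}

int
main(void)
{
	struct runq rq;
	struct kse a = { .ke_pri = 130 };
	struct kse b = { .ke_pri = 20 };
	struct kse *ke;

	runq_init(&rq);
	runq_add(&rq, &a);
	runq_add(&rq, &b);
	/* Prints priority 20 first: lower priority number means a better queue. */
	while ((ke = runq_choose(&rq)) != NULL)
		printf("chose KSE with priority %d\n", ke->ke_pri);
	return (0);
}

[The point of the bitmap is that selecting the next runnable entity costs at most RQB_LEN word tests plus one ffs(), regardless of how many KSEs are queued, which is why the diff asserts CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS).]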