Date: Sat, 5 Aug 2006 22:07:41 GMT
From: John Birrell <jb@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 103283 for review
Message-ID: <200608052207.k75M7fZs055142@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=103283 Change 103283 by jb@jb_freebsd2 on 2006/08/05 22:06:59 Merge KSE back in, but only if the kernel option KSE is defined. Affected files ... .. //depot/projects/dtrace/src/sys/kern/kern_idle.c#3 edit .. //depot/projects/dtrace/src/sys/kern/kern_intr.c#6 edit .. //depot/projects/dtrace/src/sys/kern/kern_mib.c#3 edit .. //depot/projects/dtrace/src/sys/sys/proc.h#11 edit .. //depot/projects/dtrace/src/sys/sys/rtprio.h#3 edit .. //depot/projects/dtrace/src/sys/sys/sched.h#4 edit Differences ... ==== //depot/projects/dtrace/src/sys/kern/kern_idle.c#3 (text+ko) ==== @@ -78,8 +78,13 @@ mtx_lock_spin(&sched_lock); td = FIRST_THREAD_IN_PROC(p); TD_SET_CAN_RUN(td); +#ifdef KSE + td->td_flags |= TDF_IDLETD; + sched_class(td->td_ksegrp, PRI_IDLE); +#else atomic_set_int(&td->td_flags, TDF_IDLETD); sched_class(td, PRI_IDLE); +#endif sched_prio(td, PRI_MAX_IDLE); mtx_unlock_spin(&sched_lock); PROC_UNLOCK(p); @@ -118,8 +123,12 @@ #ifdef SMP idle_cpus_mask &= ~mycpu; #endif +#ifdef KSE + mi_switch(SW_VOL, NULL); +#else if ((td = choosethread()) != curthread) sched_switch(curthread, td, SW_VOL); +#endif #ifdef SMP idle_cpus_mask |= mycpu; #endif ==== //depot/projects/dtrace/src/sys/kern/kern_intr.c#6 (text+ko) ==== @@ -296,7 +296,11 @@ panic("kthread_create() failed with %d", error); td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ mtx_lock_spin(&sched_lock); +#ifdef KSE + td->td_ksegrp->kg_pri_class = PRI_ITHD; +#else td->td_pri_class = PRI_ITHD; +#endif TD_SET_IWAIT(td); mtx_unlock_spin(&sched_lock); td->td_pflags |= TDP_ITHREAD; @@ -531,7 +535,11 @@ CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, p->p_comm); TD_CLR_IWAIT(td); +#ifdef KSE + setrunqueue(td, SRQ_INTR); +#else sched_run_ithread(td); +#endif } else { CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, p->p_comm, it->it_need, td->td_state); ==== //depot/projects/dtrace/src/sys/kern/kern_mib.c#3 (text+ko) ==== @@ -146,7 +146,7 @@ 
SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD, 0, BYTE_ORDER, "System byte order"); -SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD | CTLFLAG_MPSAFE, +SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD, 0, PAGE_SIZE, "System memory page size"); static int ==== //depot/projects/dtrace/src/sys/sys/proc.h#11 (text+ko) ==== @@ -152,23 +152,120 @@ */ struct auditinfo; struct kaudit_record; +#ifdef KSE +struct kg_sched; +#else struct td_sched; +#endif struct nlminfo; struct kaioinfo; struct p_sched; struct proc; struct sleepqueue; +#ifdef KSE +struct td_sched; +#else struct thread; +#endif struct trapframe; struct turnstile; struct mqueue_notifier; +#ifdef KSE +/* + * Here we define the three structures used for process information. + * + * The first is the thread. It might be thought of as a "Kernel + * Schedulable Entity Context". + * This structure contains all the information as to where a thread of + * execution is now, or was when it was suspended, why it was suspended, + * and anything else that will be needed to restart it when it is + * rescheduled. Always associated with a KSE when running, but can be + * reassigned to an equivalent KSE when being restarted for + * load balancing. Each of these is associated with a kernel stack + * and a pcb. + * + * It is important to remember that a particular thread structure may only + * exist as long as the system call or kernel entrance (e.g. by pagefault) + * which it is currently executing. It should therefore NEVER be referenced + * by pointers in long lived structures that live longer than a single + * request. If several threads complete their work at the same time, + * they will all rewind their stacks to the user boundary, report their + * completion state, and all but one will be freed. That last one will + * be kept to provide a kernel stack and pcb for the NEXT syscall or kernel + * entrance (basically to save freeing and then re-allocating it). 
The existing + * thread keeps a cached spare thread available to allow it to quickly + * get one when it needs a new one. There is also a system + * cache of free threads. Threads have priority and partake in priority + * inheritance schemes. + */ +struct thread; + +/* + * The KSEGRP is allocated resources across a number of CPUs. + * (Including a number of CPUxQUANTA. It parcels these QUANTA up among + * its threads, each of which should be running in a different CPU. + * BASE priority and total available quanta are properties of a KSEGRP. + * Multiple KSEGRPs in a single process compete against each other + * for total quanta in the same way that a forked child competes against + * its parent process. + */ +struct ksegrp; + /* + * A process is the owner of all system resources allocated to a task + * except CPU quanta. + * All KSEGs under one process see, and have the same access to, these + * resources (e.g. files, memory, sockets, credential, kqueues). + * A process may compete for CPU cycles on the same basis as a + * forked process cluster by spawning several KSEGRPs. + */ +struct proc; + +/*************** + * In pictures: + With a single run queue used by all processors: + + RUNQ: --->KSE---KSE--... SLEEPQ:[]---THREAD---THREAD---THREAD + \ \ []---THREAD + KSEG---THREAD--THREAD--THREAD [] + []---THREAD---THREAD + + (processors run THREADs from the KSEG until they are exhausted or + the KSEG exhausts its quantum) + +With PER-CPU run queues: +KSEs on the separate run queues directly +They would be given priorities calculated from the KSEG. + + * + *****************/ +#endif + +#ifdef KSE +/* + * Kernel runnable context (thread). + * This is what is put to sleep and reactivated. + * The first KSE available in the correct group will run this thread. + * If several are available, use the one on the same CPU as last time. + * When waiting to be run, threads are hung off the KSEGRP in priority order. 
+ * With N runnable and queued KSEs in the KSEGRP, the first N threads + * are linked to them. Other threads are not yet assigned. + */ +#else +/* * Thread context. Processes may have multiple threads. */ +#endif struct thread { struct proc *td_proc; /* (*) Associated process. */ +#ifdef KSE + struct ksegrp *td_ksegrp; /* (*) Associated KSEG. */ +#endif TAILQ_ENTRY(thread) td_plist; /* (*) All threads in this proc. */ +#ifdef KSE + TAILQ_ENTRY(thread) td_kglist; /* (*) All threads in this ksegrp. */ +#endif /* The two queues below should someday be merged. */ TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. */ @@ -202,9 +299,17 @@ struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */ int td_intr_nesting_level; /* (k) Interrupt recursion. */ int td_pinned; /* (k) Temporary cpu pin count. */ +#ifdef KSE + struct kse_thr_mailbox *td_mailbox; /* (*) Userland mailbox address. */ +#endif struct ucred *td_ucred; /* (k) Reference to credentials. */ +#ifdef KSE + struct thread *td_standin; /* (k + a) Use this for an upcall. */ + struct kse_upcall *td_upcall; /* (k + j) Upcall structure. */ +#else u_int td_estcpu; /* (j) Sum of the same field in KSEs. */ u_int td_slptime; /* (j) How long completely blocked. */ +#endif u_int td_pticks; /* (k) Statclock hits for profiling */ u_int td_sticks; /* (k) Statclock hits in system mode. */ u_int td_iticks; /* (k) Statclock hits in intr mode. */ @@ -216,6 +321,9 @@ sigset_t td_sigmask; /* (c) Current signal mask. */ volatile u_int td_generation; /* (k) For detection of preemption */ stack_t td_sigstk; /* (k) Stack ptr and on-stack flag. */ +#ifdef KSE + int td_kflags; /* (c) Flags for KSE threading. */ +#endif int td_xsig; /* (c) Signal for ptrace */ int td_xsig_why; /* (c) reason for ptrace signal PL_EVENT_* */ u_long td_profil_addr; /* (k) Temporary addr until AST. */ @@ -334,15 +442,27 @@ #define TDP_OLDMASK 0x00000001 /* Need to restore mask after suspend. 
*/ #define TDP_INKTR 0x00000002 /* Thread is currently in KTR code. */ #define TDP_INKTRACE 0x00000004 /* Thread is currently in KTRACE code. */ +#ifdef KSE +#define TDP_UPCALLING 0x00000008 /* This thread is doing an upcall. */ +#else /* 0x00000008 */ +#endif #define TDP_COWINPROGRESS 0x00000010 /* Snapshot copy-on-write in progress. */ #define TDP_ALTSTACK 0x00000020 /* Have alternate signal stack. */ #define TDP_DEADLKTREAT 0x00000040 /* Lock aquisition - deadlock treatment. */ +#ifdef KSE +#define TDP_SA 0x00000080 /* A scheduler activation based thread. */ +#else /* 0x00000080 */ +#endif #define TDP_NOSLEEPING 0x00000100 /* Thread is not allowed to sleep on a sq. */ #define TDP_OWEUPC 0x00000200 /* Call addupc() at next AST. */ #define TDP_ITHREAD 0x00000400 /* Thread is an interrupt thread. */ +#ifdef KSE +#define TDP_CAN_UNBIND 0x00000800 /* Only temporarily bound. */ +#else /* 0x00000800 */ +#endif #define TDP_SCHED1 0x00001000 /* Reserved for scheduler private use */ #define TDP_SCHED2 0x00002000 /* Reserved for scheduler private use */ #define TDP_SCHED3 0x00004000 /* Reserved for scheduler private use */ @@ -363,6 +483,19 @@ #define TDI_LOCK 0x0008 /* Stopped on a lock. */ #define TDI_IWAIT 0x0010 /* Awaiting interrupt. */ +#ifdef KSE +/* + * flags (in kflags) related to M:N threading. + */ +#define TDK_KSEREL 0x0001 /* Blocked in msleep on kg->kg_completed. */ +#define TDK_KSERELSIG 0x0002 /* Blocked in msleep on p->p_siglist. */ +#define TDK_WAKEUP 0x0004 /* Thread has been woken by kse_wakeup. 
*/ + +#define TD_CAN_UNBIND(td) \ + (((td)->td_pflags & TDP_CAN_UNBIND) && \ + ((td)->td_upcall != NULL)) +#endif + #define TD_IS_SLEEPING(td) ((td)->td_inhibitors & TDI_SLEEPING) #define TD_ON_SLEEPQ(td) ((td)->td_wchan != NULL) #define TD_IS_SUSPENDED(td) ((td)->td_inhibitors & TDI_SUSPENDED) @@ -408,7 +541,57 @@ #define TD_SET_RUNQ(td) (td)->td_state = TDS_RUNQ #define TD_SET_CAN_RUN(td) (td)->td_state = TDS_CAN_RUN +#ifdef KSE +/* + * An upcall is used when returning to userland. If a thread does not have + * an upcall on return to userland the thread exports its context and exits. + */ +struct kse_upcall { + TAILQ_ENTRY(kse_upcall) ku_link; /* List of upcalls in KSEG. */ + struct ksegrp *ku_ksegrp; /* Associated KSEG. */ + struct thread *ku_owner; /* Owning thread. */ + int ku_flags; /* KUF_* flags. */ + struct kse_mailbox *ku_mailbox; /* Userland mailbox address. */ + stack_t ku_stack; /* Userland upcall stack. */ + void *ku_func; /* Userland upcall function. */ + unsigned int ku_mflags; /* Cached upcall mbox flags. */ +}; + +#define KUF_DOUPCALL 0x00001 /* Do upcall now; don't wait. */ +#define KUF_EXITING 0x00002 /* Upcall structure is exiting. */ + /* + * Kernel-scheduled entity group (KSEG). The scheduler considers each KSEG to + * be an indivisible unit from a time-sharing perspective, though each KSEG may + * contain multiple KSEs. + */ +struct ksegrp { + struct proc *kg_proc; /* (*) Proc that contains this KSEG. */ + TAILQ_ENTRY(ksegrp) kg_ksegrp; /* (*) Queue of KSEGs in kg_proc. */ + TAILQ_HEAD(, thread) kg_threads;/* (td_kglist) All threads. */ + TAILQ_HEAD(, thread) kg_runq; /* (td_runq) waiting RUNNABLE threads */ + TAILQ_HEAD(, kse_upcall) kg_upcalls; /* All upcalls in the group. */ + +#define kg_startzero kg_estcpu + u_int kg_estcpu; /* (j) Sum of the same field in KSEs. */ + u_int kg_slptime; /* (j) How long completely blocked. */ + int kg_numupcalls; /* (j) Num upcalls. */ + int kg_upsleeps; /* (c) Num threads in kse_release(). 
*/ + struct kse_thr_mailbox *kg_completed; /* (c) Completed thread mboxes. */ + int kg_nextupcall; /* (n) Next upcall time. */ + int kg_upquantum; /* (n) Quantum to schedule an upcall. */ +#define kg_endzero kg_pri_class + +#define kg_startcopy kg_endzero + u_char kg_pri_class; /* (j) Scheduling class. */ + u_char kg_user_pri; /* (j) User pri from estcpu and nice. */ +#define kg_endcopy kg_numthreads + int kg_numthreads; /* (j) Num threads in total. */ + struct kg_sched *kg_sched; /* (*) Scheduler-specific data. */ +}; +#endif + +/* * XXX: Does this belong in resource.h or resourcevar.h instead? * Resource usage extension. The times in rusage structs in the kernel are * never up to date. The actual times are kept as runtimes and tick counts @@ -434,6 +617,9 @@ */ struct proc { LIST_ENTRY(proc) p_list; /* (d) List of all processes. */ +#ifdef KSE + TAILQ_HEAD(, ksegrp) p_ksegrps; /* (c)(kg_ksegrp) All KSEGs. */ +#endif TAILQ_HEAD(, thread) p_threads; /* (j)(td_plist) Threads. (shortcut) */ TAILQ_HEAD(, thread) p_suspended; /* (td_runq) Suspended threads. */ struct ucred *p_ucred; /* (c) Process owner's identity. */ @@ -496,6 +682,9 @@ int p_suspcount; /* (c) Num threads in suspended mode. */ struct thread *p_xthread; /* (c) Trap thread */ int p_boundary_count;/* (c) Num threads at user boundary */ +#ifdef KSE + struct ksegrp *p_procscopegrp; +#endif int p_pendingcnt; /* how many signals are pending */ struct itimers *p_itimers; /* (c) POSIX interval timers. */ /* End area that is zeroed on creation. */ @@ -516,6 +705,9 @@ u_short p_xstat; /* (c) Exit status; also stop sig. */ struct knlist p_klist; /* (c) Knotes attached to this proc. */ int p_numthreads; /* (j) Number of threads. */ +#ifdef KSE + int p_numksegrps; /* (c) Number of ksegrps. */ +#endif struct mdproc p_md; /* Any machine-dependent fields. */ struct callout p_itcallout; /* (h + c) Interval timer callout. */ u_short p_acflag; /* (c) Accounting flags. 
*/ @@ -627,11 +819,22 @@ #define FOREACH_PROC_IN_SYSTEM(p) \ LIST_FOREACH((p), &allproc, p_list) +#ifdef KSE +#define FOREACH_KSEGRP_IN_PROC(p, kg) \ + TAILQ_FOREACH((kg), &(p)->p_ksegrps, kg_ksegrp) +#define FOREACH_THREAD_IN_GROUP(kg, td) \ + TAILQ_FOREACH((td), &(kg)->kg_threads, td_kglist) +#define FOREACH_UPCALL_IN_GROUP(kg, ku) \ + TAILQ_FOREACH((ku), &(kg)->kg_upcalls, ku_link) +#endif #define FOREACH_THREAD_IN_PROC(p, td) \ TAILQ_FOREACH((td), &(p)->p_threads, td_plist) /* XXXKSE the following lines should probably only be used in 1:1 code: */ #define FIRST_THREAD_IN_PROC(p) TAILQ_FIRST(&(p)->p_threads) +#ifdef KSE +#define FIRST_KSEGRP_IN_PROC(p) TAILQ_FIRST(&(p)->p_ksegrps) +#endif /* * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, @@ -742,6 +945,9 @@ extern struct sx allproc_lock; extern struct sx proctree_lock; extern struct mtx ppeers_lock; +#ifdef KSE +extern struct ksegrp ksegrp0; /* Primary ksegrp in proc0. */ +#endif extern struct proc proc0; /* Process slot for swapper. */ extern struct thread thread0; /* Primary thread in proc0. */ extern struct vmspace vmspace0; /* VM space for proc0. */ @@ -792,7 +998,11 @@ void pargs_free(struct pargs *pa); void pargs_hold(struct pargs *pa); void procinit(void); +#ifdef KSE +void proc_linkup(struct proc *p, struct ksegrp *kg, struct thread *td); +#else void proc_linkup(struct proc *p, struct thread *td); +#endif void proc_reparent(struct proc *child, struct proc *newparent); struct pstats *pstats_alloc(void); void pstats_fork(struct pstats *src, struct pstats *dst); @@ -820,6 +1030,11 @@ void cpu_set_fork_handler(struct thread *, void (*)(void *), void *); /* New in KSE. 
*/ +#ifdef KSE +struct ksegrp *ksegrp_alloc(void); +void ksegrp_free(struct ksegrp *kg); +void ksegrp_stash(struct ksegrp *kg); +#endif void kse_GC(void); void kseinit(void); void cpu_set_upcall(struct thread *td, struct thread *td0); @@ -830,13 +1045,24 @@ void cpu_thread_setup(struct thread *td); void cpu_thread_swapin(struct thread *); void cpu_thread_swapout(struct thread *); +#ifdef KSE +void ksegrp_link(struct ksegrp *kg, struct proc *p); +void ksegrp_unlink(struct ksegrp *kg); +#endif struct thread *thread_alloc(void); void thread_continued(struct proc *p); void thread_exit(void) __dead2; int thread_export_context(struct thread *td, int willexit); void thread_free(struct thread *td); +#ifdef KSE +void thread_link(struct thread *td, struct ksegrp *kg); +#else void thread_link(struct thread *td, struct proc *p); +#endif void thread_reap(void); +#ifdef KSE +struct thread *thread_schedule_upcall(struct thread *td, struct kse_upcall *ku); +#endif void thread_signal_add(struct thread *td, ksiginfo_t *); int thread_single(int how); void thread_single_end(void); @@ -854,9 +1080,21 @@ void thread_unsuspend(struct proc *p); void thread_unsuspend_one(struct thread *td); void thread_unthread(struct thread *td); +#ifdef KSE +int thread_userret(struct thread *td, struct trapframe *frame); +void thread_user_enter(struct thread *td); +#endif void thread_wait(struct proc *p); struct thread *thread_find(struct proc *p, lwpid_t tid); void thr_exit1(void); +#ifdef KSE +struct kse_upcall *upcall_alloc(void); +void upcall_free(struct kse_upcall *ku); +void upcall_link(struct kse_upcall *ku, struct ksegrp *kg); +void upcall_unlink(struct kse_upcall *ku); +void upcall_remove(struct thread *td); +void upcall_stash(struct kse_upcall *ke); +#endif #endif /* _KERNEL */ ==== //depot/projects/dtrace/src/sys/sys/rtprio.h#3 (text+ko) ==== @@ -75,11 +75,17 @@ }; #ifdef _KERNEL +#ifdef KSE +struct ksegrp; +int rtp_to_pri(struct rtprio *, struct ksegrp *); +void pri_to_rtp(struct ksegrp *, 
struct rtprio *); +#else struct thread; int rtp_to_pri(struct rtprio *, struct thread *); void pri_to_rtp(struct thread *, struct rtprio *); #endif #endif +#endif #ifndef _KERNEL #include <sys/cdefs.h> ==== //depot/projects/dtrace/src/sys/sys/sched.h#4 (text+ko) ==== @@ -52,19 +52,32 @@ * KSE Groups contain scheduling priority information. They record the * behavior of groups of KSEs and threads. */ +#ifdef KSE +void sched_class(struct ksegrp *kg, int class); +void sched_exit_ksegrp(struct ksegrp *kg, struct thread *childtd); +void sched_fork_ksegrp(struct thread *td, struct ksegrp *child); +#else void sched_class(struct thread *td, int class); +#endif void sched_nice(struct proc *p, int nice); /* * Threads are switched in and out, block on resources, have temporary * priorities inherited from their ksegs, and use up cpu time. */ +#ifdef KSE +void sched_exit_thread(struct thread *td, struct thread *child); +void sched_fork_thread(struct thread *td, struct thread *child); +#endif fixpt_t sched_pctcpu(struct thread *td); void sched_prio(struct thread *td, u_char prio); void sched_lend_prio(struct thread *td, u_char prio); void sched_sleep(struct thread *td); void sched_switch(struct thread *td, struct thread *newtd, int flags); void sched_unlend_prio(struct thread *td, u_char prio); +#ifdef KSE +void sched_userret(struct thread *td); +#endif void sched_wakeup(struct thread *td); /* @@ -75,7 +88,9 @@ void sched_rem(struct thread *td); void sched_tick(void); void sched_relinquish(struct thread *td); +#ifndef KSE void sched_run_ithread(struct thread *td); +#endif /* * Binding makes cpu affinity permanent while pinning is used to temporarily @@ -91,6 +106,9 @@ * These procedures tell the process data structure allocation code how * many bytes to actually allocate. 
*/ +#ifdef KSE +int sched_sizeof_ksegrp(void); +#endif int sched_sizeof_proc(void); int sched_sizeof_thread(void); @@ -108,7 +126,15 @@ /* temporarily here */ void schedinit(void); +#ifdef KSE +void sched_init_concurrency(struct ksegrp *kg); +void sched_set_concurrency(struct ksegrp *kg, int cuncurrency); +#endif void sched_schedinit(void); +#ifdef KSE +void sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td); +void sched_thread_exit(struct thread *td); +#endif void sched_newthread(struct thread *td); #endif /* !_SYS_SCHED_H_ */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200608052207.k75M7fZs055142>