Date: Mon, 21 Aug 2006 07:30:51 GMT From: Chris Jones <cdjones@FreeBSD.org> To: Perforce Change Reviews <perforce@FreeBSD.org> Subject: PERFORCE change 104659 for review Message-ID: <200608210730.k7L7UpOL037532@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=104659 Change 104659 by cdjones@cdjones-impulse on 2006/08/21 07:30:17 Rename sched_hier.c to sched_4bsd.c Affected files ... .. //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 integrate Differences ... ==== //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 (text+ko) ==== @@ -41,6 +41,7 @@ #include <sys/param.h> #include <sys/systm.h> +#include <sys/jail.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> @@ -176,6 +177,11 @@ static int forward_wakeup(int cpunum); #endif +static uint32_t total_cpu_sched_shares; +static u_int total_est_cpu; +extern struct mtx allprison_mtx; +extern int prisoncount; + static struct kproc_desc sched_kp = { "schedcpu", schedcpu_thread, @@ -289,6 +295,18 @@ &sched_kgfollowons, 0, "number of followons done in a ksegrp"); +static int sched_limitjailcpu = 0; +SYSCTL_INT(_kern_sched, OID_AUTO, limit_jail_cpu, + CTLFLAG_RW, + &sched_limitjailcpu, 0, + "limit jailed process cpu usage"); + +static int sched_unjailedProcessShares = 0; +SYSCTL_INT(_kern_sched, OID_AUTO, system_cpu_shares, + CTLTYPE_INT | CTLFLAG_RW, + &sched_unjailedProcessShares, 0, + "number of shares to allocate to unjailed processes"); + static __inline void sched_load_add(void) { @@ -435,10 +453,23 @@ struct proc *p; struct kse *ke; struct ksegrp *kg; + struct prison *pr; int awake, realstathz; realstathz = stathz ? stathz : hz; + /* + * Need to acquire each jail's mutex and hold throughout to keep + * everything out while we recalculate per-jail CPU usage. + * TODO: this is excessively icky. + */ sx_slock(&allproc_lock); + mtx_lock(&allprison_mtx); + if (prisoncount) { + LIST_FOREACH(pr, &allprison, pr_list) { + pr->pr_estcpu = 0; + } + } + total_est_cpu = 0; FOREACH_PROC_IN_SYSTEM(p) { /* * Prevent state changes and protect run queue. 
@@ -523,6 +554,12 @@ if (kg->kg_slptime > 1) continue; kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu); + total_est_cpu += kg->kg_estcpu; + if (sched_limitjailcpu && + NULL != kg->kg_proc->p_ucred && + NULL != kg->kg_proc->p_ucred->cr_prison) + kg->kg_proc->p_ucred->cr_prison->pr_estcpu += + kg->kg_estcpu; resetpriority(kg); FOREACH_THREAD_IN_GROUP(kg, td) { resetpriority_thread(td, kg); @@ -530,6 +567,7 @@ } /* end of ksegrp loop */ mtx_unlock_spin(&sched_lock); } /* end of process loop */ + mtx_unlock(&allprison_mtx); sx_sunlock(&allproc_lock); } @@ -540,8 +578,29 @@ schedcpu_thread(void) { int nowake; + struct prison *pr; + u_int32_t shares = 0; for (;;) { + if (sched_limitjailcpu) { + /* + * Update total jail CPU shares in case they've changed. + * Safe to read pr_sched_shares without mutex because + * in worst case, we get a bogus value which will be + * corrected on the next pass. + * + * TODO: this should be done by forcing a recalculation + * when jail CPU shares are added / changed, rather than + * doing it every second. + */ + + shares = sched_unjailedProcessShares; + LIST_FOREACH(pr, &allprison, pr_list) { + shares += pr->pr_sched_shares; + } + total_cpu_sched_shares = shares; + } + schedcpu(); tsleep(&nowake, 0, "-", hz); } @@ -579,12 +638,43 @@ resetpriority(struct ksegrp *kg) { register unsigned int newpriority; + struct prison *pr = NULL; + if (NULL != kg->kg_proc->p_ucred) + pr = kg->kg_proc->p_ucred->cr_prison; if (kg->kg_pri_class == PRI_TIMESHARE) { newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT + - NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN); - newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), - PRI_MAX_TIMESHARE); + NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN); + if (sched_limitjailcpu && NULL != pr) { + /* + * Skew the priority by the jail's share of CPU resources. + * The unjailed processes get half the CPU time. + * + * TODO: this is a hard limit. We should really also have + * soft limits available. 
Also, the amount of CPU time + * reserved to unjailed processes really should be sysctl'd. + */ + register unsigned int np = newpriority; + register unsigned int skew; + skew = pr->pr_estcpu * total_cpu_sched_shares; + skew /= max(total_est_cpu, 1) * max(pr->pr_sched_shares, 1); + if (skew > 0) { + /* wait your turn until your cpu usage's proportionate */ + newpriority = PRI_MAX_IDLE; + } else { + newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), + PRI_MAX_TIMESHARE); + } + printf("skew KSE %p (%d / %d cpu, %d / %d shares) from %d to %d\n", + &kg, pr->pr_estcpu, total_est_cpu, + pr->pr_sched_shares, + total_cpu_sched_shares, + np, newpriority); + } else { + newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), + PRI_MAX_TIMESHARE); + } + kg->kg_user_pri = newpriority; } }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200608210730.k7L7UpOL037532>