From owner-p4-projects@FreeBSD.ORG Mon Aug 21 07:30:52 2006 Return-Path: X-Original-To: p4-projects@freebsd.org Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 5221616A4E6; Mon, 21 Aug 2006 07:30:52 +0000 (UTC) X-Original-To: perforce@FreeBSD.org Delivered-To: perforce@FreeBSD.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 1323A16A4DE for ; Mon, 21 Aug 2006 07:30:52 +0000 (UTC) (envelope-from cdjones@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [216.136.204.115]) by mx1.FreeBSD.org (Postfix) with ESMTP id C95FA43D6A for ; Mon, 21 Aug 2006 07:30:51 +0000 (GMT) (envelope-from cdjones@FreeBSD.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.13.6/8.13.6) with ESMTP id k7L7UpeT037536 for ; Mon, 21 Aug 2006 07:30:51 GMT (envelope-from cdjones@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.13.6/8.13.4/Submit) id k7L7UpOL037532 for perforce@freebsd.org; Mon, 21 Aug 2006 07:30:51 GMT (envelope-from cdjones@FreeBSD.org) Date: Mon, 21 Aug 2006 07:30:51 GMT Message-Id: <200608210730.k7L7UpOL037532@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to cdjones@FreeBSD.org using -f From: Chris Jones To: Perforce Change Reviews Cc: Subject: PERFORCE change 104659 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 21 Aug 2006 07:30:52 -0000 http://perforce.freebsd.org/chv.cgi?CH=104659 Change 104659 by cdjones@cdjones-impulse on 2006/08/21 07:30:17 Rename sched_hier.c to sched_4bsd.c Affected files ... .. //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 integrate Differences ... 
==== //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 (text+ko) ==== @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -176,6 +177,11 @@ static int forward_wakeup(int cpunum); #endif +static uint32_t total_cpu_sched_shares; +static u_int total_est_cpu; +extern struct mtx allprison_mtx; +extern int prisoncount; + static struct kproc_desc sched_kp = { "schedcpu", schedcpu_thread, @@ -289,6 +295,18 @@ &sched_kgfollowons, 0, "number of followons done in a ksegrp"); +static int sched_limitjailcpu = 0; +SYSCTL_INT(_kern_sched, OID_AUTO, limit_jail_cpu, + CTLFLAG_RW, + &sched_limitjailcpu, 0, + "limit jailed process cpu usage"); + +static int sched_unjailedProcessShares = 0; +SYSCTL_INT(_kern_sched, OID_AUTO, system_cpu_shares, + CTLTYPE_INT | CTLFLAG_RW, + &sched_unjailedProcessShares, 0, + "number of shares to allocate to unjailed processes"); + static __inline void sched_load_add(void) { @@ -435,10 +453,23 @@ struct proc *p; struct kse *ke; struct ksegrp *kg; + struct prison *pr; int awake, realstathz; realstathz = stathz ? stathz : hz; + /* + * Need to acquire each jail's mutex and hold throughout to keep + * everything out while we recalculate per-jail CPU usage. + * TODO: this is excessively icky. + */ sx_slock(&allproc_lock); + mtx_lock(&allprison_mtx); + if (prisoncount) { + LIST_FOREACH(pr, &allprison, pr_list) { + pr->pr_estcpu = 0; + } + } + total_est_cpu = 0; FOREACH_PROC_IN_SYSTEM(p) { /* * Prevent state changes and protect run queue. 
@@ -523,6 +554,12 @@ if (kg->kg_slptime > 1) continue; kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu); + total_est_cpu += kg->kg_estcpu; + if (sched_limitjailcpu && + NULL != kg->kg_proc->p_ucred && + NULL != kg->kg_proc->p_ucred->cr_prison) + kg->kg_proc->p_ucred->cr_prison->pr_estcpu += + kg->kg_estcpu; resetpriority(kg); FOREACH_THREAD_IN_GROUP(kg, td) { resetpriority_thread(td, kg); @@ -530,6 +567,7 @@ } /* end of ksegrp loop */ mtx_unlock_spin(&sched_lock); } /* end of process loop */ + mtx_unlock(&allprison_mtx); sx_sunlock(&allproc_lock); } @@ -540,8 +578,29 @@ schedcpu_thread(void) { int nowake; + struct prison *pr; + u_int32_t shares = 0; for (;;) { + if (sched_limitjailcpu) { + /* + * Update total jail CPU shares in case they've changed. + * Safe to read pr_sched_shares without mutex because + * in the worst case, we get a bogus value which will be + * corrected on the next pass. + * + * TODO: this should be done by forcing a recalculation + * when jail CPU shares are added / changed, rather than + * doing it every second. + */ + + shares = sched_unjailedProcessShares; + LIST_FOREACH(pr, &allprison, pr_list) { + shares += pr->pr_sched_shares; + } + total_cpu_sched_shares = shares; + } + schedcpu(); tsleep(&nowake, 0, "-", hz); } @@ -579,12 +638,43 @@ resetpriority(struct ksegrp *kg) { register unsigned int newpriority; + struct prison *pr = NULL; + if (NULL != kg->kg_proc->p_ucred) + pr = kg->kg_proc->p_ucred->cr_prison; if (kg->kg_pri_class == PRI_TIMESHARE) { newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT + - NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN); - newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), - PRI_MAX_TIMESHARE); + NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN); + if (sched_limitjailcpu && NULL != pr) { + /* + * Skew the priority by the jail's share of CPU resources. + * The unjailed processes get half the CPU time. + * + * TODO: this is a hard limit. We should really also have + * soft limits available. 
Also, the amount of CPU time + * reserved to unjailed processes really should be sysctl'd. + */ + register unsigned int np = newpriority; + register unsigned int skew; + skew = pr->pr_estcpu * total_cpu_sched_shares; + skew /= max(total_est_cpu, 1) * max(pr->pr_sched_shares, 1); + if (skew > 0) { + /* wait your turn until your cpu usage's proportionate */ + newpriority = PRI_MAX_IDLE; + } else { + newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), + PRI_MAX_TIMESHARE); + } + printf("skew KSE %p (%d / %d cpu, %d / %d shares) from %d to %d\n", + &kg, pr->pr_estcpu, total_est_cpu, + pr->pr_sched_shares, + total_cpu_sched_shares, + np, newpriority); + } else { + newpriority = min(max(newpriority, PRI_MIN_TIMESHARE), + PRI_MAX_TIMESHARE); + } + kg->kg_user_pri = newpriority; } }