Date:      Mon, 21 Aug 2006 07:30:51 GMT
From:      Chris Jones <cdjones@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 104659 for review
Message-ID:  <200608210730.k7L7UpOL037532@repoman.freebsd.org>

http://perforce.freebsd.org/chv.cgi?CH=104659

Change 104659 by cdjones@cdjones-impulse on 2006/08/21 07:30:17

	Rename sched_hier.c to sched_4bsd.c

Affected files ...

.. //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 integrate

Differences ...

==== //depot/projects/soc2006/cdjones_jail/src/sys/kern/sched_4bsd.c#6 (text+ko) ====

@@ -41,6 +41,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -176,6 +177,11 @@
 static int	forward_wakeup(int  cpunum);
 #endif
 
+static uint32_t total_cpu_sched_shares;
+static u_int total_est_cpu;
+extern struct mtx allprison_mtx;
+extern int prisoncount;
+
 static struct kproc_desc sched_kp = {
         "schedcpu",
         schedcpu_thread,
@@ -289,6 +295,18 @@
 	   &sched_kgfollowons, 0,
 	   "number of followons done in a ksegrp");
 
+static int sched_limitjailcpu = 0;
+SYSCTL_INT(_kern_sched, OID_AUTO, limit_jail_cpu, 
+	   CTLFLAG_RW, 
+	   &sched_limitjailcpu, 0,
+	   "limit jailed process cpu usage");
+
+static int sched_unjailedProcessShares = 0;
+SYSCTL_INT(_kern_sched, OID_AUTO, system_cpu_shares, 
+	   CTLFLAG_RW,
+	   &sched_unjailedProcessShares, 0,
+	   "number of shares to allocate to unjailed processes");
+
 static __inline void
 sched_load_add(void)
 {
@@ -435,10 +453,23 @@
 	struct proc *p;
 	struct kse *ke;
 	struct ksegrp *kg;
+	struct prison *pr;
 	int awake, realstathz;
 
 	realstathz = stathz ? stathz : hz;
+	/* 
+	 * Need to acquire the allprison mutex and hold it throughout to keep 
+	 * everything out while we recalculate per-jail CPU usage.
+	 * TODO: this is excessively icky.
+	 */
 	sx_slock(&allproc_lock);
+	mtx_lock(&allprison_mtx);
+	if (prisoncount) {
+		LIST_FOREACH(pr, &allprison, pr_list) {
+			pr->pr_estcpu = 0;
+		}
+	}
+	total_est_cpu = 0;
 	FOREACH_PROC_IN_SYSTEM(p) {
 		/*
 		 * Prevent state changes and protect run queue.
@@ -523,6 +554,12 @@
 			if (kg->kg_slptime > 1)
 				continue;
 			kg->kg_estcpu = decay_cpu(loadfac, kg->kg_estcpu);
+			total_est_cpu += kg->kg_estcpu;
+			if (sched_limitjailcpu && 
+			    NULL != kg->kg_proc->p_ucred &&
+			    NULL != kg->kg_proc->p_ucred->cr_prison)
+				kg->kg_proc->p_ucred->cr_prison->pr_estcpu += 
+					kg->kg_estcpu;
 		      	resetpriority(kg);
 			FOREACH_THREAD_IN_GROUP(kg, td) {
 				resetpriority_thread(td, kg);
@@ -530,6 +567,7 @@
 		} /* end of ksegrp loop */
 		mtx_unlock_spin(&sched_lock);
 	} /* end of process loop */
+	mtx_unlock(&allprison_mtx);
 	sx_sunlock(&allproc_lock);
 }
 
@@ -540,8 +578,29 @@
 schedcpu_thread(void)
 {
 	int nowake;
+	struct prison *pr;
+	u_int32_t shares = 0;
 
 	for (;;) {
+		if (sched_limitjailcpu) {
+			/* 
+			 * Update total jail CPU shares in case they've changed.
+			 * Safe to read pr_sched_shares without mutex because
+			 * in worst case, we get a bogus value which will be 
+			 * corrected on the next pass.
+			 *
+			 * TODO: this should be done by forcing a recalculation
+			 * when jail CPU shares are added / changed, rather than
+			 * doing it every second.
+			 */
+			
+			shares = sched_unjailedProcessShares;
+			LIST_FOREACH(pr, &allprison, pr_list) {
+				shares += pr->pr_sched_shares;
+			}
+			total_cpu_sched_shares = shares;
+		}
+		
 		schedcpu();
 		tsleep(&nowake, 0, "-", hz);
 	}
@@ -579,12 +638,43 @@
 resetpriority(struct ksegrp *kg)
 {
 	register unsigned int newpriority;
+	struct prison *pr = NULL;
+	if (NULL != kg->kg_proc->p_ucred)
+		pr = kg->kg_proc->p_ucred->cr_prison;
 
 	if (kg->kg_pri_class == PRI_TIMESHARE) {
 		newpriority = PUSER + kg->kg_estcpu / INVERSE_ESTCPU_WEIGHT +
-		    NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN);
-		newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
-		    PRI_MAX_TIMESHARE);
+			NICE_WEIGHT * (kg->kg_proc->p_nice - PRIO_MIN);
+		if (sched_limitjailcpu && NULL != pr) {
+			/* 
+			 * Skew the priority by the jail's share of CPU resources.
+			 * The unjailed processes get half the CPU time.
+			 *
+			 * TODO: this is a hard limit.  We should really also have
+			 * soft limits available.  Also, the amount of CPU time 
+			 * reserved to unjailed processes really should be sysctl'd.
+			 */ 
+			register unsigned int np = newpriority;
+			register unsigned int skew;
+			skew = pr->pr_estcpu * total_cpu_sched_shares;
+			skew /= max(total_est_cpu, 1) * max(pr->pr_sched_shares, 1);
+			if (skew > 0) {
+				/* over its share; wait until the jail's CPU usage is proportionate */
+				newpriority = PRI_MAX_IDLE;
+			} else {
+				newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
+						  PRI_MAX_TIMESHARE);
+			}
+			printf("skew ksegrp %p (%u / %u cpu, %u / %u shares) from %u to %u\n",
+			       kg, pr->pr_estcpu, total_est_cpu,
+			       pr->pr_sched_shares, 
+			       total_cpu_sched_shares,
+			       np, newpriority);
+		} else {
+			newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
+					  PRI_MAX_TIMESHARE);
+		}
+		
 		kg->kg_user_pri = newpriority;
 	}
 }
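
For review convenience, here is a minimal userland sketch of the proportional-share test this change adds to resetpriority().  "struct jail_cpu" and jail_over_share() are hypothetical stand-ins, not kernel interfaces; the kernel operates on struct prison and the totals maintained by schedcpu() and schedcpu_thread().  The idea is to rearrange

	pr_estcpu / total_est_cpu >= pr_sched_shares / total_cpu_sched_shares

into pure integer arithmetic, clamping both divisors to at least 1 so a freshly booted system (total_est_cpu == 0) or a share-less jail cannot cause a division by zero:

#include <stdio.h>
#include <stdint.h>

struct jail_cpu {		/* hypothetical stand-in for struct prison */
	uint32_t estcpu;	/* decayed CPU usage, summed by schedcpu() */
	uint32_t shares;	/* administratively assigned CPU shares */
};

static uint32_t
umax32(uint32_t a, uint32_t b)
{

	return (a > b ? a : b);
}

/*
 * Nonzero once the jail has consumed at least its proportional slice of
 * CPU; resetpriority() then parks the ksegrp at PRI_MAX_IDLE until the
 * decay in schedcpu() brings the jail's usage back under its share.
 */
static int
jail_over_share(const struct jail_cpu *pr, uint32_t total_est_cpu,
    uint32_t total_shares)
{
	uint32_t skew;

	skew = pr->estcpu * total_shares;
	skew /= umax32(total_est_cpu, 1) * umax32(pr->shares, 1);
	return (skew > 0);
}

int
main(void)
{
	/* 40% of the decayed CPU on 25% of the shares: over its slice. */
	struct jail_cpu hog = { 40, 25 };
	/* 10% of the decayed CPU on 25% of the shares: still under it. */
	struct jail_cpu light = { 10, 25 };

	printf("hog over share:   %d\n", jail_over_share(&hog, 100, 100));
	printf("light over share: %d\n", jail_over_share(&light, 100, 100));
	return (0);
}

Note this mirrors the hard cutoff in the patch: a jail at or over its share drops straight to PRI_MAX_IDLE rather than degrading gradually, which is what the in-line TODO about soft limits refers to.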


