Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 13 May 2007 08:27:52 GMT
From:      Chris Jones <cdjones@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 119762 for review
Message-ID:  <200705130827.l4D8RqZG088088@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=119762

Change 119762 by cdjones@cdjones_iconoclast on 2007/05/13 08:27:07

	Well, it compiles and links --- and runs (without jails) okay.  Now, to test....

Affected files ...

.. //depot/projects/soc2006/cdjones_jail_current/src/sys/compat/freebsd32/syscalls.master#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/kern_jail.c#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/kern_mib.c#2 integrate
.. //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/sched_4bsd.c#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/syscalls.master#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/sys/sys/jail.h#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/sys/vm/vm_pageout.c#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/sys/vm/vm_pageout.h#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jail/jail.8#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jail/jail.c#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jls/jls.8#2 edit
.. //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jtune/Makefile#1 add
.. //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jtune/jtune.8#1 add
.. //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jtune/jtune.c#1 add

Differences ...

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/compat/freebsd32/syscalls.master#2 (text+ko) ====

@@ -781,3 +781,5 @@
 474	AUE_NULL	NOPROTO	{ int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \
 				    struct sockaddr * from, __socklen_t *fromlenaddr, \
 				    struct sctp_sndrcvinfo *sinfo, int *msg_flags); }
+475     AUE_NULL        STD     { int jail_set_resource_limits(unsigned int jid, \
+                                    int cpushares, int memlimit); }

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/kern_jail.c#2 (text+ko) ====

@@ -5,6 +5,35 @@
  * can do whatever you want with this stuff. If we meet some day, and you think
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
+ *
+ *  Portions copyright (c) 2006 Chris Jones
+ *  All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Chris Jones
+ * thanks to the support of Google's Summer of Code program and
+ * mentoring by Kip Macy.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. 
+ *
  */
 
 #include <sys/cdefs.h>
@@ -15,6 +44,7 @@
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/kernel.h>
+#include <sys/kthread.h>
 #include <sys/systm.h>
 #include <sys/errno.h>
 #include <sys/sysproto.h>
@@ -33,6 +63,10 @@
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
 #include <net/if.h>
 #include <netinet/in.h>
 
@@ -78,6 +112,17 @@
     &jail_mount_allowed, 0,
     "Processes in jail can mount/unmount jail-friendly file systems");
 
+/* Master switch: jail pager threads only act while this is non-zero. */
+int	jail_limit_memory = 0;
+SYSCTL_INT(_security_jail, OID_AUTO, limit_jail_memory, CTLFLAG_RW,
+    &jail_limit_memory, 0, "Limit jails' memory usage");
+
+/* Seconds a jail pager thread sleeps between memory limit checks. */
+int	jail_memory_pager_interval = 5;
+SYSCTL_INT(_security_jail, OID_AUTO, jail_pager_interval, CTLFLAG_RW,
+    &jail_memory_pager_interval, 0,
+    "Interval between jail memory limit checks");
+
 /* allprison, lastprid, and prisoncount are protected by allprison_lock. */
 struct	prisonlist allprison;
 struct	sx allprison_lock;
@@ -114,6 +159,99 @@
 
 SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL);
 
+static void
+jpager_td(void *arg)
+{	/* kthread body: trims resident pages of one jail while it exceeds its memory limit */
+	struct proc *p;
+	struct prison *pr = arg;	/* the jail this thread serves */
+	struct thread *td;
+	long limit, cursize, newsize, usage;
+	int breakout;
+	int flags = J_PAGER_TD_ACTIVE;
+	pr->pr_pager_flags_ptr = &flags;
+	/* Other code stops this thread by storing J_PAGER_TD_DIE through pr_pager_flags_ptr. */
+	for (;;) {
+		if (flags & J_PAGER_TD_DIE)
+			break;
+		/* Work only when the global switch and this jail's limit are both non-zero. */
+		if (jail_limit_memory && pr->pr_mem_limit) {
+			/*
+			 * TODO: consider whether it might be better to start
+			 * pushing back when we approach the limit, rather than
+			 * when we hit it.
+			 *
+			 */
+			limit = prison_memory_limit(pr);
+			usage = prison_memory(pr);
+			
+			/*
+			 * The logic from vm_daemon() really needs to go here.
+			 * Problem: we want to push things below their rlimits,
+			 * and vm_daemon doesn't do that.  It'd be better to
+			 * refactor vm_daemon to fit, but this'll do for now.
+			 *
+			 */
+			
+			if ((usage - limit) > 0) {
+				sx_slock(&allproc_lock);
+				LIST_FOREACH(p, &allproc, p_list) {
+					/* Only processes whose credential points at this jail are touched. */
+					if (pr != p->p_ucred->cr_prison)
+						continue;
+					/* Processes flagged P_SYSTEM or P_WEXIT are left alone. */
+					PROC_LOCK(p);
+					if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
+						PROC_UNLOCK(p);
+						continue;
+					}
+					/* Skip the process if any thread is neither queued, running nor sleeping. */
+					mtx_lock_spin(&sched_lock);
+					breakout = 0;
+					FOREACH_THREAD_IN_PROC(p, td) {
+						if (!TD_ON_RUNQ(td) &&
+						    !TD_IS_RUNNING(td) &&
+						    !TD_IS_SLEEPING(td)) {
+							breakout = 1;
+							break;
+						}
+					}
+					mtx_unlock_spin(&sched_lock);
+					if (breakout) {
+						PROC_UNLOCK(p);
+						continue;
+					}
+					
+					/* NOTE: we differ here from vm_daemon b/c we don't
+					 * care about the rlimit; things that are exceeding that will
+					 * get caught in due course.  We need, however, to decrease
+					 * the pressure on our permitted memory allocation.  Fortunately,
+					 * we only care about eventually hitting the limit, so if we
+					 * don't get there right away, it's okay.
+					 */
+					
+					/* TODO: this arbitrarily reduces each process's space by
+					 * 6.25% (until it's completely swapped out) while
+					 * we're under memory pressure.  A better way would be
+					 * to either hit large processes first, or to hit the
+					 * least-active processes first, or go proportionally,
+					 * or ....
+					 */
+					newsize = cursize = vmspace_resident_count(p->p_vmspace);
+					newsize -= newsize / 16;	/* target is 15/16 of the current resident page count */
+					if (cursize < 0)	/* NOTE(review): tests cursize, not newsize -- confirm intent */
+						newsize = 0;
+					PROC_UNLOCK(p);
+					vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, newsize);
+				} /* end LIST_FOREACH procs */
+				sx_sunlock(&allproc_lock);
+			}
+		}
+		tsleep(pr, 0, "-", jail_memory_pager_interval * hz); /* a wakeup(pr) ends the sleep early */
+	}
+	
+	kthread_exit(0);
+}
+
 /*
  * struct jail_args {
  *	struct jail *jail;
@@ -126,6 +264,7 @@
 	struct prison *pr, *tpr;
 	struct prison_service *psrv;
 	struct jail j;
+	struct proc *j_pager_proc = NULL;
 	struct jail_attach_args jaa;
 	int vfslocked, error, tryprid;
 
@@ -156,6 +295,8 @@
 		goto e_dropvnref;
 	pr->pr_ip = j.ip_number;
 	pr->pr_linux = NULL;
+	pr->pr_mem_limit = j.mem_limit;
+	pr->pr_sched_shares = j.sched_shares;
 	pr->pr_securelevel = securelevel;
 	if (prison_service_slots == 0)
 		pr->pr_slots = NULL;
@@ -190,6 +331,11 @@
 	}
 	sx_sunlock(&allprison_lock);
 
+	if (kthread_create(jpager_td, pr, (void *) j_pager_proc, 0, 0, "jpager %d", pr->pr_id))
+		goto e_dropprref;
+	KASSERT(j_pager_proc != NULL, ("NULL j_pager_proc"));
+	pr->pr_pager = j_pager_proc;
+
 	error = jail_attach(td, &jaa);
 	if (error)
 		goto e_dropprref;
@@ -199,6 +345,10 @@
 	td->td_retval[0] = jaa.jid;
 	return (0);
 e_dropprref:
+	if (j_pager_proc != NULL) {
+		*pr->pr_pager_flags_ptr = J_PAGER_TD_DIE;
+		wakeup(pr);
+	}
 	sx_xlock(&allprison_lock);
 	LIST_REMOVE(pr, pr_list);
 	prisoncount--;
@@ -314,6 +464,9 @@
 	pr->pr_ref--;
 	if (pr->pr_ref == 0) {
 		mtx_unlock(&pr->pr_mtx);
+		/* Kill pager; no need to wait. */
+		*pr->pr_pager_flags_ptr = J_PAGER_TD_DIE;
+		wakeup(pr);
 		TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
 		taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
 		return;
@@ -435,6 +588,40 @@
 		ok = 0;
 	return (ok);
 }
+/* Given a prison, return its processes' total resident memory in bytes. */
+long
+prison_memory(struct prison *pr)
+{
+	struct proc *p;
+	long mem_used = 0;
+
+	/*
+	 * TODO: this walks every process in the system for each jail;
+	 * it would be cheaper to make one pass per period and update
+	 * every jail's usage from it.
+	 * The process list is walked under a shared allproc_lock, so the
+	 * caller must not already hold that lock.
+	 */
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (!jailed(p->p_ucred) || pr != p->p_ucred->cr_prison)
+			continue;
+		mem_used += vmspace_resident_count(p->p_vmspace);
+	}
+	sx_sunlock(&allproc_lock);
+	mem_used *= PAGE_SIZE;	/* resident page count -> bytes */
+	return (mem_used);
+}
+
+/* Given a prison, return its memory limit in bytes (0: no limit), read under pr_mtx. */
+long
+prison_memory_limit(struct prison *pr)
+{
+	long limit_bytes;	/* a byte count, so not a page-index type */
+	mtx_lock(&pr->pr_mtx);
+	limit_bytes = (long)pr->pr_mem_limit;
+	mtx_unlock(&pr->pr_mtx);
+	return (limit_bytes);
+}
 
 /*
  * Return 0 if jails permit p1 to frob p2, otherwise ESRCH.
@@ -566,6 +753,52 @@
 	}
 }
 
+/*
+ * Change the resource limits of an existing prison (superuser only).
+ *
+ * unsigned int jid: id of the jail to adjust
+ *
+ * int cpushares:  0 -> remove prison from cpu limits
+ *                -1 -> don't change existing shares
+ *                >0 -> set cpu shares
+ *
+ * int memlimit:   0 -> remove prison from mem limits
+ *                -1 -> don't change existing limit
+ *                >0 -> set memory limit (bytes)
+ *
+ * Returns 0 on success, the suser() error for unprivileged callers, or
+ * EINVAL for an unknown jid or an argument below -1.
+ * TODO: might a writable sysctl serve better than a new syscall?
+ */
+int
+jail_set_resource_limits(struct thread *td, struct jail_set_resource_limits_args *uap)
+{
+	struct prison *pr;
+	int error;
+
+	error = suser(td);
+	if (error)
+		return (error);
+	/* Anything below -1 would wrap when stored in the unsigned fields. */
+	if (uap->cpushares < -1 || uap->memlimit < -1)
+		return (EINVAL);
+	sx_slock(&allprison_lock);
+	LIST_FOREACH(pr, &allprison, pr_list) {
+		if (pr->pr_id == uap->jid)
+			break;
+	}
+	if (pr != NULL) {
+		mtx_lock(&pr->pr_mtx);
+		if (uap->cpushares != -1)
+			pr->pr_sched_shares = uap->cpushares;
+		if (uap->memlimit != -1)
+			pr->pr_mem_limit = uap->memlimit;
+		mtx_unlock(&pr->pr_mtx);
+	}
+	sx_sunlock(&allprison_lock);
+	return (pr != NULL ? 0 : EINVAL);
+}
+
 /*
  * Check with permission for a specific privilege is granted within jail.  We
  * have a specific list of accepted privileges; the rest are denied.
@@ -954,6 +1187,10 @@
 		xp->pr_version = XPRISON_VERSION;
 		xp->pr_id = pr->pr_id;
 		xp->pr_ip = pr->pr_ip;
+		xp->pr_sched_shares = pr->pr_sched_shares;
+		xp->pr_estcpu = pr->pr_estcpu;
+		xp->pr_mem_limit = pr->pr_mem_limit;
+		xp->pr_mem_usage = pr->pr_mem_usage;
 		strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path));
 		mtx_lock(&pr->pr_mtx);
 		strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host));

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/kern_mib.c#2 (text+ko) ====

@@ -36,12 +36,13 @@
  */
 
 #include <sys/cdefs.h>
-__FBSDID("$FreeBSD: src/sys/kern/kern_mib.c,v 1.77 2007/04/09 19:18:09 pjd Exp $");
+__FBSDID("$FreeBSD: src/sys/kern/kern_mib.c,v 1.78 2007/05/12 19:38:18 wkoszek Exp $");
 
 #include "opt_posix.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/sbuf.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/proc.h>
@@ -295,6 +296,38 @@
     CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_kern_securelvl,
     "I", "Current secure level");
 
+/* Actual kernel configuration options. */
+extern char kernconfstring[];
+
+static int
+sysctl_kern_config(SYSCTL_HANDLER_ARGS)
+{
+	struct sbuf *sb;
+	int error;
+	char *p;
+
+	/* Export kernconfstring (or a placeholder) as one newline-ended string. */
+	sb = sbuf_new(NULL, NULL, 2048, SBUF_AUTOEXTEND);
+	if (sb == NULL)
+		return (ENOMEM);
+	sbuf_clear(sb);
+	p = kernconfstring;
+	if (p == NULL || *p == '\0') {
+		sbuf_printf(sb, "No kernel configuration\n");
+	} else {
+		sbuf_printf(sb, "%s", p);
+	}
+	sbuf_trim(sb);
+	sbuf_putc(sb, '\n');
+	sbuf_finish(sb);
+	error = sysctl_handle_string(oidp, sbuf_data(sb), sbuf_len(sb), req);
+	/* Release the sbuf on every path, including a failed copy-out. */
+	sbuf_delete(sb);
+	return (error);
+}
+SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW, 
+    0, 0, sysctl_kern_config, "", "Kernel configuration file");
+
 char domainname[MAXHOSTNAMELEN];
 SYSCTL_STRING(_kern, KERN_NISDOMAINNAME, domainname, CTLFLAG_RW,
     &domainname, sizeof(domainname), "Name of the current YP/NIS domain");

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/sched_4bsd.c#2 (text+ko) ====

@@ -39,6 +39,7 @@
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -122,6 +123,10 @@
 static int	forward_wakeup(int  cpunum);
 #endif
 
+static uint32_t total_cpu_sched_shares;
+static u_int total_est_cpu;
+extern int prisoncount;
+
 static struct kproc_desc sched_kp = {
         "schedcpu",
         schedcpu_thread,
@@ -227,6 +232,18 @@
 	   "allow threads to share a quantum");
 #endif
 
+/* Non-zero enables per-jail CPU accounting and the priority skew. */
+static int sched_limitjailcpu = 0;
+SYSCTL_INT(_kern_sched, OID_AUTO, limit_jail_cpu, CTLFLAG_RW,
+    &sched_limitjailcpu, 0,
+    "limit jailed process cpu usage");
+
+/* Shares credited to unjailed processes when CPU shares are totalled. */
+static int sched_unjailedProcessShares = 0;
+SYSCTL_INT(_kern_sched, OID_AUTO, system_cpu_shares, CTLFLAG_RW,
+    &sched_unjailedProcessShares, 0,
+    "number of shares to allocate to unjailed processes");
+
 static __inline void
 sched_load_add(void)
 {
@@ -372,10 +389,23 @@
 	struct thread *td;
 	struct proc *p;
 	struct td_sched *ts;
+	struct prison *pr;
 	int awake, realstathz;
 
 	realstathz = stathz ? stathz : hz;
+	/*
+	 * Hold allprison_lock (shared) for the whole pass so the prison list
+	 * stays stable while per-jail CPU usage is recalculated; the per-jail
+	 * mutexes are not taken.  TODO: this is excessively icky.
+	 */
 	sx_slock(&allproc_lock);
+	sx_slock(&allprison_lock);
+	if (prisoncount) {
+		LIST_FOREACH(pr, &allprison, pr_list) {
+			pr->pr_estcpu = 0;
+		}
+	}
+	total_est_cpu = 0;
 	FOREACH_PROC_IN_SYSTEM(p) {
 		/*
 		 * Prevent state changes and protect run queue.
@@ -459,11 +489,18 @@
 			if (td->td_slptime > 1)
 				continue;
 			td->td_estcpu = decay_cpu(loadfac, td->td_estcpu);
+			total_est_cpu += td->td_estcpu;
+			if (sched_limitjailcpu &&
+				NULL != td->td_proc->p_ucred &&
+				NULL != td->td_proc->p_ucred->cr_prison)
+				td->td_proc->p_ucred->cr_prison->pr_estcpu +=
+					td->td_estcpu;
 		      	resetpriority(td);
 			resetpriority_thread(td);
 		} /* end of thread loop */
 		mtx_unlock_spin(&sched_lock);
 	} /* end of process loop */
+	sx_sunlock(&allprison_lock);
 	sx_sunlock(&allproc_lock);
 }
 
@@ -473,8 +510,29 @@
 static void
 schedcpu_thread(void)
 {
+struct prison *pr;
+u_int32_t shares = 0;
 
 	for (;;) {
+		if (sched_limitjailcpu) {
+			/* 
+			 * Update total jail CPU shares in case they've changed.
+			 * Safe to read pr_sched_shares without mutex because
+			 * in worst case, we get a bogus value which will be 
+			 * corrected on the next pass.
+			 *
+			 * TODO: this should be done by forcing a recalculation
+			 * when jail CPU shares are added / changed, rather than
+			 * doing it every second.
+			 */
+			
+			shares = sched_unjailedProcessShares;
+			LIST_FOREACH(pr, &allprison, pr_list) {
+				shares += pr->pr_sched_shares;
+			}
+			total_cpu_sched_shares = shares;
+		}
+
 		schedcpu();
 		pause("-", hz);
 	}
@@ -512,12 +570,37 @@
 resetpriority(struct thread *td)
 {
 	register unsigned int newpriority;
+	struct prison *pr = NULL;
+	if (NULL != td->td_proc->p_ucred)
+		pr = td->td_proc->p_ucred->cr_prison;
 
 	if (td->td_pri_class == PRI_TIMESHARE) {
 		newpriority = PUSER + td->td_estcpu / INVERSE_ESTCPU_WEIGHT +
 		    NICE_WEIGHT * (td->td_proc->p_nice - PRIO_MIN);
-		newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
-		    PRI_MAX_TIMESHARE);
+		if (sched_limitjailcpu && NULL != pr) {
+			/*
+			 * Skew the priority by the jail's share of CPU resources:
+			 * a jail is held back once its fraction of the total estcpu
+			 * reaches its fraction of the total shares, where the total
+			 * includes kern.sched.system_cpu_shares for unjailed work.
+			 *
+			 * TODO: this is a hard limit; soft limits should exist too.
+			 */
+			register unsigned int skew;
+			skew = pr->pr_estcpu * total_cpu_sched_shares;
+			skew /= max(total_est_cpu, 1) * max(pr->pr_sched_shares, 1);
+			if (skew > 0) {
+				/* wait your turn until your cpu usage's proportionate */
+				newpriority = PRI_MAX_IDLE - 1;
+			} else {
+				newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
+						  PRI_MAX_TIMESHARE);
+			}
+		} else {
+			newpriority = min(max(newpriority, PRI_MIN_TIMESHARE),
+					  PRI_MAX_TIMESHARE);
+		}
+		
 		sched_user_prio(td, newpriority);
 	}
 }

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/kern/syscalls.master#2 (text+ko) ====

@@ -835,5 +835,8 @@
 474     AUE_NULL        STD    { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \
 				    struct sockaddr * from, __socklen_t *fromlenaddr, \
 				    struct sctp_sndrcvinfo *sinfo, int *msg_flags); }
+475	AUE_NULL	STD	{ int jail_set_resource_limits(unsigned int jid, \
+				    int cpushares, int memlimit); }
+
 ; Please copy any additions and changes to the following compatability tables:
 ; sys/compat/freebsd32/syscalls.master

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/sys/jail.h#2 (text+ko) ====

@@ -18,6 +18,8 @@
 	char		*path;
 	char		*hostname;
 	u_int32_t	ip_number;
+	unsigned int    sched_shares;
+	unsigned int    mem_limit;
 };
 
 struct xprison {
@@ -26,13 +28,24 @@
 	char		 pr_path[MAXPATHLEN];
 	char 		 pr_host[MAXHOSTNAMELEN];
 	u_int32_t	 pr_ip;
+        unsigned int     pr_sched_shares;
+        unsigned int     pr_estcpu;
+        unsigned int     pr_mem_limit;
+        unsigned int     pr_mem_usage;
 };
-#define	XPRISON_VERSION	1
+#define	XPRISON_VERSION	2
+
+#define JAIL_MINIMUM_SHARES 1
+
+#define J_PAGER_TD_ACTIVE	0x01
+#define J_PAGER_TD_DIE		0x02
+#define J_PAGER_TD_DEAD		0x04
 
 #ifndef _KERNEL
 
 int jail(struct jail *);
 int jail_attach(int);
+int jail_set_resource_limits(unsigned int, int, int);
 
 #else /* _KERNEL */
 
@@ -74,6 +87,12 @@
 	struct task	 pr_task;			/* (d) destroy task */
 	struct mtx	 pr_mtx;
 	void		**pr_slots;			/* (p) additional data */
+	u_int32_t        pr_sched_shares;		/* (p) jail priority */
+	u_int		 pr_estcpu;			/* (p) est. cpu of jail */
+	struct proc     *pr_pager;                      /* (c) pager pid */
+	int             *pr_pager_flags_ptr;            /* (p) communication to pager */
+	size_t           pr_mem_limit;                  /* (p) memory allocation limit */
+	size_t           pr_mem_usage;                  /* (p) memory in use */
 };
 #endif /* _KERNEL || _WANT_PRISON */
 
@@ -114,6 +133,8 @@
 int prison_if(struct ucred *cred, struct sockaddr *sa);
 int prison_ip(struct ucred *cred, int flag, u_int32_t *ip);
 int prison_priv_check(struct ucred *cred, int priv);
+long prison_memory(struct prison *pr);
+long prison_memory_limit(struct prison *pr);
 void prison_remote_ip(struct ucred *cred, int flags, u_int32_t *ip);
 
 /*

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/vm/vm_pageout.c#2 (text+ko) ====

@@ -204,7 +204,6 @@
 int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
 
 #if !defined(NO_SWAPPING)
-static void vm_pageout_map_deactivate_pages(vm_map_t, long);
 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
 static void vm_req_vmdaemon(void);
 #endif
@@ -596,7 +595,7 @@
  * deactivate some number of pages in a map, try to do it fairly, but
  * that is really hard to do.
  */
-static void
+void
 vm_pageout_map_deactivate_pages(map, desired)
 	vm_map_t map;
 	long desired;

==== //depot/projects/soc2006/cdjones_jail_current/src/sys/vm/vm_pageout.h#2 (text+ko) ====

@@ -87,6 +87,8 @@
  *	Exported routines.
  */
 
+void vm_pageout_map_deactivate_pages(vm_map_t map, long desired);
+
 /*
  *	Signal pageout-daemon and wait for it.
  */

==== //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jail/jail.8#2 (text+ko) ====

@@ -45,6 +45,8 @@
 .Op Fl J Ar jid_file
 .Op Fl s Ar securelevel
 .Op Fl l u Ar username | Fl U Ar username
+.Op Fl S Ar cpu_shares
+.Op Fl M Ar mem_limit
 .Ar path hostname ip-number command ...
 .Sh DESCRIPTION
 The
@@ -88,6 +90,10 @@
 The user name from jailed environment as whom the
 .Ar command
 should run.
+.It Fl S Ar cpu_shares
+CPU shares to assign to the prison.
+.It Fl M Ar mem_limit
+Amount of memory (in MB) to allow the prison to use.
 .It Ar path
 Directory which is to be the root of the prison.
 .It Ar hostname
@@ -546,6 +552,17 @@
 privileged, and may manipulate system file flags subject to the usual
 constraints on
 .Va kern.securelevel .
+.It Va security.jail.limit_jail_memory , Va security.jail.jail_pager_interval
+These MIB entries determine whether and how often (in seconds) a
+jail's memory-limit monitoring daemon will run, and consequently the 
+period during which a jail can be overcommitted for resident memory.
+.It Va kern.sched.limit_jail_cpu
+This MIB entry sets whether CPU usage limits will be enforced 
+against processes in jails with CPU limits.
+.It Va kern.sched.system_cpu_shares
+Number of CPU usage shares to allocate to unjailed processes for the 
+purposes of determining CPU usage permitted for jailed processes.  
+Unjailed processes are not subject to CPU usage limits.
 .It Va security.jail.mount_allowed
 This MIB entry determines if a privileged user inside a jail will be
 able to mount and unmount file system types marked as jail-friendly.

==== //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jail/jail.c#2 (text+ko) ====

@@ -56,6 +56,8 @@
 	struct in_addr in;
 	gid_t groups[NGROUPS];
 	int ch, i, iflag, Jflag, lflag, ngroups, securelevel, uflag, Uflag;
+	unsigned int mem_limit = 0;
+	unsigned int sched_shares = 0;
 	char path[PATH_MAX], *ep, *username, *JidFile;
 	static char *cleanenv;
 	const char *shell, *p = NULL;
@@ -67,7 +69,7 @@
 	username = JidFile = cleanenv = NULL;
 	fp = NULL;
 
-	while ((ch = getopt(argc, argv, "ils:u:U:J:")) != -1) {
+	while ((ch = getopt(argc, argv, "ilS:M:s:u:U:J:")) != -1) {
 		switch (ch) {
 		case 'i':
 			iflag = 1;
@@ -76,6 +78,13 @@
 			JidFile = optarg;
 			Jflag = 1;
 			break;
+		case 'M':
+			mem_limit = atoi(optarg);
+			mem_limit *= 1024 * 1024;
+			break;
+		case 'S':
+			sched_shares = atoi(optarg);
+			break;
 		case 's':
 			ltmp = strtol(optarg, &ep, 0);
 			if (*ep || ep == optarg || ltmp > INT_MAX || !ltmp)
@@ -118,6 +127,8 @@
 	if (inet_aton(argv[2], &in) == 0)
 		errx(1, "Could not make sense of ip-number: %s", argv[2]);
 	j.ip_number = ntohl(in.s_addr);
+	j.mem_limit = mem_limit;
+	j.sched_shares = sched_shares;
 	if (Jflag) {
 		fp = fopen(JidFile, "w");
 		if (fp == NULL)
@@ -183,7 +194,8 @@
 {
 
 	(void)fprintf(stderr, "%s%s%s\n",
-	     "usage: jail [-i] [-J jid_file] [-s securelevel] [-l -u ",
+	     "usage: jail [-i] [-J jid_file] [-M mem_limit] ",
+	     "[-S cpu_shares] [-s securelevel] [-l -u ",
 	     "username | -U username]",
 	     " path hostname ip-number command ...");
 	exit(1);

==== //depot/projects/soc2006/cdjones_jail_current/src/usr.sbin/jls/jls.8#2 (text+ko) ====

@@ -42,7 +42,8 @@
 .Sh SEE ALSO
 .Xr jail 2 ,
 .Xr jail 8 ,
-.Xr jexec 8
+.Xr jexec 8 ,
+.Xr jtune 8
 .Sh HISTORY
 The
 .Nm



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200705130827.l4D8RqZG088088>