Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 27 Jul 2006 09:24:31 GMT
From:      Chris Jones <cdjones@FreeBSD.org>
To:        Perforce Change Reviews <perforce@FreeBSD.org>
Subject:   PERFORCE change 102540 for review
Message-ID:  <200607270924.k6R9OVeM055896@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=102540

Change 102540 by cdjones@cdjones-impulse on 2006/07/27 09:24:09

	Under memory pressure, remove a page from each process in the jail until the pressure goes away.  This doesn't prevent a jail from exceeding its memory limit, but rather tends to return it to the limit; there may be merit in pushing it further below the limit.

Affected files ...

.. //depot/projects/soc2006/cdjones_jail/src/sys/kern/kern_jail.c#14 edit
.. //depot/projects/soc2006/cdjones_jail/src/sys/sys/jail.h#14 edit
.. //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.c#3 edit
.. //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.h#2 edit

Differences ...

==== //depot/projects/soc2006/cdjones_jail/src/sys/kern/kern_jail.c#14 (text+ko) ====

@@ -27,6 +27,7 @@
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_map.h>
+#include <vm/vm_pageout.h>
 #include <sys/taskqueue.h>
 #include <sys/jail.h>
 #include <sys/lock.h>
@@ -119,22 +120,88 @@
 static void
 jpager_td(void *arg)
 {
+  struct proc *p;
   struct prison *pr;
+  struct thread *td;
+  vm_pindex_t limit, size, usage;
+  int breakout;
+
   pr = arg;
 
   for (;;) {
     if (pr->pr_pager_flags & J_PAGER_TD_DIE)
       break;
 
-    if (prison_memory(pr) > prison_memory_limit(pr)) {
-      /* the logic from vm_daemon() really needs to go here.
-	 TODO: refactor vm_daemon to optionally act on specific jails. */
+    /* TODO: consider whether it might be better to start
+     * pushing back when we approach the limit, rather than
+     * when we hit it.
+     */
+    limit = prison_memory_limit(pr);
+    usage = prison_memory(pr);
+
+    if ((usage - limit) > 0)
+      continue;
+
+    /* The logic from vm_daemon() really needs to go here.
+     * Problem: we want to push things below their rlimits.
+     *
+     * TODO: refactor vm_daemon to optionally act on specific jails?
+     */
+    
+    sx_slock(&allproc_lock);
+    LIST_FOREACH(p, &allproc, p_list) {
+      
+      if (pr != p->p_ucred->cr_prison)
+	continue;
+      
+      PROC_LOCK(p);
+      if (p->p_flag & (P_SYSTEM | P_WEXIT)) {
+	PROC_UNLOCK(p);
+	continue;
+      }
+      
+      mtx_lock_spin(&sched_lock);
+      breakout = 0;
+      FOREACH_THREAD_IN_PROC(p, td) {
+	if (!TD_ON_RUNQ(td) &&
+	    !TD_IS_RUNNING(td) &&
+	    !TD_IS_SLEEPING(td)) {
+	  breakout = 1;
+	  break;
+	}
+      }
+      mtx_unlock_spin(&sched_lock);
+      if (breakout) {
+	PROC_UNLOCK(p);
+	continue;
+      }
+
+      /* NOTE: we differ here from vm_daemon b/c we don't 
+       * care about the rlimit; things that are exceeding that will
+       * get caught in due course.  We need, however, to decrease
+       * the pressure on our permitted memory allocation.  Fortunately, 
+       * we only care about eventually hitting the limit, so if we
+       * don't get there right away, it's okay.
+       */      
+
+      /* TODO: this arbitrarily reduces each process's space by
+       * one page (until it's completely swapped out) while
+       * we're under memory pressure.  A better way would be 
+       * to either hit large processes first, or to hit the
+       * least-active processes first, or .... 
+       */
+      size = vmspace_resident_count(p->p_vmspace) - 1;
+      if (size < 0)
+	size = 0;
+      vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, size);
+      
+      sx_sunlock(&allproc_lock);
     }
-
+    
     /* TODO --- make interval into a sysctl? */
     tsleep(pr, 0, "-", hz); 
   }
-
+  
   pr->pr_pager_flags = J_PAGER_TD_DEAD;
   kthread_exit(0);
 }
@@ -462,7 +529,7 @@
 }
 
 /* Given credential, return memory usage in bytes. */
-int
+vm_pindex_t
 prison_memory(struct prison *pr)
 {
   struct proc *p;
@@ -491,7 +558,7 @@
 }
 
 /* Given credential, return permitted memory usage in bytes. */
-int 
+vm_pindex_t
 prison_memory_limit(struct prison *pr)
 {
   return pr->pr_mem_limit;

==== //depot/projects/soc2006/cdjones_jail/src/sys/sys/jail.h#14 (text+ko) ====

@@ -143,8 +143,8 @@
 void prison_hold(struct prison *pr);
 int prison_if(struct ucred *cred, struct sockaddr *sa);
 int prison_ip(struct ucred *cred, int flag, u_int32_t *ip);
-int prison_memory(struct prison *pr);
-int prison_memory_limit(struct prison *pr);
+vm_pindex_t prison_memory(struct prison *pr);
+vm_pindex_t prison_memory_limit(struct prison *pr);
 void prison_remote_ip(struct ucred *cred, int flags, u_int32_t *ip);
 
 #endif /* _KERNEL */

==== //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.c#3 (text+ko) ====

@@ -205,7 +205,7 @@
 int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
 
 #if !defined(NO_SWAPPING)
-static void vm_pageout_map_deactivate_pages(vm_map_t, long);
+/* static void vm_pageout_map_deactivate_pages(vm_map_t, long); */
 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
 static void vm_req_vmdaemon(void);
 #endif
@@ -592,7 +592,7 @@
  * deactivate some number of pages in a map, try to do it fairly, but
  * that is really hard to do.
  */
-static void
+void
 vm_pageout_map_deactivate_pages(map, desired)
 	vm_map_t map;
 	long desired;

==== //depot/projects/soc2006/cdjones_jail/src/sys/vm/vm_pageout.h#2 (text+ko) ====

@@ -87,6 +87,8 @@
  *	Exported routines.
  */
 
+void vm_pageout_map_deactivate_pages(vm_map_t map, long desired);
+
 /*
  *	Signal pageout-daemon and wait for it.
  */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200607270924.k6R9OVeM055896>