Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 16 Aug 2019 09:43:49 +0000 (UTC)
From:      Konstantin Belousov <kib@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r351114 - head/sys/vm
Message-ID:  <201908160943.x7G9hnH3027751@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kib
Date: Fri Aug 16 09:43:49 2019
New Revision: 351114
URL: https://svnweb.freebsd.org/changeset/base/351114

Log:
  Fix OOM handling of some corner cases.
  
  In addition to pagedaemon initiating OOM, also do it from the
  vm_fault() internals.  Namely, if the thread waits for a free page to
  satisfy page fault some preconfigured amount of time, trigger OOM.
  These triggers are rate-limited, due to a usual case of several
  threads of the same multi-threaded process to enter fault handler
  simultaneously.  The faults from pagedaemon threads participate in the
  calculation of OOM rate, but are not under the limit.
  
  Reviewed by:	markj (previous version)
  Tested by:	pho
  Discussed with:	alc
  Sponsored by:	The FreeBSD Foundation
  MFC after:	2 weeks
  Differential revision:	https://reviews.freebsd.org/D13671

Modified:
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_page.c
  head/sys/vm/vm_pageout.c
  head/sys/vm/vm_pageout.h

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Fri Aug 16 05:06:54 2019	(r351113)
+++ head/sys/vm/vm_fault.c	Fri Aug 16 09:43:49 2019	(r351114)
@@ -135,6 +135,18 @@ static void vm_fault_dontneed(const struct faultstate 
 static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra,
 	    int backward, int forward, bool obj_locked);
 
+static int vm_pfault_oom_attempts = 3;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_attempts, CTLFLAG_RWTUN,
+    &vm_pfault_oom_attempts, 0,
+    "Number of page allocation attempts in page fault handler before it "
+    "triggers OOM handling");
+
+static int vm_pfault_oom_wait = 10;
+SYSCTL_INT(_vm, OID_AUTO, pfault_oom_wait, CTLFLAG_RWTUN,
+    &vm_pfault_oom_wait, 0,
+    "Number of seconds to wait for free pages before retrying "
+    "the page fault handler");
+
 static inline void
 release_page(struct faultstate *fs)
 {
@@ -570,7 +582,7 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot
 	vm_pindex_t retry_pindex;
 	vm_prot_t prot, retry_prot;
 	int ahead, alloc_req, behind, cluster_offset, error, era, faultcount;
-	int locked, nera, result, rv;
+	int locked, nera, oom, result, rv;
 	u_char behavior;
 	boolean_t wired;	/* Passed by reference. */
 	bool dead, hardfault, is_first_object_locked;
@@ -581,7 +593,9 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot
 	nera = -1;
 	hardfault = false;
 
-RetryFault:;
+RetryFault:
+	oom = 0;
+RetryFault_oom:
 
 	/*
 	 * Find the backing store object and offset into it to begin the
@@ -827,7 +841,18 @@ RetryFault:;
 			}
 			if (fs.m == NULL) {
 				unlock_and_deallocate(&fs);
-				vm_waitpfault(dset);
+				if (vm_pfault_oom_attempts < 0 ||
+				    oom < vm_pfault_oom_attempts) {
+					oom++;
+					vm_waitpfault(dset,
+					    vm_pfault_oom_wait * hz);
+					goto RetryFault_oom;
+				}
+				if (bootverbose)
+					printf(
+	"proc %d (%s) failed to alloc page on fault, starting OOM\n",
+					    curproc->p_pid, curproc->p_comm);
+				vm_pageout_oom(VM_OOM_MEM_PF);
 				goto RetryFault;
 			}
 		}

Modified: head/sys/vm/vm_page.c
==============================================================================
--- head/sys/vm/vm_page.c	Fri Aug 16 05:06:54 2019	(r351113)
+++ head/sys/vm/vm_page.c	Fri Aug 16 09:43:49 2019	(r351114)
@@ -3032,7 +3032,7 @@ vm_domain_alloc_fail(struct vm_domain *vmd, vm_object_
  *	  this balance without careful testing first.
  */
 void
-vm_waitpfault(struct domainset *dset)
+vm_waitpfault(struct domainset *dset, int timo)
 {
 
 	/*
@@ -3044,7 +3044,7 @@ vm_waitpfault(struct domainset *dset)
 	if (vm_page_count_min_set(&dset->ds_mask)) {
 		vm_min_waiters++;
 		msleep(&vm_min_domains, &vm_domainset_lock, PUSER | PDROP,
-		    "pfault", 0);
+		    "pfault", timo);
 	} else
 		mtx_unlock(&vm_domainset_lock);
 }

Modified: head/sys/vm/vm_pageout.c
==============================================================================
--- head/sys/vm/vm_pageout.c	Fri Aug 16 05:06:54 2019	(r351113)
+++ head/sys/vm/vm_pageout.c	Fri Aug 16 09:43:49 2019	(r351114)
@@ -1720,6 +1720,12 @@ vm_pageout_oom_pagecount(struct vmspace *vmspace)
 	return (res);
 }
 
+static int vm_oom_ratelim_last;
+static int vm_oom_pf_secs = 10;
+SYSCTL_INT(_vm, OID_AUTO, oom_pf_secs, CTLFLAG_RWTUN, &vm_oom_pf_secs, 0,
+    "");
+static struct mtx vm_oom_ratelim_mtx;
+
 void
 vm_pageout_oom(int shortage)
 {
@@ -1727,9 +1733,31 @@ vm_pageout_oom(int shortage)
 	vm_offset_t size, bigsize;
 	struct thread *td;
 	struct vmspace *vm;
+	int now;
 	bool breakout;
 
 	/*
+	 * For OOM requests originating from vm_fault(), there is a high
+	 * chance that a single large process faults simultaneously in
+	 * several threads.  Also, on an active system running many
+	 * processes of middle-size, like buildworld, all of them
+	 * could fault almost simultaneously as well.
+	 *
+	 * To avoid killing too many processes, rate-limit OOMs
+	 * initiated by vm_fault() time-outs on the waits for free
+	 * pages.
+	 */
+	mtx_lock(&vm_oom_ratelim_mtx);
+	now = ticks;
+	if (shortage == VM_OOM_MEM_PF &&
+	    (u_int)(now - vm_oom_ratelim_last) < hz * vm_oom_pf_secs) {
+		mtx_unlock(&vm_oom_ratelim_mtx);
+		return;
+	}
+	vm_oom_ratelim_last = now;
+	mtx_unlock(&vm_oom_ratelim_mtx);
+
+	/*
 	 * We keep the process bigproc locked once we find it to keep anyone
 	 * from messing with it; however, there is a possibility of
 	 * deadlock if process B is bigproc and one of its child processes
@@ -1793,7 +1821,7 @@ vm_pageout_oom(int shortage)
 			continue;
 		}
 		size = vmspace_swap_count(vm);
-		if (shortage == VM_OOM_MEM)
+		if (shortage == VM_OOM_MEM || shortage == VM_OOM_MEM_PF)
 			size += vm_pageout_oom_pagecount(vm);
 		vm_map_unlock_read(&vm->vm_map);
 		vmspace_free(vm);
@@ -2048,6 +2076,7 @@ vm_pageout(void)
 	p = curproc;
 	td = curthread;
 
+	mtx_init(&vm_oom_ratelim_mtx, "vmoomr", NULL, MTX_DEF);
 	swap_pager_swap_init();
 	for (first = -1, i = 0; i < vm_ndomains; i++) {
 		if (VM_DOMAIN_EMPTY(i)) {

Modified: head/sys/vm/vm_pageout.h
==============================================================================
--- head/sys/vm/vm_pageout.h	Fri Aug 16 05:06:54 2019	(r351113)
+++ head/sys/vm/vm_pageout.h	Fri Aug 16 09:43:49 2019	(r351114)
@@ -79,7 +79,8 @@ extern u_long vm_page_max_user_wired;
 extern int vm_pageout_page_count;
 
 #define	VM_OOM_MEM	1
-#define	VM_OOM_SWAPZ	2
+#define	VM_OOM_MEM_PF	2
+#define	VM_OOM_SWAPZ	3
 
 /*
  * vm_lowmem flags.
@@ -96,7 +97,7 @@ extern int vm_pageout_page_count;
  */
 
 void vm_wait(vm_object_t obj);
-void vm_waitpfault(struct domainset *);
+void vm_waitpfault(struct domainset *, int timo);
 void vm_wait_domain(int domain);
 void vm_wait_min(void);
 void vm_wait_severe(void);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201908160943.x7G9hnH3027751>