Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 20 Dec 2015 02:05:33 +0000 (UTC)
From:      "Jonathan T. Looney" <jtl@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r292484 - in head: share/man/man9 sys/kern sys/vm
Message-ID:  <201512200205.tBK25Y2H039542@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jtl
Date: Sun Dec 20 02:05:33 2015
New Revision: 292484
URL: https://svnweb.freebsd.org/changeset/base/292484

Log:
  Add a safety net to reclaim mbufs when one of the mbuf zones becomes
  exhausted.
  
  It is possible for a bug in the code (or, theoretically, even unusual
  network conditions) to exhaust all possible mbufs or mbuf clusters.
  When this occurs, things can grind to a halt fairly quickly. However,
  we currently do not call mb_reclaim() unless the entire system is
  experiencing a low-memory condition.
  
  While it is best to try to prevent exhaustion of one of the mbuf zones,
  it would also be useful to have a mechanism to attempt to recover from
  these situations by freeing "expendable" mbufs.
  
  This patch makes two changes:
  
  a) The patch adds a generic API to the UMA zone allocator to set a
  function that should be called when an allocation fails because the
  zone limit has been reached. Because of the way this function can be
  called, it really should do minimal work.
  
  b) The patch uses this API to try to free mbufs when an allocation
  fails from one of the mbuf zones because the zone limit has been
  reached. The function schedules a callout to run mb_reclaim().
  
  Differential Revision:	https://reviews.freebsd.org/D3864
  Reviewed by:	gnn
  Comments by:	rrs, glebius
  MFC after:	2 weeks
  Sponsored by:	Juniper Networks

Modified:
  head/share/man/man9/Makefile
  head/share/man/man9/zone.9
  head/sys/kern/kern_mbuf.c
  head/sys/vm/uma.h
  head/sys/vm/uma_core.c
  head/sys/vm/uma_int.h

Modified: head/share/man/man9/Makefile
==============================================================================
--- head/share/man/man9/Makefile	Sun Dec 20 00:58:22 2015	(r292483)
+++ head/share/man/man9/Makefile	Sun Dec 20 02:05:33 2015	(r292484)
@@ -1911,6 +1911,7 @@ MLINKS+=zone.9 uma.9 \
 	zone.9 uma_zone_get_cur.9 \
 	zone.9 uma_zone_get_max.9 \
 	zone.9 uma_zone_set_max.9 \
-	zone.9 uma_zone_set_warning.9
+	zone.9 uma_zone_set_warning.9 \
+	zone.9 uma_zone_set_maxaction.9
 
 .include <bsd.prog.mk>

Modified: head/share/man/man9/zone.9
==============================================================================
--- head/share/man/man9/zone.9	Sun Dec 20 00:58:22 2015	(r292483)
+++ head/share/man/man9/zone.9	Sun Dec 20 02:05:33 2015	(r292484)
@@ -25,7 +25,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd February 7, 2014
+.Dd December 20, 2015
 .Dt ZONE 9
 .Os
 .Sh NAME
@@ -39,7 +39,8 @@
 .Nm uma_zone_set_max,
 .Nm uma_zone_get_max,
 .Nm uma_zone_get_cur,
-.Nm uma_zone_set_warning
+.Nm uma_zone_set_warning,
+.Nm uma_zone_set_maxaction
 .Nd zone allocator
 .Sh SYNOPSIS
 .In sys/param.h
@@ -71,6 +72,8 @@
 .Fn uma_zone_get_cur "uma_zone_t zone"
 .Ft void
 .Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
+.Ft void
+.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
 .In sys/sysctl.h
 .Fn SYSCTL_UMA_MAX parent nbr name access zone descr
 .Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
@@ -307,13 +310,21 @@ The
 .Fn uma_zone_set_warning
 function sets a warning that will be printed on the system console when the
 given zone becomes full and fails to allocate an item.
-The warning will be printed not often than every five minutes.
+The warning will be printed no more often than every five minutes.
 Warnings can be turned off globally by setting the
 .Va vm.zone_warnings
 sysctl tunable to
 .Va 0 .
 .Pp
 The
+.Fn uma_zone_set_maxaction
+function sets a function that will be called when the given zone becomes full
+and fails to allocate an item.
+The function will be called with the zone locked. Also, the function
+that called the allocation function may have held additional locks. Therefore,
+this function should do very little work (similar to a signal handler).
+.Pp
+The
 .Fn SYSCTL_UMA_MAX parent nbr name access zone descr
 macro declares a static
 .Xr sysctl

Modified: head/sys/kern/kern_mbuf.c
==============================================================================
--- head/sys/kern/kern_mbuf.c	Sun Dec 20 00:58:22 2015	(r292483)
+++ head/sys/kern/kern_mbuf.c	Sun Dec 20 02:05:33 2015	(r292484)
@@ -32,11 +32,14 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/malloc.h>
+#include <sys/types.h>
 #include <sys/systm.h>
 #include <sys/mbuf.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/protosw.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -272,6 +275,12 @@ uma_zone_t	zone_jumbo16;
 uma_zone_t	zone_ext_refcnt;
 
 /*
+ * Callout to assist us in freeing mbufs.
+ */
+static struct callout	mb_reclaim_callout;
+static struct mtx	mb_reclaim_callout_mtx;
+
+/*
  * Local prototypes.
  */
 static int	mb_ctor_mbuf(void *, int, void *, int);
@@ -285,6 +294,7 @@ static void	mb_zfini_pack(void *, int);
 
 static void	mb_reclaim(void *);
 static void    *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
+static void	mb_maxaction(uma_zone_t);
 
 /* Ensure that MSIZE is a power of 2. */
 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
@@ -310,6 +320,7 @@ mbuf_init(void *dummy)
 	if (nmbufs > 0)
 		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
 	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
+	uma_zone_set_maxaction(zone_mbuf, mb_maxaction);
 
 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -322,6 +333,7 @@ mbuf_init(void *dummy)
 	if (nmbclusters > 0)
 		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
 	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
+	uma_zone_set_maxaction(zone_clust, mb_maxaction);
 
 	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
 	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);
@@ -338,6 +350,7 @@ mbuf_init(void *dummy)
 	if (nmbjumbop > 0)
 		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
 	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
+	uma_zone_set_maxaction(zone_jumbop, mb_maxaction);
 
 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -351,6 +364,7 @@ mbuf_init(void *dummy)
 	if (nmbjumbo9 > 0)
 		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
 	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
+	uma_zone_set_maxaction(zone_jumbo9, mb_maxaction);
 
 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
 	    mb_ctor_clust, mb_dtor_clust,
@@ -364,6 +378,7 @@ mbuf_init(void *dummy)
 	if (nmbjumbo16 > 0)
 		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
 	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
+	uma_zone_set_maxaction(zone_jumbo16, mb_maxaction);
 
 	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
 	    NULL, NULL,
@@ -372,6 +387,11 @@ mbuf_init(void *dummy)
 
 	/* uma_prealloc() goes here... */
 
+	/* Initialize the mb_reclaim() callout. */
+	mtx_init(&mb_reclaim_callout_mtx, "mb_reclaim_callout_mtx", NULL,
+	    MTX_DEF);
+	callout_init(&mb_reclaim_callout, 1);
+
 	/*
 	 * Hook event handler for low-memory situation, used to
 	 * drain protocols and push data back to the caches (UMA
@@ -678,3 +698,61 @@ mb_reclaim(void *junk)
 			if (pr->pr_drain != NULL)
 				(*pr->pr_drain)();
 }
+
+/*
+ * This is the function called by the mb_reclaim_callout, which is
+ * used when we hit the maximum for a zone.
+ *
+ * (See mb_maxaction() below.)
+ */
+static void
+mb_reclaim_timer(void *junk __unused)
+{
+
+	mtx_lock(&mb_reclaim_callout_mtx);
+
+	/*
+	 * Avoid running this function extra times by skipping this invocation
+	 * if the callout has already been rescheduled.
+	 */
+	if (callout_pending(&mb_reclaim_callout) ||
+	    !callout_active(&mb_reclaim_callout)) {
+		mtx_unlock(&mb_reclaim_callout_mtx);
+		return;
+	}
+	mtx_unlock(&mb_reclaim_callout_mtx);
+
+	mb_reclaim(NULL);
+
+	mtx_lock(&mb_reclaim_callout_mtx);
+	callout_deactivate(&mb_reclaim_callout);
+	mtx_unlock(&mb_reclaim_callout_mtx);
+}
+
+/*
+ * This function is called when we hit the maximum for a zone.
+ *
+ * At that point, we want to call the protocol drain routine to free up some
+ * mbufs. However, we will use the callout routines to schedule this to
+ * occur in another thread. (The thread calling this function holds the
+ * zone lock.)
+ */
+static void
+mb_maxaction(uma_zone_t zone __unused)
+{
+
+	/*
+	 * If we can't immediately obtain the lock, either the callout
+	 * is currently running, or another thread is scheduling the
+	 * callout.
+	 */
+	if (!mtx_trylock(&mb_reclaim_callout_mtx))
+		return;
+
+	/* If not already scheduled/running, schedule the callout. */
+	if (!callout_active(&mb_reclaim_callout)) {
+		callout_reset(&mb_reclaim_callout, 1, mb_reclaim_timer, NULL);
+	}
+
+	mtx_unlock(&mb_reclaim_callout_mtx);
+}

Modified: head/sys/vm/uma.h
==============================================================================
--- head/sys/vm/uma.h	Sun Dec 20 00:58:22 2015	(r292483)
+++ head/sys/vm/uma.h	Sun Dec 20 02:05:33 2015	(r292484)
@@ -521,6 +521,19 @@ int uma_zone_get_max(uma_zone_t zone);
 void uma_zone_set_warning(uma_zone_t zone, const char *warning);
 
 /*
+ * Sets a function to run when limit is reached
+ *
+ * Arguments:
+ *	zone  The zone to which this applies
+ *	fx  The function to run
+ *
+ * Returns:
+ *	Nothing
+ */
+typedef void (*uma_maxaction_t)(uma_zone_t);
+void uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t);
+
+/*
  * Obtains the approximate current number of items allocated from a zone
  *
  * Arguments:

Modified: head/sys/vm/uma_core.c
==============================================================================
--- head/sys/vm/uma_core.c	Sun Dec 20 00:58:22 2015	(r292483)
+++ head/sys/vm/uma_core.c	Sun Dec 20 02:05:33 2015	(r292484)
@@ -431,6 +431,13 @@ zone_log_warning(uma_zone_t zone)
 		printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
 }
 
+static inline void
+zone_maxaction(uma_zone_t zone)
+{
+	if (zone->uz_maxaction)
+		(*zone->uz_maxaction)(zone);
+}
+
 static void
 zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
 {
@@ -1578,6 +1585,7 @@ zone_ctor(void *mem, int size, void *uda
 	zone->uz_flags = 0;
 	zone->uz_warning = NULL;
 	timevalclear(&zone->uz_ratecheck);
+	zone->uz_maxaction = NULL;
 	keg = arg->keg;
 
 	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
@@ -2382,6 +2390,7 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t
 			if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
 				zone->uz_flags |= UMA_ZFLAG_FULL;
 				zone_log_warning(zone);
+				zone_maxaction(zone);
 			}
 			if (flags & M_NOWAIT)
 				break;
@@ -2501,6 +2510,7 @@ zone_fetch_slab_multi(uma_zone_t zone, u
 			zone->uz_flags |= UMA_ZFLAG_FULL;
 			zone->uz_sleeps++;
 			zone_log_warning(zone);
+			zone_maxaction(zone);
 			msleep(zone, zone->uz_lockptr, PVM,
 			    "zonelimit", hz/100);
 			zone->uz_flags &= ~UMA_ZFLAG_FULL;
@@ -3007,6 +3017,16 @@ uma_zone_set_warning(uma_zone_t zone, co
 }
 
 /* See uma.h */
+void
+uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
+{
+
+	ZONE_LOCK(zone);
+	zone->uz_maxaction = maxaction;
+	ZONE_UNLOCK(zone);
+}
+
+/* See uma.h */
 int
 uma_zone_get_cur(uma_zone_t zone)
 {

Modified: head/sys/vm/uma_int.h
==============================================================================
--- head/sys/vm/uma_int.h	Sun Dec 20 00:58:22 2015	(r292483)
+++ head/sys/vm/uma_int.h	Sun Dec 20 02:05:33 2015	(r292484)
@@ -303,10 +303,12 @@ struct uma_zone {
 	uint16_t	uz_count;	/* Amount of items in full bucket */
 	uint16_t	uz_count_min;	/* Minimal amount of items there */
 
-	/* The next three fields are used to print a rate-limited warnings. */
+	/* The next two fields are used to print rate-limited warnings. */
 	const char	*uz_warning;	/* Warning to print on failure */
 	struct timeval	uz_ratecheck;	/* Warnings rate-limiting */
 
+	uma_maxaction_t	uz_maxaction;	/* Function to run when at limit */
+
 	/*
 	 * This HAS to be the last item because we adjust the zone size
 	 * based on NCPU and then allocate the space for the zones.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201512200205.tBK25Y2H039542>