Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 6 May 2018 00:19:48 +0000 (UTC)
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r333281 - in head/sys: kern sys
Message-ID:  <201805060019.w460Jm1g069229@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: markj
Date: Sun May  6 00:19:48 2018
New Revision: 333281
URL: https://svnweb.freebsd.org/changeset/base/333281

Log:
  Add an mbuf allocator for netdump.
  
  The aim is to permit mbuf allocations after a panic without calling into
  the page allocator, without imposing any runtime overhead during regular
  operation of the system, and without modifying driver code. The approach
  taken is to preallocate a number of mbufs and clusters, storing them
  in linked lists, and using the lists to back some UMA cache zones. At
  panic time, the mbuf and cluster zone pointers are overwritten with
  those of the cache zones so that the mbuf allocator returns
  preallocated items.
  
  Using this scheme, drivers which cache mbuf zone pointers from
  m_getzone() require special handling when implementing netdump support.
  
  Reviewed by:	cem (earlier version), julian, sbruno
  MFC after:	1 month
  Sponsored by:	Dell EMC Isilon
  Differential Revision:	https://reviews.freebsd.org/D15251

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/kern_mbuf.c
==============================================================================
--- head/sys/kern/kern_mbuf.c	Sun May  6 00:11:30 2018	(r333280)
+++ head/sys/kern/kern_mbuf.c	Sun May  6 00:19:48 2018	(r333281)
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/kernel.h>
+#include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/protosw.h>
@@ -378,6 +379,199 @@ mbuf_init(void *dummy)
 	    EVENTHANDLER_PRI_FIRST);
 }
 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);
+
+#ifdef NETDUMP
+/*
+ * netdump makes use of a pre-allocated pool of mbufs and clusters.  When
+ * netdump is configured, we initialize a set of UMA cache zones which return
+ * items from this pool.  At panic-time, the regular UMA zone pointers are
+ * overwritten with those of the cache zones so that drivers may allocate and
+ * free mbufs and clusters without attempting to allocate physical memory.
+ *
+ * We keep mbufs and clusters in a pair of mbuf queues.  In particular, for
+ * the purpose of caching clusters, we treat them as mbufs.
+ */
+static struct mbufq nd_mbufq =
+    { STAILQ_HEAD_INITIALIZER(nd_mbufq.mq_head), 0, INT_MAX };
+static struct mbufq nd_clustq =
+    { STAILQ_HEAD_INITIALIZER(nd_clustq.mq_head), 0, INT_MAX };
+
+static int nd_clsize;
+static uma_zone_t nd_zone_mbuf;
+static uma_zone_t nd_zone_clust;
+static uma_zone_t nd_zone_pack;
+
+static int
+nd_buf_import(void *arg, void **store, int count, int domain __unused,
+    int flags)
+{
+	struct mbufq *q;
+	struct mbuf *m;
+	int i;
+
+	q = arg;
+
+	for (i = 0; i < count; i++) {
+		m = mbufq_dequeue(q);
+		if (m == NULL)
+			break;
+		trash_init(m, q == &nd_mbufq ? MSIZE : nd_clsize, flags);
+		store[i] = m;
+	}
+	return (i);
+}
+
+static void
+nd_buf_release(void *arg, void **store, int count)
+{
+	struct mbufq *q;
+	struct mbuf *m;
+	int i;
+
+	q = arg;
+
+	for (i = 0; i < count; i++) {
+		m = store[i];
+		(void)mbufq_enqueue(q, m);
+	}
+}
+
+static int
+nd_pack_import(void *arg __unused, void **store, int count, int domain __unused,
+    int flags __unused)
+{
+	struct mbuf *m;
+	void *clust;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		m = m_get(MT_DATA, M_NOWAIT);
+		if (m == NULL)
+			break;
+		clust = uma_zalloc(nd_zone_clust, M_NOWAIT);
+		if (clust == NULL) {
+			m_free(m);
+			break;
+		}
+		mb_ctor_clust(clust, nd_clsize, m, 0);
+		store[i] = m;
+	}
+	return (i);
+}
+
+static void
+nd_pack_release(void *arg __unused, void **store, int count)
+{
+	struct mbuf *m;
+	void *clust;
+	int i;
+
+	for (i = 0; i < count; i++) {
+		m = store[i];
+		clust = m->m_ext.ext_buf;
+		uma_zfree(nd_zone_clust, clust);
+		uma_zfree(nd_zone_mbuf, m);
+	}
+}
+
+/*
+ * Free the pre-allocated mbufs and clusters reserved for netdump, and destroy
+ * the corresponding UMA cache zones.
+ */
+void
+netdump_mbuf_drain(void)
+{
+	struct mbuf *m;
+	void *item;
+
+	if (nd_zone_mbuf != NULL) {
+		uma_zdestroy(nd_zone_mbuf);
+		nd_zone_mbuf = NULL;
+	}
+	if (nd_zone_clust != NULL) {
+		uma_zdestroy(nd_zone_clust);
+		nd_zone_clust = NULL;
+	}
+	if (nd_zone_pack != NULL) {
+		uma_zdestroy(nd_zone_pack);
+		nd_zone_pack = NULL;
+	}
+
+	while ((m = mbufq_dequeue(&nd_mbufq)) != NULL)
+		m_free(m);
+	while ((item = mbufq_dequeue(&nd_clustq)) != NULL)
+		uma_zfree(m_getzone(nd_clsize), item);
+}
+
+/*
+ * Callback invoked immediately prior to starting a netdump.
+ */
+void
+netdump_mbuf_dump(void)
+{
+
+	/*
+	 * All cluster zones return buffers of the size requested by the
+	 * drivers.  It's up to the driver to reinitialize the zones if the
+	 * MTU of a netdump-enabled interface changes.
+	 */
+	printf("netdump: overwriting mbuf zone pointers\n");
+	zone_mbuf = nd_zone_mbuf;
+	zone_clust = nd_zone_clust;
+	zone_pack = nd_zone_pack;
+	zone_jumbop = nd_zone_clust;
+	zone_jumbo9 = nd_zone_clust;
+	zone_jumbo16 = nd_zone_clust;
+}
+
+/*
+ * Reinitialize the netdump mbuf+cluster pool and cache zones.
+ */
+void
+netdump_mbuf_reinit(int nmbuf, int nclust, int clsize)
+{
+	struct mbuf *m;
+	void *item;
+
+	netdump_mbuf_drain();
+
+	nd_clsize = clsize;
+
+	nd_zone_mbuf = uma_zcache_create("netdump_" MBUF_MEM_NAME,
+	    MSIZE, mb_ctor_mbuf, mb_dtor_mbuf,
+#ifdef INVARIANTS
+	    trash_init, trash_fini,
+#else
+	    NULL, NULL,
+#endif
+	    nd_buf_import, nd_buf_release,
+	    &nd_mbufq, UMA_ZONE_NOBUCKET);
+
+	nd_zone_clust = uma_zcache_create("netdump_" MBUF_CLUSTER_MEM_NAME,
+	    clsize, mb_ctor_clust,
+#ifdef INVARIANTS
+	    trash_dtor, trash_init, trash_fini,
+#else
+	    NULL, NULL, NULL,
+#endif
+	    nd_buf_import, nd_buf_release,
+	    &nd_clustq, UMA_ZONE_NOBUCKET);
+
+	nd_zone_pack = uma_zcache_create("netdump_" MBUF_PACKET_MEM_NAME,
+	    MCLBYTES, mb_ctor_pack, mb_dtor_pack, NULL, NULL,
+	    nd_pack_import, nd_pack_release,
+	    NULL, UMA_ZONE_NOBUCKET);
+
+	while (nmbuf-- > 0) {
+		m = m_get(MT_DATA, M_WAITOK);
+		uma_zfree(nd_zone_mbuf, m);
+	}
+	while (nclust-- > 0) {
+		item = uma_zalloc(m_getzone(nd_clsize), M_WAITOK);
+		uma_zfree(nd_zone_clust, item);
+	}
+}
+#endif /* NETDUMP */
 
 /*
  * UMA backend page allocator for the jumbo frame zones.

Modified: head/sys/sys/mbuf.h
==============================================================================
--- head/sys/sys/mbuf.h	Sun May  6 00:11:30 2018	(r333280)
+++ head/sys/sys/mbuf.h	Sun May  6 00:19:48 2018	(r333281)
@@ -1377,5 +1377,12 @@ mbuf_tstmp2timespec(struct mbuf *m, struct timespec *t
 }
 #endif
 
+#ifdef NETDUMP
+/* Invoked from the netdump client code. */
+void	netdump_mbuf_drain(void);
+void	netdump_mbuf_dump(void);
+void	netdump_mbuf_reinit(int nmbuf, int nclust, int clsize);
+#endif
+
 #endif /* _KERNEL */
 #endif /* !_SYS_MBUF_H_ */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201805060019.w460Jm1g069229>