Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 1 Mar 2016 00:17:14 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r296242 - in head/sys: dev/xen/netback kern netinet6 sys
Message-ID:  <201603010017.u210HEBE049312@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: glebius
Date: Tue Mar  1 00:17:14 2016
New Revision: 296242
URL: https://svnweb.freebsd.org/changeset/base/296242

Log:
  New way to manage reference counting of mbuf external storage.
  
  The m_ext.ext_cnt pointer becomes a union, so it can now hold the refcount
  value itself.  The EXT_FLAG_EMBREF flag in m_ext.ext_flags indicates that
  the refcount is embedded.  The first mbuf to attach a cluster stores the
  refcount.  Any further mbufs that reference the cluster point at the
  refcount in the first mbuf.  The first mbuf is freed only when the last
  reference is freed.
  
  The benefit over refcounts stored in separate slabs is that now refcounts
  of different, unrelated mbufs do not share a cache line.
  
  For EXT_EXTREF mbufs the zone_ext_refcnt is no longer needed, and m_extadd()
  becomes void, making the widely used MEXTADD macro safe.
  
  For EXT_SFBUF mbufs sf_ext_ref() is removed; it was an optimization aimed
  precisely at the cache aliasing problem with regular refcounting.
  
  Discussed with:		rrs, rwatson, gnn, hiren, sbruno, np
  Reviewed by:		rrs
  Differential Revision:	https://reviews.freebsd.org/D5396
  Sponsored by:		Netflix

Modified:
  head/sys/dev/xen/netback/netback.c
  head/sys/kern/kern_mbuf.c
  head/sys/kern/kern_sendfile.c
  head/sys/kern/uipc_mbuf.c
  head/sys/netinet6/ip6_output.c
  head/sys/netinet6/ip6_var.h
  head/sys/sys/mbuf.h

Modified: head/sys/dev/xen/netback/netback.c
==============================================================================
--- head/sys/dev/xen/netback/netback.c	Mon Feb 29 23:09:31 2016	(r296241)
+++ head/sys/dev/xen/netback/netback.c	Tue Mar  1 00:17:14 2016	(r296242)
@@ -164,7 +164,7 @@ static void	xnb_txpkt2rsp(const struct x
 			      netif_tx_back_ring_t *ring, int error);
 static struct mbuf *xnb_pkt2mbufc(const struct xnb_pkt *pkt, struct ifnet *ifp);
 static int	xnb_txpkt2gnttab(const struct xnb_pkt *pkt,
-				 const struct mbuf *mbufc,
+				 struct mbuf *mbufc,
 				 gnttab_copy_table gnttab,
 				 const netif_tx_back_ring_t *txb,
 				 domid_t otherend_id);
@@ -1709,12 +1709,12 @@ xnb_pkt2mbufc(const struct xnb_pkt *pkt,
  * \return 		The number of gnttab entries filled
  */
 static int
-xnb_txpkt2gnttab(const struct xnb_pkt *pkt, const struct mbuf *mbufc,
+xnb_txpkt2gnttab(const struct xnb_pkt *pkt, struct mbuf *mbufc,
 		 gnttab_copy_table gnttab, const netif_tx_back_ring_t *txb,
 		 domid_t otherend_id)
 {
 
-	const struct mbuf *mbuf = mbufc;/* current mbuf within the chain */
+	struct mbuf *mbuf = mbufc;/* current mbuf within the chain */
 	int gnt_idx = 0;		/* index into grant table */
 	RING_IDX r_idx = pkt->car;	/* index into tx ring buffer */
 	int r_ofs = 0;	/* offset of next data within tx request's data area */

Modified: head/sys/kern/kern_mbuf.c
==============================================================================
--- head/sys/kern/kern_mbuf.c	Mon Feb 29 23:09:31 2016	(r296241)
+++ head/sys/kern/kern_mbuf.c	Tue Mar  1 00:17:14 2016	(r296242)
@@ -269,7 +269,6 @@ uma_zone_t	zone_pack;
 uma_zone_t	zone_jumbop;
 uma_zone_t	zone_jumbo9;
 uma_zone_t	zone_jumbo16;
-uma_zone_t	zone_ext_refcnt;
 
 /*
  * Local prototypes.
@@ -278,7 +277,6 @@ static int	mb_ctor_mbuf(void *, int, voi
 static int	mb_ctor_clust(void *, int, void *, int);
 static int	mb_ctor_pack(void *, int, void *, int);
 static void	mb_dtor_mbuf(void *, int, void *);
-static void	mb_dtor_clust(void *, int, void *);
 static void	mb_dtor_pack(void *, int, void *);
 static int	mb_zinit_pack(void *, int, int);
 static void	mb_zfini_pack(void *, int);
@@ -312,13 +310,13 @@ mbuf_init(void *dummy)
 	uma_zone_set_maxaction(zone_mbuf, mb_reclaim);
 
 	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
-	    mb_ctor_clust, mb_dtor_clust,
+	    mb_ctor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbclusters > 0)
 		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
 	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
@@ -329,26 +327,26 @@ mbuf_init(void *dummy)
 
 	/* Make jumbo frame zone too. Page size, 9k and 16k. */
 	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
-	    mb_ctor_clust, mb_dtor_clust,
+	    mb_ctor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	if (nmbjumbop > 0)
 		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
 	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
 	uma_zone_set_maxaction(zone_jumbop, mb_reclaim);
 
 	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
-	    mb_ctor_clust, mb_dtor_clust,
+	    mb_ctor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
 	if (nmbjumbo9 > 0)
 		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
@@ -356,24 +354,19 @@ mbuf_init(void *dummy)
 	uma_zone_set_maxaction(zone_jumbo9, mb_reclaim);
 
 	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
-	    mb_ctor_clust, mb_dtor_clust,
+	    mb_ctor_clust,
 #ifdef INVARIANTS
-	    trash_init, trash_fini,
+	    trash_dtor, trash_init, trash_fini,
 #else
-	    NULL, NULL,
+	    NULL, NULL, NULL,
 #endif
-	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
+	    UMA_ALIGN_PTR, 0);
 	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
 	if (nmbjumbo16 > 0)
 		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
 	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
 	uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);
 
-	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
-	    NULL, NULL,
-	    NULL, NULL,
-	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);
-
 	/*
 	 * Hook event handler for low-memory situation, used to
 	 * drain protocols and push data back to the caches (UMA
@@ -477,7 +470,6 @@ mb_dtor_pack(void *mem, int size, void *
 	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
 	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
 	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
-	KASSERT(*m->m_ext.ext_cnt == 1, ("%s: ext_cnt != 1", __func__));
 #ifdef INVARIANTS
 	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
 #endif
@@ -505,40 +497,11 @@ static int
 mb_ctor_clust(void *mem, int size, void *arg, int how)
 {
 	struct mbuf *m;
-	u_int *refcnt;
-	int type;
-	uma_zone_t zone;
 
 #ifdef INVARIANTS
 	trash_ctor(mem, size, arg, how);
 #endif
-	switch (size) {
-	case MCLBYTES:
-		type = EXT_CLUSTER;
-		zone = zone_clust;
-		break;
-#if MJUMPAGESIZE != MCLBYTES
-	case MJUMPAGESIZE:
-		type = EXT_JUMBOP;
-		zone = zone_jumbop;
-		break;
-#endif
-	case MJUM9BYTES:
-		type = EXT_JUMBO9;
-		zone = zone_jumbo9;
-		break;
-	case MJUM16BYTES:
-		type = EXT_JUMBO16;
-		zone = zone_jumbo16;
-		break;
-	default:
-		panic("unknown cluster size");
-		break;
-	}
-
 	m = (struct mbuf *)arg;
-	refcnt = uma_find_refcnt(zone, mem);
-	*refcnt = 1;
 	if (m != NULL) {
 		m->m_ext.ext_buf = (caddr_t)mem;
 		m->m_data = m->m_ext.ext_buf;
@@ -547,33 +510,15 @@ mb_ctor_clust(void *mem, int size, void 
 		m->m_ext.ext_arg1 = NULL;
 		m->m_ext.ext_arg2 = NULL;
 		m->m_ext.ext_size = size;
-		m->m_ext.ext_type = type;
-		m->m_ext.ext_flags = 0;
-		m->m_ext.ext_cnt = refcnt;
+		m->m_ext.ext_type = m_gettype(size);
+		m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+		m->m_ext.ext_count = 1;
 	}
 
 	return (0);
 }
 
 /*
- * The Mbuf Cluster zone destructor.
- */
-static void
-mb_dtor_clust(void *mem, int size, void *arg)
-{
-#ifdef INVARIANTS
-	uma_zone_t zone;
-
-	zone = m_getzone(size);
-	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
-		("%s: refcnt incorrect %u", __func__,
-		 *(uma_find_refcnt(zone, mem))) );
-
-	trash_dtor(mem, size, arg);
-#endif
-}
-
-/*
  * The Packet secondary zone's init routine, executed on the
  * object's transition from mbuf keg slab to zone cache.
  */
@@ -670,58 +615,69 @@ mb_reclaim(uma_zone_t zone __unused, int
 void
 mb_free_ext(struct mbuf *m)
 {
+	volatile u_int *refcnt;
+	struct mbuf *mref;
 	int freembuf;
 
 	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
 
+	/* See if this is the mbuf that holds the embedded refcount. */
+	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+		refcnt = &m->m_ext.ext_count;
+		mref = m;
+	} else {
+		KASSERT(m->m_ext.ext_cnt != NULL,
+		    ("%s: no refcounting pointer on %p", __func__, m));
+		refcnt = m->m_ext.ext_cnt;
+		mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
+	}
+
 	/*
-	 * Check if the header is embedded in the cluster.
+	 * Check if the header is embedded in the cluster.  It is
+	 * important that we can't touch any of the mbuf fields
+	 * after we have freed the external storage, since mbuf
+	 * could have been embedded in it.
 	 */
 	freembuf = (m->m_flags & M_NOFREE) ? 0 : 1;
 
-	switch (m->m_ext.ext_type) {
-	case EXT_SFBUF:
-		sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
-		break;
-	case EXT_SFBUF_NOCACHE:
-		sf_ext_free_nocache(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
-		break;
-	default:
-		KASSERT(m->m_ext.ext_cnt != NULL,
-		    ("%s: no refcounting pointer on %p", __func__, m));
-		/* 
-		 * Free attached storage if this mbuf is the only
-		 * reference to it.
-		 */
-		if (*(m->m_ext.ext_cnt) != 1) {
-			if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1)
-				break;
-		}
-
+	/* Free attached storage if this mbuf is the only reference to it. */
+	if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
 		switch (m->m_ext.ext_type) {
-		case EXT_PACKET:	/* The packet zone is special. */
-			if (*(m->m_ext.ext_cnt) == 0)
-				*(m->m_ext.ext_cnt) = 1;
-			uma_zfree(zone_pack, m);
-			return;		/* Job done. */
+		case EXT_PACKET:
+			/* The packet zone is special. */
+			if (*refcnt == 0)
+				*refcnt = 1;
+			uma_zfree(zone_pack, mref);
+			break;
 		case EXT_CLUSTER:
 			uma_zfree(zone_clust, m->m_ext.ext_buf);
+			uma_zfree(zone_mbuf, mref);
 			break;
 		case EXT_JUMBOP:
 			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
+			uma_zfree(zone_mbuf, mref);
 			break;
 		case EXT_JUMBO9:
 			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
+			uma_zfree(zone_mbuf, mref);
 			break;
 		case EXT_JUMBO16:
 			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
+			uma_zfree(zone_mbuf, mref);
+			break;
+		case EXT_SFBUF:
+			sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
+			uma_zfree(zone_mbuf, mref);
+			break;
+		case EXT_SFBUF_NOCACHE:
+			sf_ext_free_nocache(m->m_ext.ext_arg1,
+			    m->m_ext.ext_arg2);
+			uma_zfree(zone_mbuf, mref);
 			break;
 		case EXT_NET_DRV:
 		case EXT_MOD_TYPE:
 		case EXT_DISPOSABLE:
-			*(m->m_ext.ext_cnt) = 0;
-			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
-				m->m_ext.ext_cnt));
+			uma_zfree(zone_mbuf, mref);
 			/* FALLTHROUGH */
 		case EXT_EXTREF:
 			KASSERT(m->m_ext.ext_free != NULL,
@@ -735,7 +691,7 @@ mb_free_ext(struct mbuf *m)
 		}
 	}
 
-	if (freembuf)
+	if (freembuf && m != mref)
 		uma_zfree(zone_mbuf, m);
 }
 
@@ -925,9 +881,7 @@ m_getm2(struct mbuf *m, int len, int how
 
 /*-
  * Configure a provided mbuf to refer to the provided external storage
- * buffer and setup a reference count for said buffer.  If the setting
- * up of the reference count fails, the M_EXT bit will not be set.  If
- * successfull, the M_EXT bit is set in the mbuf's flags.
+ * buffer and setup a reference count for said buffer.
  *
  * Arguments:
  *    mb     The existing mbuf to which to attach the provided buffer.
@@ -944,20 +898,14 @@ m_getm2(struct mbuf *m, int len, int how
  * Returns:
  *    Nothing.
  */
-int
+void
 m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
     void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2,
-    int flags, int type, int wait)
+    int flags, int type)
 {
-	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
 
-	if (type != EXT_EXTREF)
-		mb->m_ext.ext_cnt = uma_zalloc(zone_ext_refcnt, wait);
-
-	if (mb->m_ext.ext_cnt == NULL)
-		return (ENOMEM);
+	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));
 
-	*(mb->m_ext.ext_cnt) = 1;
 	mb->m_flags |= (M_EXT | flags);
 	mb->m_ext.ext_buf = buf;
 	mb->m_data = mb->m_ext.ext_buf;
@@ -966,9 +914,12 @@ m_extadd(struct mbuf *mb, caddr_t buf, u
 	mb->m_ext.ext_arg1 = arg1;
 	mb->m_ext.ext_arg2 = arg2;
 	mb->m_ext.ext_type = type;
-	mb->m_ext.ext_flags = 0;
 
-	return (0);
+	if (type != EXT_EXTREF) {
+		mb->m_ext.ext_count = 1;
+		mb->m_ext.ext_flags = EXT_FLAG_EMBREF;
+	} else
+		mb->m_ext.ext_flags = 0;
 }
 
 /*

Modified: head/sys/kern/kern_sendfile.c
==============================================================================
--- head/sys/kern/kern_sendfile.c	Mon Feb 29 23:09:31 2016	(r296241)
+++ head/sys/kern/kern_sendfile.c	Tue Mar  1 00:17:14 2016	(r296242)
@@ -119,31 +119,6 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat,
     NULL, 0, sfstat_sysctl, "I", "sendfile statistics");
 
 /*
- * Add more references to a vm_page + sf_buf + sendfile_sync.  Called
- * by mbuf(9) code to add extra references to a page.
- */
-void
-sf_ext_ref(void *arg1, void *arg2)
-{
-	struct sf_buf *sf = arg1;
-	struct sendfile_sync *sfs = arg2;
-	vm_page_t pg = sf_buf_page(sf);
-
-	sf_buf_ref(sf);
-
-	vm_page_lock(pg);
-	vm_page_wire(pg);
-	vm_page_unlock(pg);
-
-	if (sfs != NULL) {
-		mtx_lock(&sfs->mtx);
-		KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
-		sfs->count++;
-		mtx_unlock(&sfs->mtx);
-	}
-}
-
-/*
  * Detach mapped page and release resources back to the system.  Called
  * by mbuf(9) code when last reference to a page is freed.
  */
@@ -807,7 +782,8 @@ retry_space:
 				m0->m_ext.ext_type = EXT_SFBUF;
 			else
 				m0->m_ext.ext_type = EXT_SFBUF_NOCACHE;
-			m0->m_ext.ext_flags = 0;
+			m0->m_ext.ext_flags = EXT_FLAG_EMBREF;
+			m0->m_ext.ext_count = 1;
 			m0->m_flags |= (M_EXT | M_RDONLY);
 			if (nios)
 				m0->m_flags |= M_NOTREADY;

Modified: head/sys/kern/uipc_mbuf.c
==============================================================================
--- head/sys/kern/uipc_mbuf.c	Mon Feb 29 23:09:31 2016	(r296241)
+++ head/sys/kern/uipc_mbuf.c	Tue Mar  1 00:17:14 2016	(r296242)
@@ -138,29 +138,31 @@ CTASSERT(sizeof(m_assertbuf.m_stailqpkt)
  * and bump the refcount of the cluster.
  */
 void
-mb_dupcl(struct mbuf *n, const struct mbuf *m)
+mb_dupcl(struct mbuf *n, struct mbuf *m)
 {
+	volatile u_int *refcnt;
 
 	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));
 	KASSERT(!(n->m_flags & M_EXT), ("%s: M_EXT set on %p", __func__, n));
 
-	switch (m->m_ext.ext_type) {
-	case EXT_SFBUF:
-	case EXT_SFBUF_NOCACHE:
-		sf_ext_ref(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
-		break;
-	default:
+	n->m_ext = m->m_ext;
+	n->m_flags |= M_EXT;
+	n->m_flags |= m->m_flags & M_RDONLY;
+
+	/* See if this is the mbuf that holds the embedded refcount. */
+	if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+		refcnt = n->m_ext.ext_cnt = &m->m_ext.ext_count;
+		n->m_ext.ext_flags &= ~EXT_FLAG_EMBREF;
+	} else {
 		KASSERT(m->m_ext.ext_cnt != NULL,
 		    ("%s: no refcounting pointer on %p", __func__, m));
-		if (*(m->m_ext.ext_cnt) == 1)
-			*(m->m_ext.ext_cnt) += 1;
-		else
-			atomic_add_int(m->m_ext.ext_cnt, 1);
+		refcnt = m->m_ext.ext_cnt;
 	}
 
-	n->m_ext = m->m_ext;
-	n->m_flags |= M_EXT;
-	n->m_flags |= m->m_flags & M_RDONLY;
+	if (*refcnt == 1)
+		*refcnt += 1;
+	else
+		atomic_add_int(refcnt, 1);
 }
 
 void
@@ -394,7 +396,7 @@ m_prepend(struct mbuf *m, int len, int h
  * only their reference counts are incremented.
  */
 struct mbuf *
-m_copym(const struct mbuf *m, int off0, int len, int wait)
+m_copym(struct mbuf *m, int off0, int len, int wait)
 {
 	struct mbuf *n, **np;
 	int off = off0;

Modified: head/sys/netinet6/ip6_output.c
==============================================================================
--- head/sys/netinet6/ip6_output.c	Mon Feb 29 23:09:31 2016	(r296241)
+++ head/sys/netinet6/ip6_output.c	Tue Mar  1 00:17:14 2016	(r296242)
@@ -2972,7 +2972,7 @@ ip6_setpktopt(int optname, u_char *buf, 
  * pointer that might NOT be &loif -- easier than replicating that code here.
  */
 void
-ip6_mloopback(struct ifnet *ifp, const struct mbuf *m)
+ip6_mloopback(struct ifnet *ifp, struct mbuf *m)
 {
 	struct mbuf *copym;
 	struct ip6_hdr *ip6;

Modified: head/sys/netinet6/ip6_var.h
==============================================================================
--- head/sys/netinet6/ip6_var.h	Mon Feb 29 23:09:31 2016	(r296241)
+++ head/sys/netinet6/ip6_var.h	Tue Mar  1 00:17:14 2016	(r296242)
@@ -382,7 +382,7 @@ int	ip6_sysctl(int *, u_int, void *, siz
 
 void	ip6_forward(struct mbuf *, int);
 
-void	ip6_mloopback(struct ifnet *, const struct mbuf *);
+void	ip6_mloopback(struct ifnet *, struct mbuf *);
 int	ip6_output(struct mbuf *, struct ip6_pktopts *,
 			struct route_in6 *,
 			int,

Modified: head/sys/sys/mbuf.h
==============================================================================
--- head/sys/sys/mbuf.h	Mon Feb 29 23:09:31 2016	(r296241)
+++ head/sys/sys/mbuf.h	Tue Mar  1 00:17:14 2016	(r296242)
@@ -160,7 +160,10 @@ struct pkthdr {
  * they are correct.
  */
 struct m_ext {
-	volatile u_int	*ext_cnt;	/* pointer to ref count info */
+	union {
+		volatile u_int	 ext_count;	/* value of ref count info */
+		volatile u_int	*ext_cnt;	/* pointer to ref count info */
+	};
 	caddr_t		 ext_buf;	/* start of buffer */
 	uint32_t	 ext_size;	/* size of buffer, for ext_free */
 	uint32_t	 ext_type:8,	/* type of external storage */
@@ -370,7 +373,7 @@ struct mbuf {
  * Flags for external mbuf buffer types.
  * NB: limited to the lower 24 bits.
  */
-#define	EXT_FLAG_EMBREF		0x000001	/* embedded ext_cnt, notyet */
+#define	EXT_FLAG_EMBREF		0x000001	/* embedded ext_count */
 #define	EXT_FLAG_EXTREF		0x000002	/* external ext_cnt, notyet */
 #define	EXT_FLAG_NOFREE		0x000010	/* don't free mbuf to pool, notyet */
 
@@ -396,7 +399,6 @@ struct mbuf {
 /*
  * External reference/free functions.
  */
-void sf_ext_ref(void *, void *);
 void sf_ext_free(void *, void *);
 void sf_ext_free_nocache(void *, void *);
 
@@ -524,9 +526,8 @@ extern uma_zone_t	zone_pack;
 extern uma_zone_t	zone_jumbop;
 extern uma_zone_t	zone_jumbo9;
 extern uma_zone_t	zone_jumbo16;
-extern uma_zone_t	zone_ext_refcnt;
 
-void		 mb_dupcl(struct mbuf *, const struct mbuf *);
+void		 mb_dupcl(struct mbuf *, struct mbuf *);
 void		 mb_free_ext(struct mbuf *);
 void		 m_adj(struct mbuf *, int);
 int		 m_apply(struct mbuf *, int, int,
@@ -539,7 +540,7 @@ void 		*m_cljget(struct mbuf *m, int how
 struct mbuf	*m_collapse(struct mbuf *, int, int);
 void		 m_copyback(struct mbuf *, int, int, c_caddr_t);
 void		 m_copydata(const struct mbuf *, int, int, caddr_t);
-struct mbuf	*m_copym(const struct mbuf *, int, int, int);
+struct mbuf	*m_copym(struct mbuf *, int, int, int);
 struct mbuf	*m_copypacket(struct mbuf *, int);
 void		 m_copy_pkthdr(struct mbuf *, struct mbuf *);
 struct mbuf	*m_copyup(struct mbuf *, int, int);
@@ -550,9 +551,9 @@ struct mbuf	*m_devget(char *, int, int, 
 		    void (*)(char *, caddr_t, u_int));
 struct mbuf	*m_dup(const struct mbuf *, int);
 int		 m_dup_pkthdr(struct mbuf *, const struct mbuf *, int);
-int		 m_extadd(struct mbuf *, caddr_t, u_int,
+void		 m_extadd(struct mbuf *, caddr_t, u_int,
 		    void (*)(struct mbuf *, void *, void *), void *, void *,
-		    int, int, int);
+		    int, int);
 u_int		 m_fixhdr(struct mbuf *);
 struct mbuf	*m_fragment(struct mbuf *, int, int);
 void		 m_freem(struct mbuf *);
@@ -709,30 +710,30 @@ m_getcl(int how, short type, int flags)
 	return (uma_zalloc_arg(zone_pack, &args, how));
 }
 
+/*
+ * XXX: m_cljset() is a dangerous API.  One must attach only a new,
+ * unreferenced cluster to an mbuf(9).  It is not possible to assert
+ * that, so care can be taken only by users of the API.
+ */
 static __inline void
 m_cljset(struct mbuf *m, void *cl, int type)
 {
-	uma_zone_t zone;
 	int size;
 
 	switch (type) {
 	case EXT_CLUSTER:
 		size = MCLBYTES;
-		zone = zone_clust;
 		break;
 #if MJUMPAGESIZE != MCLBYTES
 	case EXT_JUMBOP:
 		size = MJUMPAGESIZE;
-		zone = zone_jumbop;
 		break;
 #endif
 	case EXT_JUMBO9:
 		size = MJUM9BYTES;
-		zone = zone_jumbo9;
 		break;
 	case EXT_JUMBO16:
 		size = MJUM16BYTES;
-		zone = zone_jumbo16;
 		break;
 	default:
 		panic("%s: unknown cluster type %d", __func__, type);
@@ -743,10 +744,9 @@ m_cljset(struct mbuf *m, void *cl, int t
 	m->m_ext.ext_free = m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
 	m->m_ext.ext_size = size;
 	m->m_ext.ext_type = type;
-	m->m_ext.ext_flags = 0;
-	m->m_ext.ext_cnt = uma_find_refcnt(zone, cl);
+	m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+	m->m_ext.ext_count = 1;
 	m->m_flags |= M_EXT;
-
 }
 
 static __inline void
@@ -775,6 +775,16 @@ m_last(struct mbuf *m)
 	return (m);
 }
 
+static inline u_int
+m_extrefcnt(struct mbuf *m)
+{
+
+	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT missing", __func__));
+
+	return ((m->m_ext.ext_flags & EXT_FLAG_EMBREF) ? m->m_ext.ext_count :
+	    *m->m_ext.ext_cnt);
+}
+
 /*
  * mbuf, cluster, and external object allocation macros (for compatibility
  * purposes).
@@ -784,8 +794,8 @@ m_last(struct mbuf *m)
 #define	MGETHDR(m, how, type)	((m) = m_gethdr((how), (type)))
 #define	MCLGET(m, how)		m_clget((m), (how))
 #define	MEXTADD(m, buf, size, free, arg1, arg2, flags, type)		\
-    (void )m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2),\
-    (flags), (type), M_NOWAIT)
+    m_extadd((m), (caddr_t)(buf), (size), (free), (arg1), (arg2),	\
+    (flags), (type))
 #define	m_getm(m, len, how, type)					\
     m_getm2((m), (len), (how), (type), M_PKTHDR)
 
@@ -796,7 +806,7 @@ m_last(struct mbuf *m)
  */
 #define	M_WRITABLE(m)	(!((m)->m_flags & M_RDONLY) &&			\
 			 (!(((m)->m_flags & M_EXT)) ||			\
-			 (*((m)->m_ext.ext_cnt) == 1)) )		\
+			 (m_extrefcnt(m) == 1)))
 
 /* Check if the supplied mbuf has a packet header, or else panic. */
 #define	M_ASSERTPKTHDR(m)						\



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201603010017.u210HEBE049312>