Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 24 Dec 2013 14:14:05 +0000 (UTC)
From:      Gleb Smirnoff <glebius@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r259821 - in projects/sendfile/sys: dev/cxgb/ulp/tom dev/cxgbe/tom kern netinet sys
Message-ID:  <201312241414.rBOEE5Oq017066@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: glebius
Date: Tue Dec 24 14:14:05 2013
New Revision: 259821
URL: http://svnweb.freebsd.org/changeset/base/259821

Log:
  Work in progress on making socket buffers capable of accepting mbufs
  that are "not ready". Such mbufs are still being populated by some I/O
  thread in the background.
  
  sb_cc is split into sb_ccc and sb_acc.
  sb_ccc means "claimed character count"; it is usually of interest
  to those parties that write to the socket buffer and compare sb_ccc
  against limits.
  sb_acc means "available character count", i.e. how much data we can
  take from the buffer right now.
  
  Data that is not ready is marked with the M_NOTREADY flag. Data that
  sits in the buffer behind not-ready data, but is itself ready, is
  marked as M_BLOCKED. To optimize things, we keep a pointer to the
  first M_NOTREADY mbuf.
  
  Writes to sockets can now carry the PRUS_NOTREADY flag, which marks
  all of the written data as M_NOTREADY.
  
  In the process of making the change I tried to reduce the amount of
  poking by various layers directly into struct sockbuf, using at
  least inlined functions instead.
  
  This isn't properly tested; it is just a milestone in a WIP. Only TCP
  and UDP compile; SCTP and some other kernel code do not.
  
  Sponsored by:	Netflix
  Sponsored by:	Nginx, Inc.

Modified:
  projects/sendfile/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
  projects/sendfile/sys/dev/cxgbe/tom/t4_cpl_io.c
  projects/sendfile/sys/dev/cxgbe/tom/t4_ddp.c
  projects/sendfile/sys/kern/sys_socket.c
  projects/sendfile/sys/kern/uipc_debug.c
  projects/sendfile/sys/kern/uipc_mbuf.c
  projects/sendfile/sys/kern/uipc_sockbuf.c
  projects/sendfile/sys/kern/uipc_socket.c
  projects/sendfile/sys/kern/uipc_usrreq.c
  projects/sendfile/sys/netinet/tcp_input.c
  projects/sendfile/sys/netinet/tcp_output.c
  projects/sendfile/sys/netinet/tcp_reass.c
  projects/sendfile/sys/netinet/tcp_usrreq.c
  projects/sendfile/sys/sys/mbuf.h
  projects/sendfile/sys/sys/protosw.h
  projects/sendfile/sys/sys/sockbuf.h
  projects/sendfile/sys/sys/socketvar.h

Modified: projects/sendfile/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c
==============================================================================
--- projects/sendfile/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -1199,7 +1199,7 @@ do_rx_data(struct sge_qset *qs, struct r
 	}
 
 	toep->tp_enqueued += m->m_pkthdr.len;
-	sbappendstream_locked(so_rcv, m);
+	sbappendstream_locked(so_rcv, m, 0);
 	sorwakeup_locked(so);
 	SOCKBUF_UNLOCK_ASSERT(so_rcv);
 

Modified: projects/sendfile/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- projects/sendfile/sys/dev/cxgbe/tom/t4_cpl_io.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/dev/cxgbe/tom/t4_cpl_io.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -870,7 +870,7 @@ do_peer_close(struct sge_iq *iq, const s
 #ifdef USE_DDP_RX_FLOW_CONTROL
 		toep->rx_credits -= m->m_len;	/* adjust for F_RX_FC_DDP */
 #endif
-		sbappendstream_locked(sb, m);
+		sbappendstream_locked(sb, m, 0);
 		toep->sb_cc = sb->sb_cc;
 	}
 	socantrcvmore_locked(so);	/* unlocks the sockbuf */
@@ -1285,7 +1285,7 @@ do_rx_data(struct sge_iq *iq, const stru
 	    ("%s: sb %p has more data (%d) than last time (%d).",
 	    __func__, sb, sb->sb_cc, toep->sb_cc));
 	toep->rx_credits += toep->sb_cc - sb->sb_cc;
-	sbappendstream_locked(sb, m);
+	sbappendstream_locked(sb, m, 0);
 	toep->sb_cc = sb->sb_cc;
 	sorwakeup_locked(so);
 	SOCKBUF_UNLOCK_ASSERT(sb);

Modified: projects/sendfile/sys/dev/cxgbe/tom/t4_ddp.c
==============================================================================
--- projects/sendfile/sys/dev/cxgbe/tom/t4_ddp.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/dev/cxgbe/tom/t4_ddp.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -231,7 +231,7 @@ insert_ddp_data(struct toepcb *toep, uin
 #ifdef USE_DDP_RX_FLOW_CONTROL
 	toep->rx_credits -= n;	/* adjust for F_RX_FC_DDP */
 #endif
-	sbappendstream_locked(sb, m);
+	sbappendstream_locked(sb, m, 0);
 	toep->sb_cc = sb->sb_cc;
 }
 
@@ -466,7 +466,7 @@ handle_ddp_data(struct toepcb *toep, __b
 #ifdef USE_DDP_RX_FLOW_CONTROL
 	toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
 #endif
-	sbappendstream_locked(sb, m);
+	sbappendstream_locked(sb, m, 0);
 	toep->sb_cc = sb->sb_cc;
 wakeup:
 	KASSERT(toep->ddp_flags & db_flag,

Modified: projects/sendfile/sys/kern/sys_socket.c
==============================================================================
--- projects/sendfile/sys/kern/sys_socket.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/kern/sys_socket.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -167,20 +167,17 @@ soo_ioctl(struct file *fp, u_long cmd, v
 
 	case FIONREAD:
 		/* Unlocked read. */
-		*(int *)data = so->so_rcv.sb_cc;
+		*(int *)data = sbavail(&so->so_rcv);
 		break;
 
 	case FIONWRITE:
 		/* Unlocked read. */
-		*(int *)data = so->so_snd.sb_cc;
+		*(int *)data = sbavail(&so->so_snd);
 		break;
 
 	case FIONSPACE:
-		if ((so->so_snd.sb_hiwat < so->so_snd.sb_cc) ||
-		    (so->so_snd.sb_mbmax < so->so_snd.sb_mbcnt))
-			*(int *)data = 0;
-		else
-			*(int *)data = sbspace(&so->so_snd);
+		/* Unlocked read. */
+		*(int *)data = sbspace(&so->so_snd);
 		break;
 
 	case FIOSETOWN:
@@ -246,6 +243,7 @@ soo_stat(struct file *fp, struct stat *u
     struct thread *td)
 {
 	struct socket *so = fp->f_data;
+	struct sockbuf *sb;
 #ifdef MAC
 	int error;
 #endif
@@ -261,15 +259,18 @@ soo_stat(struct file *fp, struct stat *u
 	 * If SBS_CANTRCVMORE is set, but there's still data left in the
 	 * receive buffer, the socket is still readable.
 	 */
-	SOCKBUF_LOCK(&so->so_rcv);
-	if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0 ||
-	    so->so_rcv.sb_cc != 0)
+	sb = &so->so_rcv;
+	SOCKBUF_LOCK(sb);
+	if ((sb->sb_state & SBS_CANTRCVMORE) == 0 || sbavail(sb))
 		ub->st_mode |= S_IRUSR | S_IRGRP | S_IROTH;
-	ub->st_size = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
-	SOCKBUF_UNLOCK(&so->so_rcv);
-	/* Unlocked read. */
-	if ((so->so_snd.sb_state & SBS_CANTSENDMORE) == 0)
+	ub->st_size = sbavail(sb) - sb->sb_ctl;
+	SOCKBUF_UNLOCK(sb);
+
+	sb = &so->so_snd;
+	SOCKBUF_LOCK(sb);
+	if ((sb->sb_state & SBS_CANTSENDMORE) == 0)
 		ub->st_mode |= S_IWUSR | S_IWGRP | S_IWOTH;
+	SOCKBUF_UNLOCK(sb);
 	ub->st_uid = so->so_cred->cr_uid;
 	ub->st_gid = so->so_cred->cr_gid;
 	return (*so->so_proto->pr_usrreqs->pru_sense)(so, ub);

Modified: projects/sendfile/sys/kern/uipc_debug.c
==============================================================================
--- projects/sendfile/sys/kern/uipc_debug.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/kern/uipc_debug.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -403,7 +403,8 @@ db_print_sockbuf(struct sockbuf *sb, con
 	db_printf("sb_sndptroff: %u\n", sb->sb_sndptroff);
 
 	db_print_indent(indent);
-	db_printf("sb_cc: %u   ", sb->sb_cc);
+	db_printf("sb_acc: %u   ", sb->sb_acc);
+	db_printf("sb_ccc: %u   ", sb->sb_ccc);
 	db_printf("sb_hiwat: %u   ", sb->sb_hiwat);
 	db_printf("sb_mbcnt: %u   ", sb->sb_mbcnt);
 	db_printf("sb_mbmax: %u\n", sb->sb_mbmax);

Modified: projects/sendfile/sys/kern/uipc_mbuf.c
==============================================================================
--- projects/sendfile/sys/kern/uipc_mbuf.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/kern/uipc_mbuf.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -389,7 +389,7 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
  * cleaned too.
  */
 void
-m_demote(struct mbuf *m0, int all)
+m_demote(struct mbuf *m0, int all, int flags)
 {
 	struct mbuf *m;
 
@@ -405,7 +405,7 @@ m_demote(struct mbuf *m0, int all)
 			m_freem(m->m_nextpkt);
 			m->m_nextpkt = NULL;
 		}
-		m->m_flags = m->m_flags & (M_EXT|M_RDONLY|M_NOFREE);
+		m->m_flags = m->m_flags & (M_EXT | M_RDONLY | M_NOFREE | flags);
 	}
 }
 

Modified: projects/sendfile/sys/kern/uipc_sockbuf.c
==============================================================================
--- projects/sendfile/sys/kern/uipc_sockbuf.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/kern/uipc_sockbuf.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -68,6 +68,104 @@ static	u_long sb_efficiency = 8;	/* para
 static struct mbuf	*sbcut_internal(struct sockbuf *sb, int len);
 static void	sbflush_internal(struct sockbuf *sb);
 
+static void
+sb_shift_nrdy(struct sockbuf *sb, struct mbuf *m)
+{
+
+	KASSERT(m->m_flags & M_NOTREADY, ("%s: m %p !M_NOTREADY", __func__, m));
+
+	m = m->m_next;
+	while (m != NULL && !(m->m_flags & M_NOTREADY)) {
+		m->m_flags &= ~M_BLOCKED;
+		sb->sb_acc += m->m_len;
+		m = m->m_next;
+	}
+
+	sb->sb_fnrdy = m;
+}
+
+/*
+ * Adjust sockbuf state reflecting allocation of m.
+ */
+void
+sballoc(struct sockbuf *sb, struct mbuf *m)
+{
+
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	sb->sb_ccc += m->m_len;
+
+	if (sb->sb_fnrdy == NULL) {
+		if (m->m_flags & M_NOTREADY)
+			sb->sb_fnrdy = m;
+		else
+			sb->sb_acc += m->m_len;
+	} else
+		m->m_flags |= M_BLOCKED;
+
+	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
+		sb->sb_ctl += m->m_len;
+
+	sb->sb_mbcnt += MSIZE;
+	sb->sb_mcnt += 1;
+
+	if (m->m_flags & M_EXT) {
+		sb->sb_mbcnt += m->m_ext.ext_size;
+		sb->sb_ccnt += 1;
+	}
+}
+
+/*
+ * Adjust sockbuf state reflecting freeing of m.
+ */
+void
+sbfree(struct sockbuf *sb, struct mbuf *m)
+{
+
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	sb->sb_ccc -= m->m_len;
+
+	if (!(m->m_flags & M_NOTAVAIL))
+		sb->sb_acc -= m->m_len;
+
+	if (sb->sb_fnrdy == m)
+		sb_shift_nrdy(sb, m);
+
+	if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
+		sb->sb_ctl -= m->m_len;
+
+	sb->sb_mbcnt -= MSIZE;
+	sb->sb_mcnt -= 1;
+	if (m->m_flags & M_EXT) {
+		sb->sb_mbcnt -= m->m_ext.ext_size;
+		sb->sb_ccnt -= 1;
+	}
+
+	if (sb->sb_sndptr == m) {
+		sb->sb_sndptr = NULL;
+		sb->sb_sndptroff = 0;
+	}
+	if (sb->sb_sndptroff != 0)
+		sb->sb_sndptroff -= m->m_len;
+}
+
+/*
+ * Trim some amount of data from (first?) mbuf in buffer.
+ */
+void
+sbmtrim(struct sockbuf *sb, struct mbuf *m, int len)
+{
+
+	SOCKBUF_LOCK_ASSERT(sb);
+	KASSERT(len < m->m_len, ("%s: m %p len %d", __func__, m, len));
+
+	m->m_data += len;
+	m->m_len -= len;
+	sb->sb_acc -= len;
+	sb->sb_ccc -= len;
+}
+
 /*
  * Socantsendmore indicates that no more data will be sent on the socket; it
  * would normally be applied to a socket when the user informs the system
@@ -127,7 +225,7 @@ sbwait(struct sockbuf *sb)
 	SOCKBUF_LOCK_ASSERT(sb);
 
 	sb->sb_flags |= SB_WAIT;
-	return (msleep_sbt(&sb->sb_cc, &sb->sb_mtx,
+	return (msleep_sbt(&sb->sb_acc, &sb->sb_mtx,
 	    (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, "sbwait",
 	    sb->sb_timeo, 0, 0));
 }
@@ -184,7 +282,7 @@ sowakeup(struct socket *so, struct sockb
 		sb->sb_flags &= ~SB_SEL;
 	if (sb->sb_flags & SB_WAIT) {
 		sb->sb_flags &= ~SB_WAIT;
-		wakeup(&sb->sb_cc);
+		wakeup(&sb->sb_acc);
 	}
 	KNOTE_LOCKED(&sb->sb_sel.si_note, 0);
 	if (sb->sb_upcall != NULL) {
@@ -519,7 +617,7 @@ sbappend(struct sockbuf *sb, struct mbuf
  * that is, a stream protocol (such as TCP).
  */
 void
-sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
+sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 	SOCKBUF_LOCK_ASSERT(sb);
 
@@ -529,8 +627,8 @@ sbappendstream_locked(struct sockbuf *sb
 	SBLASTMBUFCHK(sb);
 
 	/* Remove all packet headers and mbuf tags to get a pure data chain. */
-	m_demote(m, 1);
-	
+	m_demote(m, 1, flags & PRUS_NOTREADY ? M_NOTREADY : 0);
+
 	sbcompress(sb, m, sb->sb_mbtail);
 
 	sb->sb_lastrecord = sb->sb_mb;
@@ -543,38 +641,59 @@ sbappendstream_locked(struct sockbuf *sb
  * that is, a stream protocol (such as TCP).
  */
 void
-sbappendstream(struct sockbuf *sb, struct mbuf *m)
+sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags)
 {
 
 	SOCKBUF_LOCK(sb);
-	sbappendstream_locked(sb, m);
+	sbappendstream_locked(sb, m, flags);
 	SOCKBUF_UNLOCK(sb);
 }
 
 #ifdef SOCKBUF_DEBUG
 void
-sbcheck(struct sockbuf *sb)
+sbcheck(struct sockbuf *sb, const char *file, int line)
 {
-	struct mbuf *m;
-	struct mbuf *n = 0;
-	u_long len = 0, mbcnt = 0;
+	struct mbuf *m, *n, *fnrdy;
+	u_long acc, ccc, mbcnt;
 
 	SOCKBUF_LOCK_ASSERT(sb);
 
+	acc = ccc = mbcnt = 0;
+	fnrdy = NULL;
+
 	for (m = sb->sb_mb; m; m = n) {
 	    n = m->m_nextpkt;
 	    for (; m; m = m->m_next) {
-		len += m->m_len;
+		if ((m->m_flags & M_NOTREADY) && fnrdy == NULL) {
+			if (m != sb->sb_fnrdy) {
+				printf("sb %p: fnrdy %p != m %p\n",
+				    sb, sb->sb_fnrdy, m);
+				goto fail;
+			}
+			fnrdy = m;
+		}
+		if (fnrdy) {
+			if (!(m->m_flags & M_NOTAVAIL)) {
+				printf("sb %p: fnrdy %p, m %p is avail\n",
+				    sb, sb->sb_fnrdy, m);
+				goto fail;
+			}
+		} else
+			acc += m->m_len;
+		ccc += m->m_len;
 		mbcnt += MSIZE;
 		if (m->m_flags & M_EXT) /*XXX*/ /* pretty sure this is bogus */
 			mbcnt += m->m_ext.ext_size;
 	    }
 	}
-	if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
-		printf("cc %ld != %u || mbcnt %ld != %u\n", len, sb->sb_cc,
-		    mbcnt, sb->sb_mbcnt);
-		panic("sbcheck");
+	if (acc != sb->sb_acc || ccc != sb->sb_ccc || mbcnt != sb->sb_mbcnt) {
+		printf("acc %ld/%u ccc %ld/%u mbcnt %ld/%u\n",
+		    acc, sb->sb_acc, ccc, sb->sb_ccc, mbcnt, sb->sb_mbcnt);
+		goto fail;
 	}
+	return;
+fail:
+	panic("%s from %s:%u", __func__, file, line);
 }
 #endif
 
@@ -773,13 +892,16 @@ sbcompress(struct sockbuf *sb, struct mb
 		if (n && (n->m_flags & M_EOR) == 0 &&
 		    M_WRITABLE(n) &&
 		    ((sb->sb_flags & SB_NOCOALESCE) == 0) &&
+		    !(m->m_flags & M_NOTREADY) &&
 		    m->m_len <= MCLBYTES / 4 && /* XXX: Don't copy too much */
 		    m->m_len <= M_TRAILINGSPACE(n) &&
 		    n->m_type == m->m_type) {
 			bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
 			    (unsigned)m->m_len);
 			n->m_len += m->m_len;
-			sb->sb_cc += m->m_len;
+			sb->sb_ccc += m->m_len;
+			if (sb->sb_fnrdy == NULL)
+				sb->sb_acc += m->m_len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
 				/* XXX: Probably don't need.*/
 				sb->sb_ctl += m->m_len;
@@ -816,13 +938,13 @@ sbflush_internal(struct sockbuf *sb)
 		 * Don't call sbdrop(sb, 0) if the leading mbuf is non-empty:
 		 * we would loop forever. Panic instead.
 		 */
-		if (!sb->sb_cc && (sb->sb_mb == NULL || sb->sb_mb->m_len))
+		if (sb->sb_ccc > 0 && (sb->sb_mb == NULL || sb->sb_mb->m_len))
 			break;
-		m_freem(sbcut_internal(sb, (int)sb->sb_cc));
+		m_freem(sbcut_internal(sb, (int)sb->sb_ccc));
 	}
-	if (sb->sb_cc || sb->sb_mb || sb->sb_mbcnt)
-		panic("sbflush_internal: cc %u || mb %p || mbcnt %u",
-		    sb->sb_cc, (void *)sb->sb_mb, sb->sb_mbcnt);
+	KASSERT(sb->sb_ccc == 0 && sb->sb_mb == 0 && sb->sb_mbcnt == 0,
+	    ("%s: ccc %u mb %p mbcnt %u", __func__,
+	    sb->sb_ccc, (void *)sb->sb_mb, sb->sb_mbcnt));
 }
 
 void
@@ -864,7 +986,9 @@ sbcut_internal(struct sockbuf *sb, int l
 		if (m->m_len > len) {
 			m->m_len -= len;
 			m->m_data += len;
-			sb->sb_cc -= len;
+			sb->sb_ccc -= len;
+			if (!(m->m_flags & M_NOTAVAIL))
+				sb->sb_acc -= len;
 			if (sb->sb_sndptroff != 0)
 				sb->sb_sndptroff -= len;
 			if (m->m_type != MT_DATA && m->m_type != MT_OOBDATA)
@@ -950,8 +1074,8 @@ sbsndptr(struct sockbuf *sb, u_int off, 
 	struct mbuf *m, *ret;
 
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
-	KASSERT(off + len <= sb->sb_cc, ("%s: beyond sb", __func__));
-	KASSERT(sb->sb_sndptroff <= sb->sb_cc, ("%s: sndptroff broken", __func__));
+	KASSERT(off + len <= sb->sb_acc, ("%s: beyond sb", __func__));
+	KASSERT(sb->sb_sndptroff <= sb->sb_acc, ("%s: sndptroff broken", __func__));
 
 	/*
 	 * Is off below stored offset? Happens on retransmits.
@@ -1064,7 +1188,7 @@ void
 sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb)
 {
 
-	xsb->sb_cc = sb->sb_cc;
+	xsb->sb_cc = sb->sb_ccc;
 	xsb->sb_hiwat = sb->sb_hiwat;
 	xsb->sb_mbcnt = sb->sb_mbcnt;
 	xsb->sb_mcnt = sb->sb_mcnt;	

Modified: projects/sendfile/sys/kern/uipc_socket.c
==============================================================================
--- projects/sendfile/sys/kern/uipc_socket.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/kern/uipc_socket.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -1459,12 +1459,12 @@ restart:
 	 *   2. MSG_DONTWAIT is not set
 	 */
 	if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
-	    so->so_rcv.sb_cc < uio->uio_resid) &&
-	    so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
+	    sbavail(&so->so_rcv) < uio->uio_resid) &&
+	    sbavail(&so->so_rcv) < so->so_rcv.sb_lowat &&
 	    m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
-		KASSERT(m != NULL || !so->so_rcv.sb_cc,
-		    ("receive: m == %p so->so_rcv.sb_cc == %u",
-		    m, so->so_rcv.sb_cc));
+		KASSERT(m != NULL || !sbavail(&so->so_rcv),
+		    ("receive: m == %p sbavail == %u",
+		    m, sbavail(&so->so_rcv)));
 		if (so->so_error) {
 			if (m != NULL)
 				goto dontblock;
@@ -1747,9 +1747,7 @@ dontblock:
 						break;
 					}
 				}
-				m->m_data += len;
-				m->m_len -= len;
-				so->so_rcv.sb_cc -= len;
+				sbmtrim(&so->so_rcv, m, len);
 			}
 		}
 		SOCKBUF_LOCK_ASSERT(&so->so_rcv);
@@ -1914,7 +1912,7 @@ restart:
 
 	/* Abort if socket has reported problems. */
 	if (so->so_error) {
-		if (sb->sb_cc > 0)
+		if (sbavail(sb) > 0)
 			goto deliver;
 		if (oresid > uio->uio_resid)
 			goto out;
@@ -1926,32 +1924,32 @@ restart:
 
 	/* Door is closed.  Deliver what is left, if any. */
 	if (sb->sb_state & SBS_CANTRCVMORE) {
-		if (sb->sb_cc > 0)
+		if (sbavail(sb) > 0)
 			goto deliver;
 		else
 			goto out;
 	}
 
 	/* Socket buffer is empty and we shall not block. */
-	if (sb->sb_cc == 0 &&
+	if (sbavail(sb) == 0 &&
 	    ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) {
 		error = EAGAIN;
 		goto out;
 	}
 
 	/* Socket buffer got some data that we shall deliver now. */
-	if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) &&
+	if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) &&
 	    ((sb->sb_flags & SS_NBIO) ||
 	     (flags & (MSG_DONTWAIT|MSG_NBIO)) ||
-	     sb->sb_cc >= sb->sb_lowat ||
-	     sb->sb_cc >= uio->uio_resid ||
-	     sb->sb_cc >= sb->sb_hiwat) ) {
+	     sbavail(sb) >= sb->sb_lowat ||
+	     sbavail(sb) >= uio->uio_resid ||
+	     sbavail(sb) >= sb->sb_hiwat) ) {
 		goto deliver;
 	}
 
 	/* On MSG_WAITALL we must wait until all data or error arrives. */
 	if ((flags & MSG_WAITALL) &&
-	    (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat))
+	    (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat))
 		goto deliver;
 
 	/*
@@ -1965,7 +1963,7 @@ restart:
 
 deliver:
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
-	KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__));
+	KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__));
 	KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__));
 
 	/* Statistics. */
@@ -1973,7 +1971,7 @@ deliver:
 		uio->uio_td->td_ru.ru_msgrcv++;
 
 	/* Fill uio until full or current end of socket buffer is reached. */
-	len = min(uio->uio_resid, sb->sb_cc);
+	len = min(uio->uio_resid, sbavail(sb));
 	if (mp0 != NULL) {
 		/* Dequeue as many mbufs as possible. */
 		if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) {
@@ -1984,6 +1982,8 @@ deliver:
 			for (m = sb->sb_mb;
 			     m != NULL && m->m_len <= len;
 			     m = m->m_next) {
+				KASSERT(!(m->m_flags & M_NOTAVAIL),
+				    ("%s: m %p not available", __func__, m));
 				len -= m->m_len;
 				uio->uio_resid -= m->m_len;
 				sbfree(sb, m);
@@ -2108,9 +2108,9 @@ soreceive_dgram(struct socket *so, struc
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	while ((m = so->so_rcv.sb_mb) == NULL) {
-		KASSERT(so->so_rcv.sb_cc == 0,
-		    ("soreceive_dgram: sb_mb NULL but sb_cc %u",
-		    so->so_rcv.sb_cc));
+		KASSERT(sbavail(&so->so_rcv) == 0,
+		    ("soreceive_dgram: sb_mb NULL but sbavail %u",
+		    sbavail(&so->so_rcv)));
 		if (so->so_error) {
 			error = so->so_error;
 			so->so_error = 0;
@@ -3158,7 +3158,7 @@ filt_soread(struct knote *kn, long hint)
 	so = kn->kn_fp->f_data;
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 
-	kn->kn_data = so->so_rcv.sb_cc - so->so_rcv.sb_ctl;
+	kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl;
 	if (so->so_rcv.sb_state & SBS_CANTRCVMORE) {
 		kn->kn_flags |= EV_EOF;
 		kn->kn_fflags = so->so_error;
@@ -3168,7 +3168,7 @@ filt_soread(struct knote *kn, long hint)
 	else if (kn->kn_sfflags & NOTE_LOWAT)
 		return (kn->kn_data >= kn->kn_sdata);
 	else
-		return (so->so_rcv.sb_cc >= so->so_rcv.sb_lowat);
+		return (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat);
 }
 
 static void
@@ -3351,7 +3351,7 @@ soisdisconnected(struct socket *so)
 	sorwakeup_locked(so);
 	SOCKBUF_LOCK(&so->so_snd);
 	so->so_snd.sb_state |= SBS_CANTSENDMORE;
-	sbdrop_locked(&so->so_snd, so->so_snd.sb_cc);
+	sbdrop_locked(&so->so_snd, so->so_snd.sb_ccc);
 	sowwakeup_locked(so);
 	wakeup(&so->so_timeo);
 }

Modified: projects/sendfile/sys/kern/uipc_usrreq.c
==============================================================================
--- projects/sendfile/sys/kern/uipc_usrreq.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/kern/uipc_usrreq.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -792,10 +792,9 @@ uipc_rcvd(struct socket *so, int flags)
 	u_long newhiwat;
 
 	unp = sotounpcb(so);
-	KASSERT(unp != NULL, ("uipc_rcvd: unp == NULL"));
-
-	if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
-		panic("uipc_rcvd socktype %d", so->so_type);
+	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
+	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET,
+	    ("%s: socktype %d", __func__, so->so_type));
 
 	/*
 	 * Adjust backpressure on sender and wakeup any waiting to write.
@@ -809,7 +808,7 @@ uipc_rcvd(struct socket *so, int flags)
 	 */
 	SOCKBUF_LOCK(&so->so_rcv);
 	mbcnt = so->so_rcv.sb_mbcnt;
-	sbcc = so->so_rcv.sb_cc;
+	sbcc = sbavail(&so->so_rcv);
 	SOCKBUF_UNLOCK(&so->so_rcv);
 	UNP_PCB_LOCK(unp);
 	unp2 = unp->unp_conn;
@@ -841,7 +840,10 @@ uipc_send(struct socket *so, int flags, 
 	int error = 0;
 
 	unp = sotounpcb(so);
-	KASSERT(unp != NULL, ("uipc_send: unp == NULL"));
+	KASSERT(unp != NULL, ("%s: unp == NULL", __func__));
+	KASSERT(so->so_type == SOCK_STREAM || so->so_type == SOCK_DGRAM ||
+	    so->so_type == SOCK_SEQPACKET,
+	    ("%s: socktype %d", __func__, so->so_type));
 
 	if (flags & PRUS_OOB) {
 		error = EOPNOTSUPP;
@@ -992,7 +994,7 @@ uipc_send(struct socket *so, int flags, 
 		 */
 		mbcnt_delta = so2->so_rcv.sb_mbcnt - unp2->unp_mbcnt;
 		unp2->unp_mbcnt = so2->so_rcv.sb_mbcnt;
-		sbcc = so2->so_rcv.sb_cc;
+		sbcc = sbavail(&so2->so_rcv);
 		sorwakeup_locked(so2);
 
 		SOCKBUF_LOCK(&so->so_snd);
@@ -1008,9 +1010,6 @@ uipc_send(struct socket *so, int flags, 
 		UNP_PCB_UNLOCK(unp2);
 		m = NULL;
 		break;
-
-	default:
-		panic("uipc_send unknown socktype");
 	}
 
 	/*
@@ -1055,7 +1054,7 @@ uipc_sense(struct socket *so, struct sta
 	if ((so->so_type == SOCK_STREAM || so->so_type == SOCK_SEQPACKET) &&
 	    unp2 != NULL) {
 		so2 = unp2->unp_socket;
-		sb->st_blksize += so2->so_rcv.sb_cc;
+		sb->st_blksize += sbavail(&so2->so_rcv);
 	}
 	sb->st_dev = NODEV;
 	if (unp->unp_ino == 0)

Modified: projects/sendfile/sys/netinet/tcp_input.c
==============================================================================
--- projects/sendfile/sys/netinet/tcp_input.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/netinet/tcp_input.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -1754,7 +1754,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 					tcp_timer_activate(tp, TT_REXMT,
 						      tp->t_rxtcur);
 				sowwakeup(so);
-				if (so->so_snd.sb_cc)
+				if (sbavail(&so->so_snd))
 					(void) tcp_output(tp);
 				goto check_delack;
 			}
@@ -1862,7 +1862,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 					    newsize, so, NULL))
 						so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
-				sbappendstream_locked(&so->so_rcv, m);
+				sbappendstream_locked(&so->so_rcv, m, 0);
 			}
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
@@ -2587,7 +2587,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 					 * Otherwise we would send pure ACKs.
 					 */
 					SOCKBUF_LOCK(&so->so_snd);
-					avail = so->so_snd.sb_cc -
+					avail = sbavail(&so->so_snd) -
 					    (tp->snd_nxt - tp->snd_una);
 					SOCKBUF_UNLOCK(&so->so_snd);
 					if (avail > 0)
@@ -2722,10 +2722,10 @@ process_ACK:
 		cc_ack_received(tp, th, CC_ACK);
 
 		SOCKBUF_LOCK(&so->so_snd);
-		if (acked > so->so_snd.sb_cc) {
-			tp->snd_wnd -= so->so_snd.sb_cc;
+		if (acked > sbavail(&so->so_snd)) {
+			tp->snd_wnd -= sbavail(&so->so_snd);
 			mfree = sbcut_locked(&so->so_snd,
-			    (int)so->so_snd.sb_cc);
+			    (int)sbavail(&so->so_snd));
 			ourfinisacked = 1;
 		} else {
 			mfree = sbcut_locked(&so->so_snd, acked);
@@ -2851,7 +2851,7 @@ step6:
 		 * actually wanting to send this much urgent data.
 		 */
 		SOCKBUF_LOCK(&so->so_rcv);
-		if (th->th_urp + so->so_rcv.sb_cc > sb_max) {
+		if (th->th_urp + sbavail(&so->so_rcv) > sb_max) {
 			th->th_urp = 0;			/* XXX */
 			thflags &= ~TH_URG;		/* XXX */
 			SOCKBUF_UNLOCK(&so->so_rcv);	/* XXX */
@@ -2873,7 +2873,7 @@ step6:
 		 */
 		if (SEQ_GT(th->th_seq+th->th_urp, tp->rcv_up)) {
 			tp->rcv_up = th->th_seq + th->th_urp;
-			so->so_oobmark = so->so_rcv.sb_cc +
+			so->so_oobmark = sbavail(&so->so_rcv) +
 			    (tp->rcv_up - tp->rcv_nxt) - 1;
 			if (so->so_oobmark == 0)
 				so->so_rcv.sb_state |= SBS_RCVATMARK;
@@ -2944,7 +2944,7 @@ dodata:							/* XXX */
 			if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 				m_freem(m);
 			else
-				sbappendstream_locked(&so->so_rcv, m);
+				sbappendstream_locked(&so->so_rcv, m, 0);
 			/* NB: sorwakeup_locked() does an implicit unlock. */
 			sorwakeup_locked(so);
 		} else {

Modified: projects/sendfile/sys/netinet/tcp_output.c
==============================================================================
--- projects/sendfile/sys/netinet/tcp_output.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/netinet/tcp_output.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -322,7 +322,7 @@ after_sack_rexmit:
 			 * to send then the probe will be the FIN
 			 * itself.
 			 */
-			if (off < so->so_snd.sb_cc)
+			if (off < sbavail(&so->so_snd))
 				flags &= ~TH_FIN;
 			sendwin = 1;
 		} else {
@@ -348,7 +348,8 @@ after_sack_rexmit:
 	 */
 	if (sack_rxmit == 0) {
 		if (sack_bytes_rxmt == 0)
-			len = ((long)ulmin(so->so_snd.sb_cc, sendwin) - off);
+			len = ((long)ulmin(sbavail(&so->so_snd), sendwin) -
+			    off);
 		else {
 			long cwin;
 
@@ -357,8 +358,8 @@ after_sack_rexmit:
 			 * sending new data, having retransmitted all the
 			 * data possible in the scoreboard.
 			 */
-			len = ((long)ulmin(so->so_snd.sb_cc, tp->snd_wnd) 
-			       - off);
+			len = ((long)ulmin(sbavail(&so->so_snd), tp->snd_wnd) -
+			    off);
 			/*
 			 * Don't remove this (len > 0) check !
 			 * We explicitly check for len > 0 here (although it 
@@ -457,12 +458,15 @@ after_sack_rexmit:
 	 * TODO: Shrink send buffer during idle periods together
 	 * with congestion window.  Requires another timer.  Has to
 	 * wait for upcoming tcp timer rewrite.
+	 *
+	 * XXXGL: should there be used sbused() or sbavail()?
 	 */
 	if (V_tcp_do_autosndbuf && so->so_snd.sb_flags & SB_AUTOSIZE) {
 		if ((tp->snd_wnd / 4 * 5) >= so->so_snd.sb_hiwat &&
-		    so->so_snd.sb_cc >= (so->so_snd.sb_hiwat / 8 * 7) &&
-		    so->so_snd.sb_cc < V_tcp_autosndbuf_max &&
-		    sendwin >= (so->so_snd.sb_cc - (tp->snd_nxt - tp->snd_una))) {
+		    sbused(&so->so_snd) >= (so->so_snd.sb_hiwat / 8 * 7) &&
+		    sbused(&so->so_snd) < V_tcp_autosndbuf_max &&
+		    sendwin >= (sbused(&so->so_snd) -
+		    (tp->snd_nxt - tp->snd_una))) {
 			if (!sbreserve_locked(&so->so_snd,
 			    min(so->so_snd.sb_hiwat + V_tcp_autosndbuf_inc,
 			     V_tcp_autosndbuf_max), so, curthread))
@@ -499,10 +503,11 @@ after_sack_rexmit:
 		tso = 1;
 
 	if (sack_rxmit) {
-		if (SEQ_LT(p->rxmit + len, tp->snd_una + so->so_snd.sb_cc))
+		if (SEQ_LT(p->rxmit + len, tp->snd_una + sbavail(&so->so_snd)))
 			flags &= ~TH_FIN;
 	} else {
-		if (SEQ_LT(tp->snd_nxt + len, tp->snd_una + so->so_snd.sb_cc))
+		if (SEQ_LT(tp->snd_nxt + len, tp->snd_una +
+		    sbavail(&so->so_snd)))
 			flags &= ~TH_FIN;
 	}
 
@@ -532,7 +537,7 @@ after_sack_rexmit:
 		 */
 		if (!(tp->t_flags & TF_MORETOCOME) &&	/* normal case */
 		    (idle || (tp->t_flags & TF_NODELAY)) &&
-		    len + off >= so->so_snd.sb_cc &&
+		    len + off >= sbavail(&so->so_snd) &&
 		    (tp->t_flags & TF_NOPUSH) == 0) {
 			goto send;
 		}
@@ -660,7 +665,7 @@ dontupdate:
 	 * if window is nonzero, transmit what we can,
 	 * otherwise force out a byte.
 	 */
-	if (so->so_snd.sb_cc && !tcp_timer_active(tp, TT_REXMT) &&
+	if (sbavail(&so->so_snd) && !tcp_timer_active(tp, TT_REXMT) &&
 	    !tcp_timer_active(tp, TT_PERSIST)) {
 		tp->t_rxtshift = 0;
 		tcp_setpersist(tp);
@@ -786,7 +791,7 @@ send:
 			 * fractional unless the send sockbuf can
 			 * be emptied.
 			 */
-			if (sendalot && off + len < so->so_snd.sb_cc) {
+			if (sendalot && off + len < sbavail(&so->so_snd)) {
 				len -= len % (tp->t_maxopd - optlen);
 				sendalot = 1;
 			}
@@ -889,7 +894,7 @@ send:
 		 * give data to the user when a buffer fills or
 		 * a PUSH comes in.)
 		 */
-		if (off + len == so->so_snd.sb_cc)
+		if (off + len == sbavail(&so->so_snd))
 			flags |= TH_PUSH;
 		SOCKBUF_UNLOCK(&so->so_snd);
 	} else {

Modified: projects/sendfile/sys/netinet/tcp_reass.c
==============================================================================
--- projects/sendfile/sys/netinet/tcp_reass.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/netinet/tcp_reass.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -351,7 +351,7 @@ present:
 		if (so->so_rcv.sb_state & SBS_CANTRCVMORE)
 			m_freem(q->tqe_m);
 		else
-			sbappendstream_locked(&so->so_rcv, q->tqe_m);
+			sbappendstream_locked(&so->so_rcv, q->tqe_m, 0);
 		if (q != &tqs)
 			uma_zfree(V_tcp_reass_zone, q);
 		tp->t_segqlen--;

Modified: projects/sendfile/sys/netinet/tcp_usrreq.c
==============================================================================
--- projects/sendfile/sys/netinet/tcp_usrreq.c	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/netinet/tcp_usrreq.c	Tue Dec 24 14:14:05 2013	(r259821)
@@ -835,7 +835,7 @@ tcp_usr_send(struct socket *so, int flag
 		m_freem(control);	/* empty control, just free it */
 	}
 	if (!(flags & PRUS_OOB)) {
-		sbappendstream(&so->so_snd, m);
+		sbappendstream(&so->so_snd, m, flags);
 		if (nam && tp->t_state < TCPS_SYN_SENT) {
 			/*
 			 * Do implied connect if not yet connected,
@@ -867,7 +867,8 @@ tcp_usr_send(struct socket *so, int flag
 			socantsendmore(so);
 			tcp_usrclosed(tp);
 		}
-		if (!(inp->inp_flags & INP_DROPPED)) {
+		if (!(inp->inp_flags & INP_DROPPED) &&
+		    !(flags & PRUS_NOTREADY)) {
 			if (flags & PRUS_MORETOCOME)
 				tp->t_flags |= TF_MORETOCOME;
 			error = tcp_output(tp);
@@ -893,7 +894,7 @@ tcp_usr_send(struct socket *so, int flag
 		 * of data past the urgent section.
 		 * Otherwise, snd_up should be one lower.
 		 */
-		sbappendstream_locked(&so->so_snd, m);
+		sbappendstream_locked(&so->so_snd, m, flags);
 		SOCKBUF_UNLOCK(&so->so_snd);
 		if (nam && tp->t_state < TCPS_SYN_SENT) {
 			/*
@@ -917,10 +918,12 @@ tcp_usr_send(struct socket *so, int flag
 			tp->snd_wnd = TTCP_CLIENT_SND_WND;
 			tcp_mss(tp, -1);
 		}
-		tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
-		tp->t_flags |= TF_FORCEDATA;
-		error = tcp_output(tp);
-		tp->t_flags &= ~TF_FORCEDATA;
+		tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
+		if (!(flags & PRUS_NOTREADY)) {
+			tp->t_flags |= TF_FORCEDATA;
+			error = tcp_output(tp);
+			tp->t_flags &= ~TF_FORCEDATA;
+		}
 	}
 out:
 	TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :

Modified: projects/sendfile/sys/sys/mbuf.h
==============================================================================
--- projects/sendfile/sys/sys/mbuf.h	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/sys/mbuf.h	Tue Dec 24 14:14:05 2013	(r259821)
@@ -929,7 +929,7 @@ struct mbuf	*m_copypacket(struct mbuf *,
 void		 m_copy_pkthdr(struct mbuf *, struct mbuf *);
 struct mbuf	*m_copyup(struct mbuf *, int, int);
 struct mbuf	*m_defrag(struct mbuf *, int);
-void		 m_demote(struct mbuf *, int);
+void		 m_demote(struct mbuf *, int, int);
 struct mbuf	*m_devget(char *, int, int, struct ifnet *,
 		    void (*)(char *, caddr_t, u_int));
 struct mbuf	*m_dup(struct mbuf *, int);

Modified: projects/sendfile/sys/sys/protosw.h
==============================================================================
--- projects/sendfile/sys/sys/protosw.h	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/sys/protosw.h	Tue Dec 24 14:14:05 2013	(r259821)
@@ -209,6 +209,7 @@ struct pr_usrreqs {
 #define	PRUS_OOB	0x1
 #define	PRUS_EOF	0x2
 #define	PRUS_MORETOCOME	0x4
+#define	PRUS_NOTREADY	0x8
 	int	(*pru_sense)(struct socket *so, struct stat *sb);
 	int	(*pru_shutdown)(struct socket *so);
 	int	(*pru_flush)(struct socket *so, int direction);

Modified: projects/sendfile/sys/sys/sockbuf.h
==============================================================================
--- projects/sendfile/sys/sys/sockbuf.h	Tue Dec 24 13:46:54 2013	(r259820)
+++ projects/sendfile/sys/sys/sockbuf.h	Tue Dec 24 14:14:05 2013	(r259821)
@@ -88,8 +88,13 @@ struct	sockbuf {
 	struct	mbuf *sb_lastrecord;	/* (c/d) first mbuf of last
 					 * record in socket buffer */
 	struct	mbuf *sb_sndptr; /* (c/d) pointer into mbuf chain */
+	struct	mbuf *sb_fnrdy;	/* (c/d) pointer to first not ready buffer */
+#if 0
+	struct	mbuf *sb_lnrdy;	/* (c/d) pointer to last not ready buffer */
+#endif
 	u_int	sb_sndptroff;	/* (c/d) byte offset of ptr into chain */
-	u_int	sb_cc;		/* (c/d) actual chars in buffer */
+	u_int	sb_acc;		/* (c/d) available chars in buffer */
+	u_int	sb_ccc;		/* (c/d) claimed chars in buffer */
 	u_int	sb_hiwat;	/* (c/d) max actual char count */
 	u_int	sb_mbcnt;	/* (c/d) chars of mbufs used */
 	u_int   sb_mcnt;        /* (c/d) number of mbufs in buffer */
@@ -119,10 +124,17 @@ struct	sockbuf {
 #define	SOCKBUF_LOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_OWNED)
 #define	SOCKBUF_UNLOCK_ASSERT(_sb)	mtx_assert(SOCKBUF_MTX(_sb), MA_NOTOWNED)
 
+/*
+ * Socket buffer private mbuf(9) flags.
+ */
+#define	M_NOTREADY	M_PROTO1	/* m_data not populated yet */
+#define	M_BLOCKED	M_PROTO2	/* M_NOTREADY in front of m */
+#define	M_NOTAVAIL	(M_NOTREADY | M_BLOCKED)
+
 void	sbappend(struct sockbuf *sb, struct mbuf *m);
 void	sbappend_locked(struct sockbuf *sb, struct mbuf *m);
-void	sbappendstream(struct sockbuf *sb, struct mbuf *m);
-void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m);
+void	sbappendstream(struct sockbuf *sb, struct mbuf *m, int flags);
+void	sbappendstream_locked(struct sockbuf *sb, struct mbuf *m, int flags);
 int	sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
 	    struct mbuf *m0, struct mbuf *control);
 int	sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa,
@@ -133,7 +145,6 @@ int	sbappendcontrol_locked(struct sockbu
 	    struct mbuf *control);
 void	sbappendrecord(struct sockbuf *sb, struct mbuf *m0);
 void	sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0);
-void	sbcheck(struct sockbuf *sb);
 void	sbcompress(struct sockbuf *sb, struct mbuf *m, struct mbuf *n);
 struct mbuf *
 	sbcreatecontrol(caddr_t p, int size, int type, int level);
@@ -159,47 +170,49 @@ void	sbtoxsockbuf(struct sockbuf *sb, st
 int	sbwait(struct sockbuf *sb);
 int	sblock(struct sockbuf *sb, int flags);
 void	sbunlock(struct sockbuf *sb);
+void	sballoc(struct sockbuf *, struct mbuf *);
+void	sbfree(struct sockbuf *, struct mbuf *);
+void	sbmtrim(struct sockbuf *, struct mbuf *, int);
+
+static inline u_int
+sbavail(struct sockbuf *sb)
+{
+
+#if 0
+	SOCKBUF_LOCK_ASSERT(sb);
+#endif
+	return (sb->sb_acc);
+}
+
+static inline u_int
+sbused(struct sockbuf *sb)
+{
+
+#if 0
+	SOCKBUF_LOCK_ASSERT(sb);
+#endif
+	return (sb->sb_ccc);
+}
 
 /*
  * How much space is there in a socket buffer (so->so_snd or so->so_rcv)?
  * This is problematical if the fields are unsigned, as the space might
- * still be negative (cc > hiwat or mbcnt > mbmax).  Should detect
- * overflow and return 0.  Should use "lmin" but it doesn't exist now.
+ * still be negative (ccc > hiwat or mbcnt > mbmax).
  */
-#define	sbspace(sb) \
-    ((long) imin((int)((sb)->sb_hiwat - (sb)->sb_cc), \
-	 (int)((sb)->sb_mbmax - (sb)->sb_mbcnt)))
-
-/* adjust counters in sb reflecting allocation of m */
-#define	sballoc(sb, m) { \
-	(sb)->sb_cc += (m)->m_len; \
-	if ((m)->m_type != MT_DATA && (m)->m_type != MT_OOBDATA) \
-		(sb)->sb_ctl += (m)->m_len; \
-	(sb)->sb_mbcnt += MSIZE; \
-	(sb)->sb_mcnt += 1; \
-	if ((m)->m_flags & M_EXT) { \
-		(sb)->sb_mbcnt += (m)->m_ext.ext_size; \
-		(sb)->sb_ccnt += 1; \
-	} \
-}
+static inline int
+sbspace(struct sockbuf *sb)
+{
+	int cc, mbc;
+
+#if 0
+	SOCKBUF_LOCK_ASSERT(sb);
+#endif
+	cc = sb->sb_hiwat - sb->sb_ccc;
+	mbc = sb->sb_mbmax - sb->sb_mbcnt;
+	if (cc < 0 || mbc < 0)
+		return (0);
 
-/* adjust counters in sb reflecting freeing of m */

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201312241414.rBOEE5Oq017066>