Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 26 Oct 2009 23:39:07 +0000 (UTC)
From:      Lawrence Stewart <lstewart@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject:   svn commit: r198504 - projects/tcp_ffcaia2008_head/sys/netinet
Message-ID:  <200910262339.n9QNd7Rc013704@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: lstewart
Date: Mon Oct 26 23:39:07 2009
New Revision: 198504
URL: http://svn.freebsd.org/changeset/base/198504

Log:
  WIP checkpoint commit for reassembly queue autotuning and related cleanup. More
  cleanup and testing required.
  
  Sponsored by:	FreeBSD Foundation

Modified:
  projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c
  projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c
  projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c
  projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c
  projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h

Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c	Mon Oct 26 23:24:59 2009	(r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c	Mon Oct 26 23:39:07 2009	(r198504)
@@ -1461,10 +1461,23 @@ tcp_do_segment(struct mbuf *m, struct tc
 				 * Set new socket buffer size.
 				 * Give up when limit is reached.
 				 */
-				if (newsize)
+				if (newsize) {
 					if (!sbreserve_locked(&so->so_rcv,
 					    newsize, so, NULL))
 						so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
+					else {
+						/*
+						 * Scale reassembly queue to 8/7
+						 * the socket buffer size to
+						 * allow a little wiggle room.
+						 */
+						tp->t_segq.tsegq_maxbytes =
+						    (newsize << 3) / 7;
+						tp->t_segq.tsegq_maxmbufs =
+						    tp->t_segq.tsegq_maxbytes /
+						    tp->t_maxseg;
+					}
+				}
 				m_adj(m, drop_hdrlen);	/* delayed header drop */
 				sbappendstream_locked(&so->so_rcv, m);
 			}

Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c	Mon Oct 26 23:24:59 2009	(r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c	Mon Oct 26 23:39:07 2009	(r198504)
@@ -74,41 +74,49 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_debug.h>
 #endif /* TCPDEBUG */
 
-static VNET_DEFINE(int, tcp_reass_maxseg);
-VNET_DEFINE(int, tcp_reass_qsize);
-static VNET_DEFINE(int, tcp_reass_maxqlen);
+#include <machine/atomic.h>
+
+static VNET_DEFINE(int, tcp_reass_maxmbufs);
+static VNET_DEFINE(int, tcp_reass_maxbytes);
+VNET_DEFINE(int, tcp_reass_curmbufs);
+VNET_DEFINE(int, tcp_reass_curbytes);
 static VNET_DEFINE(int, tcp_reass_overflows);
 
-#define	V_tcp_reass_maxseg		VNET(tcp_reass_maxseg)
-#define	V_tcp_reass_maxqlen		VNET(tcp_reass_maxqlen)
+#define	V_tcp_reass_maxmbufs		VNET(tcp_reass_maxmbufs)
+#define	V_tcp_reass_maxbytes		VNET(tcp_reass_maxbytes)
 #define	V_tcp_reass_overflows		VNET(tcp_reass_overflows)
 
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0,
     "TCP Segment Reassembly Queue");
 
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN,
-    &VNET_NAME(tcp_reass_maxseg), 0,
-    "Global maximum number of TCP Segments in Reassembly Queue");
-
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD,
-    &VNET_NAME(tcp_reass_qsize), 0,
-    "Global number of TCP Segments currently in Reassembly Queue");
-
-SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW,
-    &VNET_NAME(tcp_reass_maxqlen), 0,
-    "Maximum number of TCP Segments per individual Reassembly Queue");
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxmbufs, CTLFLAG_RD,
+    &VNET_NAME(tcp_reass_maxmbufs), 0,
+    "Global maximum number of mbufs permitted across TCP reassembly queues");
+
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxbytes, CTLFLAG_RD,
+    &VNET_NAME(tcp_reass_maxbytes), 0,
+    "Global maximum number of bytes permitted across TCP reassembly queues");
+
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, curmbufs, CTLFLAG_RD,
+    &VNET_NAME(tcp_reass_curmbufs), 0,
+    "Global number of mbufs currently held in TCP reassembly queues");
+
+SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, curbytes, CTLFLAG_RD,
+    &VNET_NAME(tcp_reass_curbytes), 0,
+    "Global number of bytes currently held in TCP reassembly queues");
 
 SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD,
     &VNET_NAME(tcp_reass_overflows), 0,
-    "Global number of TCP Segment Reassembly Queue Overflows");
+    "Global number of overflows across TCP reassembly queues");
 
 /* Initialize TCP reassembly queue */
 static void
 tcp_reass_zone_change(void *tag)
 {
 
-	V_tcp_reass_maxseg = nmbclusters / 16;
-	uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+	V_tcp_reass_maxmbufs = nmbclusters / 16;
+	V_tcp_reass_maxbytes = V_tcp_reass_maxmbufs * 1448;
+	uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxmbufs);
 }
 
 VNET_DEFINE(uma_zone_t, tcp_reass_zone);
@@ -117,19 +125,24 @@ void
 tcp_reass_init(void)
 {
 
-	V_tcp_reass_maxseg = 0;
-	V_tcp_reass_qsize = 0;
-	V_tcp_reass_maxqlen = 48;
+	V_tcp_reass_maxmbufs = 0;
+	V_tcp_reass_maxbytes = 0;
+	V_tcp_reass_curmbufs = 0;
+	V_tcp_reass_curbytes = 0;
 	V_tcp_reass_overflows = 0;
 
-	V_tcp_reass_maxseg = nmbclusters / 16;
-	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments",
-	    &V_tcp_reass_maxseg);
+	/**/
+	V_tcp_reass_maxmbufs = nmbclusters / 16;
+	TUNABLE_INT_FETCH("net.inet.tcp.reass.maxmbufs",
+	    &V_tcp_reass_maxmbufs);
+	/* 1448 bytes is the most common segment size for bulk transfer */
+	V_tcp_reass_maxbytes = V_tcp_reass_maxmbufs * 1448;
 	V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
-	uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg);
+	uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxmbufs);
 	EVENTHANDLER_REGISTER(nmbclusters_change,
 	    tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY);
+	/**/
 }
 
 int
@@ -141,6 +154,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 	struct tseg_qent *te = NULL;
 	struct socket *so = tp->t_inpcb->inp_socket;
 	int flags;
+	struct tsegq *t_segq = &tp->t_segq;
 
 	INP_WLOCK_ASSERT(tp->t_inpcb);
 
@@ -164,9 +178,11 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 	 * process the missing segment.
 	 */
 	if (th->th_seq != tp->rcv_nxt &&
-	    (V_tcp_reass_qsize + 1 >= V_tcp_reass_maxseg ||
-	     tp->t_segqlen >= V_tcp_reass_maxqlen)) {
-		V_tcp_reass_overflows++;
+	    (V_tcp_reass_curmbufs + 1 > V_tcp_reass_maxmbufs ||
+	    V_tcp_reass_curbytes + *tlenp > V_tcp_reass_maxbytes ||
+	    t_segq->tsegq_bytes + *tlenp >= t_segq->tsegq_maxbytes ||
+	    t_segq->tsegq_mbufs + 1 > t_segq->tsegq_maxmbufs)) {
+		atomic_add_int(&V_tcp_reass_overflows, 1);
 		TCPSTAT_INC(tcps_rcvmemdrop);
 		m_freem(m);
 		*tlenp = 0;
@@ -184,8 +200,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 		*tlenp = 0;
 		return (0);
 	}
-	tp->t_segqlen++;
-	V_tcp_reass_qsize++;
+	t_segq->tsegq_bytes += *tlenp;
+	t_segq->tsegq_mbufs++;
+	atomic_add_int(&V_tcp_reass_curmbufs, 1);
+	atomic_add_int(&V_tcp_reass_curbytes, *tlenp);
 
 	/*
 	 * Find a segment which begins after this one does.
@@ -211,8 +229,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 				TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp);
 				m_freem(m);
 				uma_zfree(V_tcp_reass_zone, te);
-				tp->t_segqlen--;
-				V_tcp_reass_qsize--;
+				t_segq->tsegq_bytes -= *tlenp;
+				t_segq->tsegq_mbufs--;
+				atomic_subtract_int(&V_tcp_reass_curmbufs, 1);
+				atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp);
 				/*
 				 * Try to present any queued data
 				 * at the left window edge to the user.
@@ -248,8 +268,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd
 		LIST_REMOVE(q, tqe_q);
 		m_freem(q->tqe_m);
 		uma_zfree(V_tcp_reass_zone, q);
-		tp->t_segqlen--;
-		V_tcp_reass_qsize--;
+		t_segq->tsegq_bytes -= *tlenp;
+		t_segq->tsegq_mbufs--;
+		atomic_subtract_int(&V_tcp_reass_curmbufs, 1);
+		atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp);
 		q = nq;
 	}
 
@@ -285,8 +307,10 @@ present:
 		else
 			sbappendstream_locked(&so->so_rcv, q->tqe_m);
 		uma_zfree(V_tcp_reass_zone, q);
-		tp->t_segqlen--;
-		V_tcp_reass_qsize--;
+		t_segq->tsegq_bytes -= *tlenp;
+		t_segq->tsegq_mbufs--;
+		atomic_subtract_int(&V_tcp_reass_curmbufs, 1);
+		atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp);
 		q = nq;
 	} while (q && q->tqe_th->th_seq == tp->rcv_nxt);
 	ND6_HINT(tp);

Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c	Mon Oct 26 23:24:59 2009	(r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c	Mon Oct 26 23:39:07 2009	(r198504)
@@ -106,6 +106,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/syslog.h>
 #endif /*IPSEC*/
 
+#include <machine/atomic.h>
 #include <machine/in_cksum.h>
 #include <sys/md5.h>
 
@@ -825,7 +826,6 @@ tcp_drop(struct tcpcb *tp, int errno)
 void
 tcp_discardcb(struct tcpcb *tp)
 {
-	struct tseg_qent *q;
 	struct inpcb *inp = tp->t_inpcb;
 	struct socket *so = inp->inp_socket;
 #ifdef INET6
@@ -903,13 +903,8 @@ tcp_discardcb(struct tcpcb *tp)
 	}
 
 	/* free the reassembly queue, if any */
-	while ((q = LIST_FIRST(&tp->t_segq)) != NULL) {
-		LIST_REMOVE(q, tqe_q);
-		m_freem(q->tqe_m);
-		uma_zfree(V_tcp_reass_zone, q);
-		tp->t_segqlen--;
-		V_tcp_reass_qsize--;
-	}
+	TCP_REASS_FLUSH(&tp->t_segq);
+
 	/* Disconnect offload device, if any. */
 	tcp_offload_detach(tp);
 		
@@ -967,7 +962,6 @@ tcp_drain(void)
 		CURVNET_SET(vnet_iter);
 		struct inpcb *inpb;
 		struct tcpcb *tcpb;
-		struct tseg_qent *te;
 
 	/*
 	 * Walk the tcpbs, if existing, and flush the reassembly queue,
@@ -983,14 +977,7 @@ tcp_drain(void)
 				continue;
 			INP_WLOCK(inpb);
 			if ((tcpb = intotcpcb(inpb)) != NULL) {
-				while ((te = LIST_FIRST(&tcpb->t_segq))
-			            != NULL) {
-					LIST_REMOVE(te, tqe_q);
-					m_freem(te->tqe_m);
-					uma_zfree(V_tcp_reass_zone, te);
-					tcpb->t_segqlen--;
-					V_tcp_reass_qsize--;
-				}
+				TCP_REASS_FLUSH(&tcpb->t_segq);
 				tcp_clean_sackreport(tcpb);
 			}
 			INP_WUNLOCK(inpb);

Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c	Mon Oct 26 23:24:59 2009	(r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c	Mon Oct 26 23:39:07 2009	(r198504)
@@ -1452,6 +1452,9 @@ tcp_attach(struct socket *so)
 		INP_INFO_WUNLOCK(&V_tcbinfo);
 		return (ENOBUFS);
 	}
+
+	tp->t_segq.tsegq_maxbytes = (so->so_rcv.sb_hiwat << 3) / 7;
+	tp->t_segq.tsegq_maxmbufs  = tp->t_segq.tsegq_maxbytes / tp->t_maxseg;
 	tp->t_state = TCPS_CLOSED;
 	INP_WUNLOCK(inp);
 	INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -1749,8 +1752,8 @@ db_print_tcpcb(struct tcpcb *tp, const c
 	indent += 2;
 
 	db_print_indent(indent);
-	db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
-	   LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
+	db_printf("t_segq first: %p   t_dupacks: %d\n",
+	   LIST_FIRST(&tp->t_segq), tp->t_dupacks);
 
 	db_print_indent(indent);
 	db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",

Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h
==============================================================================
--- projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h	Mon Oct 26 23:24:59 2009	(r198503)
+++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h	Mon Oct 26 23:39:07 2009	(r198504)
@@ -42,10 +42,12 @@
  * Kernel variables for tcp.
  */
 VNET_DECLARE(int, tcp_do_rfc1323);
-VNET_DECLARE(int, tcp_reass_qsize);
 VNET_DECLARE(struct uma_zone *, tcp_reass_zone);
+VNET_DECLARE(int, tcp_reass_curmbufs);
+VNET_DECLARE(int, tcp_reass_curbytes);
 #define	V_tcp_do_rfc1323	VNET(tcp_do_rfc1323)
-#define	V_tcp_reass_qsize	VNET(tcp_reass_qsize)
+#define	V_tcp_reass_curmbufs	VNET(tcp_reass_curmbufs)
+#define	V_tcp_reass_curbytes	VNET(tcp_reass_curbytes)
 #define	V_tcp_reass_zone	VNET(tcp_reass_zone)
 
 #endif /* _KERNEL */
@@ -57,7 +59,14 @@ struct tseg_qent {
 	struct	tcphdr *tqe_th;		/* a pointer to tcp header */
 	struct	mbuf	*tqe_m;		/* mbuf contains packet */
 };
-LIST_HEAD(tsegqe_head, tseg_qent);
+
+struct tsegq {
+	int tsegq_mbufs;
+	int tsegq_bytes;
+	int tsegq_maxbytes;
+	int tsegq_maxmbufs;
+	struct tseg_qent *lh_first;
+};
 
 struct sackblk {
 	tcp_seq start;		/* start seq no. of sack block */
@@ -95,9 +104,8 @@ do {								\
  * Organized for 16 byte cacheline efficiency.
  */
 struct tcpcb {
-	struct	tsegqe_head t_segq;	/* segment reassembly queue */
+	struct	tsegq t_segq;		/* segment reassembly queue */
 	void	*t_pspare[2];		/* new reassembly queue */
-	int	t_segqlen;		/* segment reassembly queue length */
 	int	t_dupacks;		/* consecutive dup acks recd */
 
 	struct tcp_timer *t_timers;	/* All the TCP timers in one struct */
@@ -356,6 +364,19 @@ struct tcptw {
 	max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT))  \
 	  + (tp)->t_rttvar) >> TCP_DELTA_SHIFT)
 
+#define	TCP_REASS_FLUSH(segq) do { \
+	struct tseg_qent *qe; \
+	while ((qe = LIST_FIRST((segq))) != NULL) { \
+		LIST_REMOVE(qe, tqe_q); \
+		(segq)->tsegq_mbufs--; \
+		(segq)->tsegq_bytes -= qe->tqe_len; \
+		atomic_subtract_int(&V_tcp_reass_curmbufs, 1); \
+		atomic_subtract_int(&V_tcp_reass_curbytes, qe->tqe_len); \
+		m_freem(qe->tqe_m); \
+		uma_zfree(V_tcp_reass_zone, qe); \
+	} \
+} while (0)
+
 /*
  * TCP statistics.
  * Many of these should be kept per connection,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200910262339.n9QNd7Rc013704>