Date: Mon, 26 Oct 2009 23:39:07 +0000 (UTC)
From: Lawrence Stewart <lstewart@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r198504 - projects/tcp_ffcaia2008_head/sys/netinet
Message-ID: <200910262339.n9QNd7Rc013704@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: lstewart Date: Mon Oct 26 23:39:07 2009 New Revision: 198504 URL: http://svn.freebsd.org/changeset/base/198504 Log: WIP checkpoint commit for reassembly queue autotuning and related cleanup. More cleanup and testing required. Sponsored by: FreeBSD Foundation Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c ============================================================================== --- projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c Mon Oct 26 23:24:59 2009 (r198503) +++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_input.c Mon Oct 26 23:39:07 2009 (r198504) @@ -1461,10 +1461,23 @@ tcp_do_segment(struct mbuf *m, struct tc * Set new socket buffer size. * Give up when limit is reached. */ - if (newsize) + if (newsize) { if (!sbreserve_locked(&so->so_rcv, newsize, so, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; + else { + /* + * Scale reassembly queue to 8/7 + * the socket buffer size to + * allow a little wiggle room. 
+ */ + tp->t_segq.tsegq_maxbytes = + (newsize << 3) / 7; + tp->t_segq.tsegq_maxmbufs = + tp->t_segq.tsegq_maxbytes / + tp->t_maxseg; + } + } m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(&so->so_rcv, m); } Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c ============================================================================== --- projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c Mon Oct 26 23:24:59 2009 (r198503) +++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_reass.c Mon Oct 26 23:39:07 2009 (r198504) @@ -74,41 +74,49 @@ __FBSDID("$FreeBSD$"); #include <netinet/tcp_debug.h> #endif /* TCPDEBUG */ -static VNET_DEFINE(int, tcp_reass_maxseg); -VNET_DEFINE(int, tcp_reass_qsize); -static VNET_DEFINE(int, tcp_reass_maxqlen); +#include <machine/atomic.h> + +static VNET_DEFINE(int, tcp_reass_maxmbufs); +static VNET_DEFINE(int, tcp_reass_maxbytes); +VNET_DEFINE(int, tcp_reass_curmbufs); +VNET_DEFINE(int, tcp_reass_curbytes); static VNET_DEFINE(int, tcp_reass_overflows); -#define V_tcp_reass_maxseg VNET(tcp_reass_maxseg) -#define V_tcp_reass_maxqlen VNET(tcp_reass_maxqlen) +#define V_tcp_reass_maxmbufs VNET(tcp_reass_maxmbufs) +#define V_tcp_reass_maxbytes VNET(tcp_reass_maxbytes) #define V_tcp_reass_overflows VNET(tcp_reass_overflows) SYSCTL_NODE(_net_inet_tcp, OID_AUTO, reass, CTLFLAG_RW, 0, "TCP Segment Reassembly Queue"); -SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxsegments, CTLFLAG_RDTUN, - &VNET_NAME(tcp_reass_maxseg), 0, - "Global maximum number of TCP Segments in Reassembly Queue"); - -SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, cursegments, CTLFLAG_RD, - &VNET_NAME(tcp_reass_qsize), 0, - "Global number of TCP Segments currently in Reassembly Queue"); - -SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxqlen, CTLFLAG_RW, - &VNET_NAME(tcp_reass_maxqlen), 0, - "Maximum number of TCP Segments per individual Reassembly Queue"); +SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxmbufs, CTLFLAG_RD, + 
&VNET_NAME(tcp_reass_maxmbufs), 0, + "Global maximum number of mbufs permitted across TCP reassembly queues"); + +SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, maxbytes, CTLFLAG_RD, + &VNET_NAME(tcp_reass_maxbytes), 0, + "Global maximum number of bytes permitted across TCP reassembly queues"); + +SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, curmbufs, CTLFLAG_RD, + &VNET_NAME(tcp_reass_curmbufs), 0, + "Global number of mbufs currently held in TCP reassembly queues"); + +SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, curbytes, CTLFLAG_RD, + &VNET_NAME(tcp_reass_curbytes), 0, + "Global number of bytes currently held in TCP reassembly queues"); SYSCTL_VNET_INT(_net_inet_tcp_reass, OID_AUTO, overflows, CTLFLAG_RD, &VNET_NAME(tcp_reass_overflows), 0, - "Global number of TCP Segment Reassembly Queue Overflows"); + "Global number of overflows across TCP reassembly queues"); /* Initialize TCP reassembly queue */ static void tcp_reass_zone_change(void *tag) { - V_tcp_reass_maxseg = nmbclusters / 16; - uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg); + V_tcp_reass_maxmbufs = nmbclusters / 16; + V_tcp_reass_maxbytes = V_tcp_reass_maxmbufs * 1448; + uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxmbufs); } VNET_DEFINE(uma_zone_t, tcp_reass_zone); @@ -117,19 +125,24 @@ void tcp_reass_init(void) { - V_tcp_reass_maxseg = 0; - V_tcp_reass_qsize = 0; - V_tcp_reass_maxqlen = 48; + V_tcp_reass_maxmbufs = 0; + V_tcp_reass_maxbytes = 0; + V_tcp_reass_curmbufs = 0; + V_tcp_reass_curbytes = 0; V_tcp_reass_overflows = 0; - V_tcp_reass_maxseg = nmbclusters / 16; - TUNABLE_INT_FETCH("net.inet.tcp.reass.maxsegments", - &V_tcp_reass_maxseg); + /**/ + V_tcp_reass_maxmbufs = nmbclusters / 16; + TUNABLE_INT_FETCH("net.inet.tcp.reass.maxmbufs", + &V_tcp_reass_maxmbufs); + /* 1448 bytes is the most common segment size for bulk transfer */ + V_tcp_reass_maxbytes = V_tcp_reass_maxmbufs * 1448; V_tcp_reass_zone = uma_zcreate("tcpreass", sizeof (struct tseg_qent), NULL, NULL, NULL, NULL, 
UMA_ALIGN_PTR, UMA_ZONE_NOFREE); - uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxseg); + uma_zone_set_max(V_tcp_reass_zone, V_tcp_reass_maxmbufs); EVENTHANDLER_REGISTER(nmbclusters_change, tcp_reass_zone_change, NULL, EVENTHANDLER_PRI_ANY); + /**/ } int @@ -141,6 +154,7 @@ tcp_reass(struct tcpcb *tp, struct tcphd struct tseg_qent *te = NULL; struct socket *so = tp->t_inpcb->inp_socket; int flags; + struct tsegq *t_segq = &tp->t_segq; INP_WLOCK_ASSERT(tp->t_inpcb); @@ -164,9 +178,11 @@ tcp_reass(struct tcpcb *tp, struct tcphd * process the missing segment. */ if (th->th_seq != tp->rcv_nxt && - (V_tcp_reass_qsize + 1 >= V_tcp_reass_maxseg || - tp->t_segqlen >= V_tcp_reass_maxqlen)) { - V_tcp_reass_overflows++; + (V_tcp_reass_curmbufs + 1 > V_tcp_reass_maxmbufs || + V_tcp_reass_curbytes + *tlenp > V_tcp_reass_maxbytes || + t_segq->tsegq_bytes + *tlenp >= t_segq->tsegq_maxbytes || + t_segq->tsegq_mbufs + 1 > t_segq->tsegq_maxmbufs)) { + atomic_add_int(&V_tcp_reass_overflows, 1); TCPSTAT_INC(tcps_rcvmemdrop); m_freem(m); *tlenp = 0; @@ -184,8 +200,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd *tlenp = 0; return (0); } - tp->t_segqlen++; - V_tcp_reass_qsize++; + t_segq->tsegq_bytes += *tlenp; + t_segq->tsegq_mbufs++; + atomic_add_int(&V_tcp_reass_curmbufs, 1); + atomic_add_int(&V_tcp_reass_curbytes, *tlenp); /* * Find a segment which begins after this one does. @@ -211,8 +229,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd TCPSTAT_ADD(tcps_rcvdupbyte, *tlenp); m_freem(m); uma_zfree(V_tcp_reass_zone, te); - tp->t_segqlen--; - V_tcp_reass_qsize--; + t_segq->tsegq_bytes -= *tlenp; + t_segq->tsegq_mbufs--; + atomic_subtract_int(&V_tcp_reass_curmbufs, 1); + atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp); /* * Try to present any queued data * at the left window edge to the user. 
@@ -248,8 +268,10 @@ tcp_reass(struct tcpcb *tp, struct tcphd LIST_REMOVE(q, tqe_q); m_freem(q->tqe_m); uma_zfree(V_tcp_reass_zone, q); - tp->t_segqlen--; - V_tcp_reass_qsize--; + t_segq->tsegq_bytes -= *tlenp; + t_segq->tsegq_mbufs--; + atomic_subtract_int(&V_tcp_reass_curmbufs, 1); + atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp); q = nq; } @@ -285,8 +307,10 @@ present: else sbappendstream_locked(&so->so_rcv, q->tqe_m); uma_zfree(V_tcp_reass_zone, q); - tp->t_segqlen--; - V_tcp_reass_qsize--; + t_segq->tsegq_bytes -= *tlenp; + t_segq->tsegq_mbufs--; + atomic_subtract_int(&V_tcp_reass_curmbufs, 1); + atomic_subtract_int(&V_tcp_reass_curbytes, *tlenp); q = nq; } while (q && q->tqe_th->th_seq == tp->rcv_nxt); ND6_HINT(tp); Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c ============================================================================== --- projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c Mon Oct 26 23:24:59 2009 (r198503) +++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_subr.c Mon Oct 26 23:39:07 2009 (r198504) @@ -106,6 +106,7 @@ __FBSDID("$FreeBSD$"); #include <sys/syslog.h> #endif /*IPSEC*/ +#include <machine/atomic.h> #include <machine/in_cksum.h> #include <sys/md5.h> @@ -825,7 +826,6 @@ tcp_drop(struct tcpcb *tp, int errno) void tcp_discardcb(struct tcpcb *tp) { - struct tseg_qent *q; struct inpcb *inp = tp->t_inpcb; struct socket *so = inp->inp_socket; #ifdef INET6 @@ -903,13 +903,8 @@ tcp_discardcb(struct tcpcb *tp) } /* free the reassembly queue, if any */ - while ((q = LIST_FIRST(&tp->t_segq)) != NULL) { - LIST_REMOVE(q, tqe_q); - m_freem(q->tqe_m); - uma_zfree(V_tcp_reass_zone, q); - tp->t_segqlen--; - V_tcp_reass_qsize--; - } + TCP_REASS_FLUSH(&tp->t_segq); + /* Disconnect offload device, if any. 
*/ tcp_offload_detach(tp); @@ -967,7 +962,6 @@ tcp_drain(void) CURVNET_SET(vnet_iter); struct inpcb *inpb; struct tcpcb *tcpb; - struct tseg_qent *te; /* * Walk the tcpbs, if existing, and flush the reassembly queue, @@ -983,14 +977,7 @@ tcp_drain(void) continue; INP_WLOCK(inpb); if ((tcpb = intotcpcb(inpb)) != NULL) { - while ((te = LIST_FIRST(&tcpb->t_segq)) - != NULL) { - LIST_REMOVE(te, tqe_q); - m_freem(te->tqe_m); - uma_zfree(V_tcp_reass_zone, te); - tcpb->t_segqlen--; - V_tcp_reass_qsize--; - } + TCP_REASS_FLUSH(&tcpb->t_segq); tcp_clean_sackreport(tcpb); } INP_WUNLOCK(inpb); Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c ============================================================================== --- projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c Mon Oct 26 23:24:59 2009 (r198503) +++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_usrreq.c Mon Oct 26 23:39:07 2009 (r198504) @@ -1452,6 +1452,9 @@ tcp_attach(struct socket *so) INP_INFO_WUNLOCK(&V_tcbinfo); return (ENOBUFS); } + + tp->t_segq.tsegq_maxbytes = (so->so_rcv.sb_hiwat << 3) / 7; + tp->t_segq.tsegq_maxmbufs = tp->t_segq.tsegq_maxbytes / tp->t_maxseg; tp->t_state = TCPS_CLOSED; INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); @@ -1749,8 +1752,8 @@ db_print_tcpcb(struct tcpcb *tp, const c indent += 2; db_print_indent(indent); - db_printf("t_segq first: %p t_segqlen: %d t_dupacks: %d\n", - LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks); + db_printf("t_segq first: %p t_dupacks: %d\n", + LIST_FIRST(&tp->t_segq), tp->t_dupacks); db_print_indent(indent); db_printf("tt_rexmt: %p tt_persist: %p tt_keep: %p\n", Modified: projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h ============================================================================== --- projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h Mon Oct 26 23:24:59 2009 (r198503) +++ projects/tcp_ffcaia2008_head/sys/netinet/tcp_var.h Mon Oct 26 23:39:07 2009 (r198504) @@ -42,10 +42,12 @@ * Kernel variables for tcp. 
*/ VNET_DECLARE(int, tcp_do_rfc1323); -VNET_DECLARE(int, tcp_reass_qsize); VNET_DECLARE(struct uma_zone *, tcp_reass_zone); +VNET_DECLARE(int, tcp_reass_curmbufs); +VNET_DECLARE(int, tcp_reass_curbytes); #define V_tcp_do_rfc1323 VNET(tcp_do_rfc1323) -#define V_tcp_reass_qsize VNET(tcp_reass_qsize) +#define V_tcp_reass_curmbufs VNET(tcp_reass_curmbufs) +#define V_tcp_reass_curbytes VNET(tcp_reass_curbytes) #define V_tcp_reass_zone VNET(tcp_reass_zone) #endif /* _KERNEL */ @@ -57,7 +59,14 @@ struct tseg_qent { struct tcphdr *tqe_th; /* a pointer to tcp header */ struct mbuf *tqe_m; /* mbuf contains packet */ }; -LIST_HEAD(tsegqe_head, tseg_qent); + +struct tsegq { + int tsegq_mbufs; + int tsegq_bytes; + int tsegq_maxbytes; + int tsegq_maxmbufs; + struct tseg_qent *lh_first; +}; struct sackblk { tcp_seq start; /* start seq no. of sack block */ @@ -95,9 +104,8 @@ do { \ * Organized for 16 byte cacheline efficiency. */ struct tcpcb { - struct tsegqe_head t_segq; /* segment reassembly queue */ + struct tsegq t_segq; /* segment reassembly queue */ void *t_pspare[2]; /* new reassembly queue */ - int t_segqlen; /* segment reassembly queue length */ int t_dupacks; /* consecutive dup acks recd */ struct tcp_timer *t_timers; /* All the TCP timers in one struct */ @@ -356,6 +364,19 @@ struct tcptw { max((tp)->t_rttmin, (((tp)->t_srtt >> (TCP_RTT_SHIFT - TCP_DELTA_SHIFT)) \ + (tp)->t_rttvar) >> TCP_DELTA_SHIFT) +#define TCP_REASS_FLUSH(segq) do { \ + struct tseg_qent *qe; \ + while ((qe = LIST_FIRST((segq))) != NULL) { \ + LIST_REMOVE(qe, tqe_q); \ + (segq)->tsegq_mbufs--; \ + (segq)->tsegq_bytes -= qe->tqe_len; \ + atomic_subtract_int(&V_tcp_reass_curmbufs, 1); \ + atomic_subtract_int(&V_tcp_reass_curbytes, qe->tqe_len); \ + m_freem(qe->tqe_m); \ + uma_zfree(V_tcp_reass_zone, qe); \ + } \ +} while (0) + /* * TCP statistics. * Many of these should be kept per connection,
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200910262339.n9QNd7Rc013704>