Date: Tue, 19 Jun 2001 17:34:24 -0400 From: Bosko Milekic <bmilekic@technokratis.com> To: Mike Silbersack <silby@silby.com> Cc: freebsd-net@FreeBSD.ORG Subject: Re: tcp template removal / scalability patch Message-ID: <20010619173424.A8399@technokratis.com> In-Reply-To: <20010619121337.C77484-300000@achilles.silby.com>; from silby@silby.com on Tue, Jun 19, 2001 at 12:19:19PM -0500 References: <20010619121337.C77484-300000@achilles.silby.com>
next in thread | previous in thread | raw e-mail | index | archive | help
On Tue, Jun 19, 2001 at 12:19:19PM -0500, Mike Silbersack wrote: > As suggested by Terry, I've cooked up a patch which halts the use of mbufs > for storing tcp template structures. The structure was only used in two > places; tcp_output.c when sending packets, and tcp_timer.c when sending > keepalives. tcp_output now pulls the info directly from the tcpcb, while > tcp_timer creates a short-term tcp template that is destroyed after use. > > The end result is that rather than 1 mbuf being the minimum used per > connection, 0 mbufs is now the minimum. As a result, those with boxes > handling a lot of connections should see greatly reduced mbuf usage. Cool! > I've attached two patches; one for current, and one for stable. Please > review / test, _especially_ if you're using IPv6 or IPSec - while those > cases look correct, I'm not running either and haven't tested them. I've spotted some patch-related (not conceptual) things worth mentionning, so I'll do that now and give you the conceptual review a little later, hopefully before I leave. > Thanks, > > Mike "Silby" Silbersack > Only in netinet.old/: icmp_var.h.orig > Only in netinet.old/: ip_icmp.c.orig Dirty. :-) > diff -u -r netinet.old/tcp_input.c netinet/tcp_input.c > --- netinet.old/tcp_input.c Tue Jun 19 11:53:16 2001 > +++ netinet/tcp_input.c Tue Jun 19 11:53:25 2001 > @@ -1066,12 +1066,7 @@ > } > FREE(sin, M_SONAME); > } > - tp->t_template = tcp_template(tp); > - if (tp->t_template == 0) { > - tp = tcp_drop(tp, ENOBUFS); > - dropsocket = 0; /* socket is already gone */ > - goto drop; > - } > + tp->t_template = NULL; > if ((taop = tcp_gettaocache(inp)) == NULL) { > taop = &tao_noncached; > bzero(taop, sizeof(*taop)); > Only in netinet.old/: tcp_input.c.orig > Only in netinet.old/: tcp_input.c.rej > diff -u -r netinet.old/tcp_output.c netinet/tcp_output.c > --- netinet.old/tcp_output.c Tue Jun 19 11:53:16 2001 > +++ netinet/tcp_output.c Tue Jun 19 11:53:25 2001 > @@ -630,16 +630,12 @@ > m->m_len = hdrlen; > } > m->m_pkthdr.rcvif = (struct ifnet *)0; > - if (tp->t_template == 0) > - panic("tcp_output"); > + > #ifdef INET6 > if (isipv6) { > ip6 = mtod(m, struct ip6_hdr *); > th = (struct tcphdr *)(ip6 + 1); > - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, > - sizeof(struct ip6_hdr)); > - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, > - sizeof(struct tcphdr)); > + tcp_fillheaders(tp, ip6, th); > } else > #endif /* INET6 */ > { > @@ -647,10 +643,7 @@ > ipov = (struct ipovly *)ip; > th = (struct tcphdr *)(ip + 1); > /* this picks up the pseudo header (w/o the length) */ > - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, > - sizeof(struct ip)); > - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, > - sizeof(struct tcphdr)); > + tcp_fillheaders(tp, ip, th); > } > > /* > Only in netinet.old/: tcp_output.c.orig > Only in netinet.old/: tcp_seq.h.orig > diff -u -r netinet.old/tcp_subr.c netinet/tcp_subr.c > --- netinet.old/tcp_subr.c Tue Jun 19 11:53:16 2001 > +++ netinet/tcp_subr.c Tue Jun 19 11:57:56 2001 > @@ -220,32 +220,27 @@ > #undef TCP_MINPROTOHDR > } > > + Unnecessary newline. > /* > - * Create template to be used to send tcp packets on a connection. > - * Call after host entry created, allocates an mbuf and fills > - * in a skeletal tcp/ip header, minimizing the amount of work > - * necessary when the connection is used. > + * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. > + * tcp_template used to store this data in mbufs, but we now recopy it out > + * of the tcpcb each time to conserve mbufs. > */ > -struct tcptemp * > -tcp_template(tp) > + Unnecessary newline. > +void > +tcp_fillheaders(tp, ip_ptr, tcp_ptr) > struct tcpcb *tp; > + void *ip_ptr; > + void *tcp_ptr; > { > - register struct inpcb *inp = tp->t_inpcb; > - register struct mbuf *m; > - register struct tcptemp *n; > + struct inpcb *inp = tp->t_inpcb; > + struct tcphdr *tcp_hdr = (struct tcphdr *)tcp_ptr; > > - if ((n = tp->t_template) == 0) { > - m = m_get(M_DONTWAIT, MT_HEADER); > - if (m == NULL) > - return (0); > - m->m_len = sizeof (struct tcptemp); > - n = mtod(m, struct tcptemp *); > - } > #ifdef INET6 > if ((inp->inp_vflag & INP_IPV6) != 0) { > - register struct ip6_hdr *ip6; > + struct ip6_hdr *ip6; Cool. > - ip6 = (struct ip6_hdr *)n->tt_ipgen; > + ip6 = (struct ip6_hdr *)ip_ptr; > ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | > (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); > ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | > @@ -254,29 +249,52 @@ > ip6->ip6_plen = sizeof(struct tcphdr); > ip6->ip6_src = inp->in6p_laddr; > ip6->ip6_dst = inp->in6p_faddr; > - n->tt_t.th_sum = 0; > + tcp_hdr->th_sum = 0; > } else > #endif > - { > - struct ip *ip = (struct ip *)n->tt_ipgen; > + { > + struct ip *ip = (struct ip *) ip_ptr; Something seems stylistically wrong here. I think the "{" brace should be on the same line as the "} else" above. > - bzero(ip, sizeof(struct ip)); /* XXX overkill? */ > + bzero(ip, sizeof(struct ip)); /* XXX overkill? */ Unnecessary whitespace. You changed the tabs separating the comment from the statement to spaces. > ip->ip_vhl = IP_VHL_BORING; > ip->ip_p = IPPROTO_TCP; > ip->ip_src = inp->inp_laddr; > ip->ip_dst = inp->inp_faddr; > - n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, > - htons(sizeof(struct tcphdr) + IPPROTO_TCP)); > - } > - n->tt_t.th_sport = inp->inp_lport; > - n->tt_t.th_dport = inp->inp_fport; > - n->tt_t.th_seq = 0; > - n->tt_t.th_ack = 0; > - n->tt_t.th_x2 = 0; > - n->tt_t.th_off = 5; > - n->tt_t.th_flags = 0; > - n->tt_t.th_win = 0; > - n->tt_t.th_urp = 0; > + tcp_hdr->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, > + htons(sizeof(struct tcphdr) + IPPROTO_TCP)); > + } > + > + tcp_hdr->th_sport = inp->inp_lport; > + tcp_hdr->th_dport = inp->inp_fport; > + tcp_hdr->th_seq = 0; > + tcp_hdr->th_ack = 0; > + tcp_hdr->th_x2 = 0; > + tcp_hdr->th_off = 5; > + tcp_hdr->th_flags = 0; > + tcp_hdr->th_win = 0; > + tcp_hdr->th_urp = 0; > +} > + > + I think one line too many. I could be wrong. > +/* > + * Create template to be used to send tcp packets on a connection. > + * Allocates an mbuf and fills in a skeletal tcp/ip header. The only > + * use for this function is in keepalives, who like to use tcp_respond. > + */ > +struct tcptemp * > +tcp_maketemplate(tp) > + struct tcpcb *tp; > +{ > + struct mbuf *m; > + struct tcptemp *n; > + > + m = m_get(M_DONTWAIT, MT_HEADER); > + if (m == NULL) > + return (0); > + m->m_len = sizeof (struct tcptemp); No space after sizeof, unless you get rid of the parantheses (style(9)). > + n = mtod(m, struct tcptemp *); > + > + tcp_fillheaders(tp, (void *)&n->tt_ipgen, (void *)&n->tt_t); > return (n); > } > > @@ -706,7 +724,7 @@ > FREE(q, M_TSEGQ); > } > if (tp->t_template) > - (void) m_free(dtom(tp->t_template)); > + panic("t_template non-null!"); Perhaps this should be an INVARIANTS-enabled KASSERT()? Is this something that can only happen due to programming error? It seems to me like it's the case here (assuming we're not dealing with a huge-assed memory corruption). > inp->inp_ppcb = NULL; > soisdisconnected(so); > #ifdef INET6 > @@ -1347,7 +1365,7 @@ > #endif /* INET6 */ > struct tcphdr *th; > > - if (!tp || !tp->t_template || !(inp = tp->t_inpcb)) > + if (!tp || !(inp = tp->t_inpcb)) Hmmm, this is gross. can we do something like, assuming tp is a pointer: if (tp == NULL || (inp = tp->t_inpcb) == NULL) (especially now that we have enough space in the line to do it?) > return 0; > MGETHDR(m, M_DONTWAIT, MT_DATA); > if (!m) > @@ -1359,10 +1377,7 @@ > th = (struct tcphdr *)(ip6 + 1); > m->m_pkthdr.len = m->m_len = > sizeof(struct ip6_hdr) + sizeof(struct tcphdr); > - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, > - sizeof(struct ip6_hdr)); > - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, > - sizeof(struct tcphdr)); > + tcp_fillheaders(tp, ip6, th); > hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); > } else > #endif /* INET6 */ > @@ -1370,10 +1385,7 @@ > ip = mtod(m, struct ip *); > th = (struct tcphdr *)(ip + 1); > m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); > - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, > - sizeof(struct ip)); > - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, > - sizeof(struct tcphdr)); > + tcp_fillheaders(tp, ip, th); > hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); > } > > Only in netinet.old/: tcp_subr.c.orig > diff -u -r netinet.old/tcp_timer.c netinet/tcp_timer.c > --- netinet.old/tcp_timer.c Tue Jun 19 11:53:16 2001 > +++ netinet/tcp_timer.c Tue Jun 19 11:56:13 2001 > @@ -41,6 +41,7 @@ > #include <sys/param.h> > #include <sys/systm.h> > #include <sys/kernel.h> > +#include <sys/mbuf.h> > #include <sys/sysctl.h> > #include <sys/socket.h> > #include <sys/socketvar.h> > @@ -227,6 +228,7 @@ > void *xtp; > { > struct tcpcb *tp = xtp; > + struct tcptemp *t_template; > int s; > #ifdef TCPDEBUG > int ostate; > @@ -273,9 +275,14 @@ > &tp->t_template->tt_t, (struct mbuf *)NULL, > tp->rcv_nxt - 1, tp->snd_una - 1, 0); > #else > - tcp_respond(tp, tp->t_template->tt_ipgen, > - &tp->t_template->tt_t, (struct mbuf *)NULL, > - tp->rcv_nxt, tp->snd_una - 1, 0); > + > + t_template = tcp_maketemplate(tp); > + if (t_template) { > + tcp_respond(tp, t_template->tt_ipgen, > + &t_template->tt_t, (struct mbuf *)NULL, > + tp->rcv_nxt, tp->snd_una - 1, 0); > + (void) m_free(dtom(t_template)); > + } > #endif > callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); > } else > Only in netinet.old/: tcp_timer.c.orig > diff -u -r netinet.old/tcp_usrreq.c netinet/tcp_usrreq.c > --- netinet.old/tcp_usrreq.c Tue Jun 19 11:53:16 2001 > +++ netinet/tcp_usrreq.c Tue Jun 19 11:53:27 2001 > @@ -744,11 +744,7 @@ > inp->inp_fport = sin->sin_port; > in_pcbrehash(inp); > > - tp->t_template = tcp_template(tp); > - if (tp->t_template == 0) { > - in_pcbdisconnect(inp); > - return ENOBUFS; > - } > + tp->t_template = NULL; > > /* Compute window scaling to request. */ > while (tp->request_r_scale < TCP_MAX_WINSHIFT && > @@ -841,11 +837,7 @@ > inp->in6p_flowinfo = sin6->sin6_flowinfo; > in_pcbrehash(inp); > > - tp->t_template = tcp_template(tp); > - if (tp->t_template == 0) { > - in6_pcbdisconnect(inp); > - return ENOBUFS; > - } > + tp->t_template = NULL; > > /* Compute window scaling to request. */ > while (tp->request_r_scale < TCP_MAX_WINSHIFT && > Only in netinet.old/: tcp_usrreq.c.orig > Only in netinet.old/: tcp_usrreq.c.rej > diff -u -r netinet.old/tcp_var.h netinet/tcp_var.h > --- netinet.old/tcp_var.h Tue Jun 19 11:53:16 2001 > +++ netinet/tcp_var.h Tue Jun 19 11:53:27 2001 > @@ -400,7 +400,8 @@ > void tcp_setpersist __P((struct tcpcb *)); > void tcp_slowtimo __P((void)); > struct tcptemp * > - tcp_template __P((struct tcpcb *)); > + tcp_maketemplate __P((struct tcpcb *)); > +void tcp_fillheaders __P((struct tcpcb *, void *, void *)); > struct tcpcb * > tcp_timers __P((struct tcpcb *, int)); > void tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *, > Only in netinet.old/: tcp_var.h.orig > Only in netinet.old/: udp_usrreq.c.orig [snipped second posting of patch - it was posted twice] Cheers, -- Bosko Milekic bmilekic@technokratis.com To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-net" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010619173424.A8399>