Date: Tue, 19 Jun 2001 17:34:24 -0400 From: Bosko Milekic <bmilekic@technokratis.com> To: Mike Silbersack <silby@silby.com> Cc: freebsd-net@FreeBSD.ORG Subject: Re: tcp template removal / scalability patch Message-ID: <20010619173424.A8399@technokratis.com> In-Reply-To: <20010619121337.C77484-300000@achilles.silby.com>; from silby@silby.com on Tue, Jun 19, 2001 at 12:19:19PM -0500 References: <20010619121337.C77484-300000@achilles.silby.com>
next in thread | previous in thread | raw e-mail | index | archive | help
On Tue, Jun 19, 2001 at 12:19:19PM -0500, Mike Silbersack wrote:
> As suggested by Terry, I've cooked up a patch which halts the use of mbufs
> for storing tcp template structures. The structure was only used in two
> places; tcp_output.c when sending packets, and tcp_timer.c when sending
> keepalives. tcp_output now pulls the info directly from the tcpcb, while
> tcp_timer creates a short-term tcp template that is destroyed after use.
>
> The end result is that rather than 1 mbuf being the minimum used per
> connection, 0 mbufs is now the minimum. As a result, those with boxes
> handling a lot of connections should see greatly reduced mbuf usage.
Cool!
> I've attached two patches; one for current, and one for stable. Please
> review / test, _especially_ if you're using IPv6 or IPSec - while those
> cases look correct, I'm not running either and haven't tested them.
I've spotted some patch-related (not conceptual) things worth
mentionning, so I'll do that now and give you the conceptual review a little
later, hopefully before I leave.
> Thanks,
>
> Mike "Silby" Silbersack
> Only in netinet.old/: icmp_var.h.orig
> Only in netinet.old/: ip_icmp.c.orig
Dirty. :-)
> diff -u -r netinet.old/tcp_input.c netinet/tcp_input.c
> --- netinet.old/tcp_input.c Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_input.c Tue Jun 19 11:53:25 2001
> @@ -1066,12 +1066,7 @@
> }
> FREE(sin, M_SONAME);
> }
> - tp->t_template = tcp_template(tp);
> - if (tp->t_template == 0) {
> - tp = tcp_drop(tp, ENOBUFS);
> - dropsocket = 0; /* socket is already gone */
> - goto drop;
> - }
> + tp->t_template = NULL;
> if ((taop = tcp_gettaocache(inp)) == NULL) {
> taop = &tao_noncached;
> bzero(taop, sizeof(*taop));
> Only in netinet.old/: tcp_input.c.orig
> Only in netinet.old/: tcp_input.c.rej
> diff -u -r netinet.old/tcp_output.c netinet/tcp_output.c
> --- netinet.old/tcp_output.c Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_output.c Tue Jun 19 11:53:25 2001
> @@ -630,16 +630,12 @@
> m->m_len = hdrlen;
> }
> m->m_pkthdr.rcvif = (struct ifnet *)0;
> - if (tp->t_template == 0)
> - panic("tcp_output");
> +
> #ifdef INET6
> if (isipv6) {
> ip6 = mtod(m, struct ip6_hdr *);
> th = (struct tcphdr *)(ip6 + 1);
> - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
> - sizeof(struct ip6_hdr));
> - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> - sizeof(struct tcphdr));
> + tcp_fillheaders(tp, ip6, th);
> } else
> #endif /* INET6 */
> {
> @@ -647,10 +643,7 @@
> ipov = (struct ipovly *)ip;
> th = (struct tcphdr *)(ip + 1);
> /* this picks up the pseudo header (w/o the length) */
> - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
> - sizeof(struct ip));
> - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> - sizeof(struct tcphdr));
> + tcp_fillheaders(tp, ip, th);
> }
>
> /*
> Only in netinet.old/: tcp_output.c.orig
> Only in netinet.old/: tcp_seq.h.orig
> diff -u -r netinet.old/tcp_subr.c netinet/tcp_subr.c
> --- netinet.old/tcp_subr.c Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_subr.c Tue Jun 19 11:57:56 2001
> @@ -220,32 +220,27 @@
> #undef TCP_MINPROTOHDR
> }
>
> +
Unnecessary newline.
> /*
> - * Create template to be used to send tcp packets on a connection.
> - * Call after host entry created, allocates an mbuf and fills
> - * in a skeletal tcp/ip header, minimizing the amount of work
> - * necessary when the connection is used.
> + * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb.
> + * tcp_template used to store this data in mbufs, but we now recopy it out
> + * of the tcpcb each time to conserve mbufs.
> */
> -struct tcptemp *
> -tcp_template(tp)
> +
Unnecessary newline.
> +void
> +tcp_fillheaders(tp, ip_ptr, tcp_ptr)
> struct tcpcb *tp;
> + void *ip_ptr;
> + void *tcp_ptr;
> {
> - register struct inpcb *inp = tp->t_inpcb;
> - register struct mbuf *m;
> - register struct tcptemp *n;
> + struct inpcb *inp = tp->t_inpcb;
> + struct tcphdr *tcp_hdr = (struct tcphdr *)tcp_ptr;
>
> - if ((n = tp->t_template) == 0) {
> - m = m_get(M_DONTWAIT, MT_HEADER);
> - if (m == NULL)
> - return (0);
> - m->m_len = sizeof (struct tcptemp);
> - n = mtod(m, struct tcptemp *);
> - }
> #ifdef INET6
> if ((inp->inp_vflag & INP_IPV6) != 0) {
> - register struct ip6_hdr *ip6;
> + struct ip6_hdr *ip6;
Cool.
> - ip6 = (struct ip6_hdr *)n->tt_ipgen;
> + ip6 = (struct ip6_hdr *)ip_ptr;
> ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
> (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK);
> ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
> @@ -254,29 +249,52 @@
> ip6->ip6_plen = sizeof(struct tcphdr);
> ip6->ip6_src = inp->in6p_laddr;
> ip6->ip6_dst = inp->in6p_faddr;
> - n->tt_t.th_sum = 0;
> + tcp_hdr->th_sum = 0;
> } else
> #endif
> - {
> - struct ip *ip = (struct ip *)n->tt_ipgen;
> + {
> + struct ip *ip = (struct ip *) ip_ptr;
Something seems stylistically wrong here. I think the "{" brace should
be on the same line as the "} else" above.
> - bzero(ip, sizeof(struct ip)); /* XXX overkill? */
> + bzero(ip, sizeof(struct ip)); /* XXX overkill? */
Unnecessary whitespace. You changed the tabs separating the comment
from the statement to spaces.
> ip->ip_vhl = IP_VHL_BORING;
> ip->ip_p = IPPROTO_TCP;
> ip->ip_src = inp->inp_laddr;
> ip->ip_dst = inp->inp_faddr;
> - n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
> - htons(sizeof(struct tcphdr) + IPPROTO_TCP));
> - }
> - n->tt_t.th_sport = inp->inp_lport;
> - n->tt_t.th_dport = inp->inp_fport;
> - n->tt_t.th_seq = 0;
> - n->tt_t.th_ack = 0;
> - n->tt_t.th_x2 = 0;
> - n->tt_t.th_off = 5;
> - n->tt_t.th_flags = 0;
> - n->tt_t.th_win = 0;
> - n->tt_t.th_urp = 0;
> + tcp_hdr->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
> + htons(sizeof(struct tcphdr) + IPPROTO_TCP));
> + }
> +
> + tcp_hdr->th_sport = inp->inp_lport;
> + tcp_hdr->th_dport = inp->inp_fport;
> + tcp_hdr->th_seq = 0;
> + tcp_hdr->th_ack = 0;
> + tcp_hdr->th_x2 = 0;
> + tcp_hdr->th_off = 5;
> + tcp_hdr->th_flags = 0;
> + tcp_hdr->th_win = 0;
> + tcp_hdr->th_urp = 0;
> +}
> +
> +
I think one line too many. I could be wrong.
> +/*
> + * Create template to be used to send tcp packets on a connection.
> + * Allocates an mbuf and fills in a skeletal tcp/ip header. The only
> + * use for this function is in keepalives, who like to use tcp_respond.
> + */
> +struct tcptemp *
> +tcp_maketemplate(tp)
> + struct tcpcb *tp;
> +{
> + struct mbuf *m;
> + struct tcptemp *n;
> +
> + m = m_get(M_DONTWAIT, MT_HEADER);
> + if (m == NULL)
> + return (0);
> + m->m_len = sizeof (struct tcptemp);
No space after sizeof, unless you get rid of the parantheses (style(9)).
> + n = mtod(m, struct tcptemp *);
> +
> + tcp_fillheaders(tp, (void *)&n->tt_ipgen, (void *)&n->tt_t);
> return (n);
> }
>
> @@ -706,7 +724,7 @@
> FREE(q, M_TSEGQ);
> }
> if (tp->t_template)
> - (void) m_free(dtom(tp->t_template));
> + panic("t_template non-null!");
Perhaps this should be an INVARIANTS-enabled KASSERT()? Is this
something that can only happen due to programming error? It seems to me like
it's the case here (assuming we're not dealing with a huge-assed memory
corruption).
> inp->inp_ppcb = NULL;
> soisdisconnected(so);
> #ifdef INET6
> @@ -1347,7 +1365,7 @@
> #endif /* INET6 */
> struct tcphdr *th;
>
> - if (!tp || !tp->t_template || !(inp = tp->t_inpcb))
> + if (!tp || !(inp = tp->t_inpcb))
Hmmm, this is gross. can we do something like, assuming tp is a pointer:
if (tp == NULL || (inp = tp->t_inpcb) == NULL)
(especially now that we have enough space in the line to do it?)
> return 0;
> MGETHDR(m, M_DONTWAIT, MT_DATA);
> if (!m)
> @@ -1359,10 +1377,7 @@
> th = (struct tcphdr *)(ip6 + 1);
> m->m_pkthdr.len = m->m_len =
> sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
> - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
> - sizeof(struct ip6_hdr));
> - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> - sizeof(struct tcphdr));
> + tcp_fillheaders(tp, ip6, th);
> hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
> } else
> #endif /* INET6 */
> @@ -1370,10 +1385,7 @@
> ip = mtod(m, struct ip *);
> th = (struct tcphdr *)(ip + 1);
> m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr);
> - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
> - sizeof(struct ip));
> - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> - sizeof(struct tcphdr));
> + tcp_fillheaders(tp, ip, th);
> hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
> }
>
> Only in netinet.old/: tcp_subr.c.orig
> diff -u -r netinet.old/tcp_timer.c netinet/tcp_timer.c
> --- netinet.old/tcp_timer.c Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_timer.c Tue Jun 19 11:56:13 2001
> @@ -41,6 +41,7 @@
> #include <sys/param.h>
> #include <sys/systm.h>
> #include <sys/kernel.h>
> +#include <sys/mbuf.h>
> #include <sys/sysctl.h>
> #include <sys/socket.h>
> #include <sys/socketvar.h>
> @@ -227,6 +228,7 @@
> void *xtp;
> {
> struct tcpcb *tp = xtp;
> + struct tcptemp *t_template;
> int s;
> #ifdef TCPDEBUG
> int ostate;
> @@ -273,9 +275,14 @@
> &tp->t_template->tt_t, (struct mbuf *)NULL,
> tp->rcv_nxt - 1, tp->snd_una - 1, 0);
> #else
> - tcp_respond(tp, tp->t_template->tt_ipgen,
> - &tp->t_template->tt_t, (struct mbuf *)NULL,
> - tp->rcv_nxt, tp->snd_una - 1, 0);
> +
> + t_template = tcp_maketemplate(tp);
> + if (t_template) {
> + tcp_respond(tp, t_template->tt_ipgen,
> + &t_template->tt_t, (struct mbuf *)NULL,
> + tp->rcv_nxt, tp->snd_una - 1, 0);
> + (void) m_free(dtom(t_template));
> + }
> #endif
> callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
> } else
> Only in netinet.old/: tcp_timer.c.orig
> diff -u -r netinet.old/tcp_usrreq.c netinet/tcp_usrreq.c
> --- netinet.old/tcp_usrreq.c Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_usrreq.c Tue Jun 19 11:53:27 2001
> @@ -744,11 +744,7 @@
> inp->inp_fport = sin->sin_port;
> in_pcbrehash(inp);
>
> - tp->t_template = tcp_template(tp);
> - if (tp->t_template == 0) {
> - in_pcbdisconnect(inp);
> - return ENOBUFS;
> - }
> + tp->t_template = NULL;
>
> /* Compute window scaling to request. */
> while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
> @@ -841,11 +837,7 @@
> inp->in6p_flowinfo = sin6->sin6_flowinfo;
> in_pcbrehash(inp);
>
> - tp->t_template = tcp_template(tp);
> - if (tp->t_template == 0) {
> - in6_pcbdisconnect(inp);
> - return ENOBUFS;
> - }
> + tp->t_template = NULL;
>
> /* Compute window scaling to request. */
> while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
> Only in netinet.old/: tcp_usrreq.c.orig
> Only in netinet.old/: tcp_usrreq.c.rej
> diff -u -r netinet.old/tcp_var.h netinet/tcp_var.h
> --- netinet.old/tcp_var.h Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_var.h Tue Jun 19 11:53:27 2001
> @@ -400,7 +400,8 @@
> void tcp_setpersist __P((struct tcpcb *));
> void tcp_slowtimo __P((void));
> struct tcptemp *
> - tcp_template __P((struct tcpcb *));
> + tcp_maketemplate __P((struct tcpcb *));
> +void tcp_fillheaders __P((struct tcpcb *, void *, void *));
> struct tcpcb *
> tcp_timers __P((struct tcpcb *, int));
> void tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *,
> Only in netinet.old/: tcp_var.h.orig
> Only in netinet.old/: udp_usrreq.c.orig
[snipped second posting of patch - it was posted twice]
Cheers,
--
Bosko Milekic
bmilekic@technokratis.com
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010619173424.A8399>
