Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 19 Jun 2001 17:34:24 -0400
From:      Bosko Milekic <bmilekic@technokratis.com>
To:        Mike Silbersack <silby@silby.com>
Cc:        freebsd-net@FreeBSD.ORG
Subject:   Re: tcp template removal / scalability patch
Message-ID:  <20010619173424.A8399@technokratis.com>
In-Reply-To: <20010619121337.C77484-300000@achilles.silby.com>; from silby@silby.com on Tue, Jun 19, 2001 at 12:19:19PM -0500
References:  <20010619121337.C77484-300000@achilles.silby.com>

next in thread | previous in thread | raw e-mail | index | archive | help

On Tue, Jun 19, 2001 at 12:19:19PM -0500, Mike Silbersack wrote:
> As suggested by Terry, I've cooked up a patch which halts the use of mbufs
> for storing tcp template structures.  The structure was only used in two
> places; tcp_output.c when sending packets, and tcp_timer.c when sending
> keepalives.  tcp_output now pulls the info directly from the tcpcb, while
> tcp_timer creates a short-term tcp template that is destroyed after use.
> 
> The end result is that rather than 1 mbuf being the minimum used per
> connection, 0 mbufs is now the minimum.  As a result, those with boxes
> handling a lot of connections should see greatly reduced mbuf usage.

	Cool!

> I've attached two patches; one for current, and one for stable.  Please
> review / test, _especially_ if you're using IPv6 or IPSec - while those
> cases look correct, I'm not running either and haven't tested them.

	I've spotted some patch-related (not conceptual) things worth
mentionning, so I'll do that now and give you the conceptual review a little
later, hopefully before I leave. 
 
> Thanks,
> 
> Mike "Silby" Silbersack

> Only in netinet.old/: icmp_var.h.orig
> Only in netinet.old/: ip_icmp.c.orig

	Dirty. :-)

> diff -u -r netinet.old/tcp_input.c netinet/tcp_input.c
> --- netinet.old/tcp_input.c	Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_input.c	Tue Jun 19 11:53:25 2001
> @@ -1066,12 +1066,7 @@
>  		}
>  		FREE(sin, M_SONAME);
>  	      }
> -		tp->t_template = tcp_template(tp);
> -		if (tp->t_template == 0) {
> -			tp = tcp_drop(tp, ENOBUFS);
> -			dropsocket = 0;		/* socket is already gone */
> -			goto drop;
> -		}
> +		tp->t_template = NULL;
>  		if ((taop = tcp_gettaocache(inp)) == NULL) {
>  			taop = &tao_noncached;
>  			bzero(taop, sizeof(*taop));
> Only in netinet.old/: tcp_input.c.orig
> Only in netinet.old/: tcp_input.c.rej
> diff -u -r netinet.old/tcp_output.c netinet/tcp_output.c
> --- netinet.old/tcp_output.c	Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_output.c	Tue Jun 19 11:53:25 2001
> @@ -630,16 +630,12 @@
>  		m->m_len = hdrlen;
>  	}
>  	m->m_pkthdr.rcvif = (struct ifnet *)0;
> -	if (tp->t_template == 0)
> -		panic("tcp_output");
> +
>  #ifdef INET6
>  	if (isipv6) {
>  		ip6 = mtod(m, struct ip6_hdr *);
>  		th = (struct tcphdr *)(ip6 + 1);
> -		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
> -		      sizeof(struct ip6_hdr));
> -		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> -		      sizeof(struct tcphdr));
> +		tcp_fillheaders(tp, ip6, th);
>  	} else
>  #endif /* INET6 */
>        {
> @@ -647,10 +643,7 @@
>  	ipov = (struct ipovly *)ip;
>  	th = (struct tcphdr *)(ip + 1);
>  	/* this picks up the pseudo header (w/o the length) */
> -	bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
> -	      sizeof(struct ip));
> -	bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> -	      sizeof(struct tcphdr));
> +	tcp_fillheaders(tp, ip, th);
>        }
>  
>  	/*
> Only in netinet.old/: tcp_output.c.orig
> Only in netinet.old/: tcp_seq.h.orig
> diff -u -r netinet.old/tcp_subr.c netinet/tcp_subr.c
> --- netinet.old/tcp_subr.c	Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_subr.c	Tue Jun 19 11:57:56 2001
> @@ -220,32 +220,27 @@
>  #undef TCP_MINPROTOHDR
>  }
>  
> +

	Unnecessary newline. 

>  /*
> - * Create template to be used to send tcp packets on a connection.
> - * Call after host entry created, allocates an mbuf and fills
> - * in a skeletal tcp/ip header, minimizing the amount of work
> - * necessary when the connection is used.
> + * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb.
> + * tcp_template used to store this data in mbufs, but we now recopy it out
> + * of the tcpcb each time to conserve mbufs.
>   */
> -struct tcptemp *
> -tcp_template(tp)
> +

	Unnecessary newline.

> +void
> +tcp_fillheaders(tp, ip_ptr, tcp_ptr)
>  	struct tcpcb *tp;
> +	void *ip_ptr;
> +	void *tcp_ptr;
>  {
> -	register struct inpcb *inp = tp->t_inpcb;
> -	register struct mbuf *m;
> -	register struct tcptemp *n;
> +	struct inpcb *inp = tp->t_inpcb;
> +	struct tcphdr *tcp_hdr = (struct tcphdr *)tcp_ptr;
>  
> -	if ((n = tp->t_template) == 0) {
> -		m = m_get(M_DONTWAIT, MT_HEADER);
> -		if (m == NULL)
> -			return (0);
> -		m->m_len = sizeof (struct tcptemp);
> -		n = mtod(m, struct tcptemp *);
> -	}
>  #ifdef INET6
>  	if ((inp->inp_vflag & INP_IPV6) != 0) {
> -		register struct ip6_hdr *ip6;
> +		struct ip6_hdr *ip6;

	Cool.
  
> -		ip6 = (struct ip6_hdr *)n->tt_ipgen;
> +		ip6 = (struct ip6_hdr *)ip_ptr;
>  		ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) |
>  			(inp->in6p_flowinfo & IPV6_FLOWINFO_MASK);
>  		ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) |
> @@ -254,29 +249,52 @@
>  		ip6->ip6_plen = sizeof(struct tcphdr);
>  		ip6->ip6_src = inp->in6p_laddr;
>  		ip6->ip6_dst = inp->in6p_faddr;
> -		n->tt_t.th_sum = 0;
> +		tcp_hdr->th_sum = 0;
>  	} else
>  #endif
> -      {
> -	struct ip *ip = (struct ip *)n->tt_ipgen;
> +	{
> +	struct ip *ip = (struct ip *) ip_ptr;

	Something seems stylistically wrong here. I think the "{" brace should
be on the same line as the "} else" above.

> -	bzero(ip, sizeof(struct ip));		/* XXX overkill? */
> +	bzero(ip, sizeof(struct ip));           /* XXX overkill? */

	Unnecessary whitespace. You changed the tabs separating the comment
from the statement to spaces.

>  	ip->ip_vhl = IP_VHL_BORING;
>  	ip->ip_p = IPPROTO_TCP;
>  	ip->ip_src = inp->inp_laddr;
>  	ip->ip_dst = inp->inp_faddr;
> -	n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
> -	    htons(sizeof(struct tcphdr) + IPPROTO_TCP));
> -      }
> -	n->tt_t.th_sport = inp->inp_lport;
> -	n->tt_t.th_dport = inp->inp_fport;
> -	n->tt_t.th_seq = 0;
> -	n->tt_t.th_ack = 0;
> -	n->tt_t.th_x2 = 0;
> -	n->tt_t.th_off = 5;
> -	n->tt_t.th_flags = 0;
> -	n->tt_t.th_win = 0;
> -	n->tt_t.th_urp = 0;
> +	tcp_hdr->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
> +		htons(sizeof(struct tcphdr) + IPPROTO_TCP));
> +	}
> +
> +	tcp_hdr->th_sport = inp->inp_lport;
> +	tcp_hdr->th_dport = inp->inp_fport;
> +	tcp_hdr->th_seq = 0;
> +	tcp_hdr->th_ack = 0;
> +	tcp_hdr->th_x2 = 0;
> +	tcp_hdr->th_off = 5;
> +	tcp_hdr->th_flags = 0;
> +	tcp_hdr->th_win = 0;
> +	tcp_hdr->th_urp = 0;
> +}
> +
> +

	I think one line too many. I could be wrong.

> +/*
> + * Create template to be used to send tcp packets on a connection.
> + * Allocates an mbuf and fills in a skeletal tcp/ip header.  The only
> + * use for this function is in keepalives, who like to use tcp_respond.
> + */
> +struct tcptemp *
> +tcp_maketemplate(tp)
> +	struct tcpcb *tp;
> +{
> +	struct mbuf *m;
> +	struct tcptemp *n;
> +
> +	m = m_get(M_DONTWAIT, MT_HEADER);
> +	if (m == NULL)
> +		return (0);
> +	m->m_len = sizeof (struct tcptemp);

	No space after sizeof, unless you get rid of the parantheses (style(9)).

> +	n = mtod(m, struct tcptemp *);
> +
> +	tcp_fillheaders(tp, (void *)&n->tt_ipgen, (void *)&n->tt_t);
>  	return (n);
>  }
>  
> @@ -706,7 +724,7 @@
>  		FREE(q, M_TSEGQ);
>  	}
>  	if (tp->t_template)
> -		(void) m_free(dtom(tp->t_template));
> +		panic("t_template non-null!");

	Perhaps this should be an INVARIANTS-enabled KASSERT()? Is this
something that can only happen due to programming error? It seems to me like
it's the case here (assuming we're not dealing with a huge-assed memory
corruption).

>  	inp->inp_ppcb = NULL;
>  	soisdisconnected(so);
>  #ifdef INET6
> @@ -1347,7 +1365,7 @@
>  #endif /* INET6 */
>  	struct tcphdr *th;
>  
> -	if (!tp || !tp->t_template || !(inp = tp->t_inpcb))
> +	if (!tp || !(inp = tp->t_inpcb))

	Hmmm, this is gross. can we do something like, assuming tp is a pointer:

	if (tp == NULL || (inp = tp->t_inpcb) == NULL)

	(especially now that we have enough space in the line to do it?)

>  		return 0;
>  	MGETHDR(m, M_DONTWAIT, MT_DATA);
>  	if (!m)
> @@ -1359,10 +1377,7 @@
>  		th = (struct tcphdr *)(ip6 + 1);
>  		m->m_pkthdr.len = m->m_len =
>  			sizeof(struct ip6_hdr) + sizeof(struct tcphdr);
> -		bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6,
> -		      sizeof(struct ip6_hdr));
> -		bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> -		      sizeof(struct tcphdr));
> +		tcp_fillheaders(tp, ip6, th);
>  		hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
>  	} else
>  #endif /* INET6 */
> @@ -1370,10 +1385,7 @@
>  	ip = mtod(m, struct ip *);
>  	th = (struct tcphdr *)(ip + 1);
>  	m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr);
> -	bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip,
> -	      sizeof(struct ip));
> -	bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th,
> -	      sizeof(struct tcphdr));
> +	tcp_fillheaders(tp, ip, th);
>  	hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp);
>        }
>  
> Only in netinet.old/: tcp_subr.c.orig
> diff -u -r netinet.old/tcp_timer.c netinet/tcp_timer.c
> --- netinet.old/tcp_timer.c	Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_timer.c	Tue Jun 19 11:56:13 2001
> @@ -41,6 +41,7 @@
>  #include <sys/param.h>
>  #include <sys/systm.h>
>  #include <sys/kernel.h>
> +#include <sys/mbuf.h>
>  #include <sys/sysctl.h>
>  #include <sys/socket.h>
>  #include <sys/socketvar.h>
> @@ -227,6 +228,7 @@
>  	void *xtp;
>  {
>  	struct tcpcb *tp = xtp;
> +	struct tcptemp *t_template;
>  	int s;
>  #ifdef TCPDEBUG
>  	int ostate;
> @@ -273,9 +275,14 @@
>  			    &tp->t_template->tt_t, (struct mbuf *)NULL,
>  			    tp->rcv_nxt - 1, tp->snd_una - 1, 0);
>  #else
> -		tcp_respond(tp, tp->t_template->tt_ipgen,
> -			    &tp->t_template->tt_t, (struct mbuf *)NULL,
> -			    tp->rcv_nxt, tp->snd_una - 1, 0);
> +
> +		t_template = tcp_maketemplate(tp);
> +		if (t_template) {
> +			tcp_respond(tp, t_template->tt_ipgen,
> +				&t_template->tt_t, (struct mbuf *)NULL,
> +				tp->rcv_nxt, tp->snd_una - 1, 0);
> +			(void) m_free(dtom(t_template));
> +		}
>  #endif
>  		callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
>  	} else
> Only in netinet.old/: tcp_timer.c.orig
> diff -u -r netinet.old/tcp_usrreq.c netinet/tcp_usrreq.c
> --- netinet.old/tcp_usrreq.c	Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_usrreq.c	Tue Jun 19 11:53:27 2001
> @@ -744,11 +744,7 @@
>  	inp->inp_fport = sin->sin_port;
>  	in_pcbrehash(inp);
>  
> -	tp->t_template = tcp_template(tp);
> -	if (tp->t_template == 0) {
> -		in_pcbdisconnect(inp);
> -		return ENOBUFS;
> -	}
> +	tp->t_template = NULL;
>  
>  	/* Compute window scaling to request.  */
>  	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
> @@ -841,11 +837,7 @@
>  		inp->in6p_flowinfo = sin6->sin6_flowinfo;
>  	in_pcbrehash(inp);
>  
> -	tp->t_template = tcp_template(tp);
> -	if (tp->t_template == 0) {
> -		in6_pcbdisconnect(inp);
> -		return ENOBUFS;
> -	}
> +	tp->t_template = NULL;
>  
>  	/* Compute window scaling to request.  */
>  	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
> Only in netinet.old/: tcp_usrreq.c.orig
> Only in netinet.old/: tcp_usrreq.c.rej
> diff -u -r netinet.old/tcp_var.h netinet/tcp_var.h
> --- netinet.old/tcp_var.h	Tue Jun 19 11:53:16 2001
> +++ netinet/tcp_var.h	Tue Jun 19 11:53:27 2001
> @@ -400,7 +400,8 @@
>  void	 tcp_setpersist __P((struct tcpcb *));
>  void	 tcp_slowtimo __P((void));
>  struct tcptemp *
> -	 tcp_template __P((struct tcpcb *));
> +	 tcp_maketemplate __P((struct tcpcb *));
> +void	 tcp_fillheaders __P((struct tcpcb *, void *, void *));
>  struct tcpcb *
>  	 tcp_timers __P((struct tcpcb *, int));
>  void	 tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *,
> Only in netinet.old/: tcp_var.h.orig
> Only in netinet.old/: udp_usrreq.c.orig

[snipped second posting of patch - it was posted twice]

Cheers,
-- 
 Bosko Milekic
 bmilekic@technokratis.com


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-net" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20010619173424.A8399>