Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 03 Jun 2012 09:51:02 -0700
From:      Colin Percival <cperciva@freebsd.org>
To:        Andrew Gallatin <gallatin@cs.duke.edu>
Cc:        freebsd-net@freebsd.org
Subject:   Re: [please review] TSO mbuf chain length limiting patch
Message-ID:  <4FCB95F6.30204@freebsd.org>
In-Reply-To: <4FC63D27.70807@cs.duke.edu>
References:  <4FC635CC.5030608@freebsd.org> <4FC63D27.70807@cs.duke.edu>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------010705070900080106050908
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit

On 05/30/12 08:30, Andrew Gallatin wrote:
> On 05/30/12 10:59, Colin Percival wrote:
>> The Xen virtual network interface has an issue (ok, really the issue is with
>> the linux back-end, but that's what most people are using) where it can't
>> handle scatter-gather writes with lots of pieces, aka. long mbuf chains.
>> This currently bites us hard with TSO enabled, since it produces said long
>> mbuf chains.
> 
> I think a better approach would be to have a limit on the size of the
> pre-segmented TCP payload size sent to the driver.  I tend to think
> that this would be more generically useful, and it is a better match
> for the NDIS APIs, where a driver must specify the max TSO size.  I
> think the changes to the TCP stack might be simpler (eg, they
> would seem to jive better with the existing "maxmtu" approach).
> 
> I think this could work for you as well.  You could set the Xen max
> tso size to be 32K (derived from 18 pages/skb, multiplied by a typical
> 2KB mbuf size, with some slack built in).  If the chain was too large,
> you could m_defrag it down to size.

I've attached a new patch which:
1. adds a IFCAP_TSO_MSS "capability" and a if_tx_tso_mss field to struct ifnet,
2. sets these in netfront when the IFCAP_TSO4 flag is set,
3. extends tcp_maxmtu to read this value,
4. adds a tx_tso_mss field to struct tcpcb,
5. makes tcp_mss_update set tx_tso_mss using tcp_maxmtu, and
6. limits TSO lengths to tx_tso_mss in tcp_output.

-- 
Colin Percival
Security Officer Emeritus, FreeBSD | The power to serve
Founder, Tarsnap | www.tarsnap.com | Online backups for the truly paranoid

--------------010705070900080106050908
Content-Type: text/plain; charset=us-ascii;
 name="tx_tso_mss.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename="tx_tso_mss.patch"

Index: netinet/tcp_input.c
===================================================================
--- netinet/tcp_input.c	(revision 235780)
+++ netinet/tcp_input.c	(working copy)
@@ -3298,6 +3298,7 @@
 {
 	int mss = 0;
 	u_long maxmtu = 0;
+	int tx_tso_mss = 0;
 	struct inpcb *inp = tp->t_inpcb;
 	struct hc_metrics_lite metrics;
 	int origoffer;
@@ -3321,8 +3322,10 @@
 	/* Initialize. */
 #ifdef INET6
 	if (isipv6) {
-		maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags);
+		maxmtu = tcp_maxmtu6_ext(&inp->inp_inc, mtuflags,
+		    &tx_tso_mss);
 		tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt;
+		tp->t_tx_tso_mss = tx_tso_mss;
 	}
 #endif
 #if defined(INET) && defined(INET6)
@@ -3330,8 +3333,10 @@
 #endif
 #ifdef INET
 	{
-		maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags);
+		maxmtu = tcp_maxmtu_ext(&inp->inp_inc, mtuflags,
+		    &tx_tso_mss);
 		tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt;
+		tp->t_tx_tso_mss = tx_tso_mss;
 	}
 #endif
 
Index: netinet/tcp_subr.c
===================================================================
--- netinet/tcp_subr.c	(revision 235780)
+++ netinet/tcp_subr.c	(working copy)
@@ -1708,7 +1708,7 @@
  * tcp_mss_update to get the peer/interface MTU.
  */
 u_long
-tcp_maxmtu(struct in_conninfo *inc, int *flags)
+tcp_maxmtu_ext(struct in_conninfo *inc, int *flags, int * tx_tso_mss)
 {
 	struct route sro;
 	struct sockaddr_in *dst;
@@ -1738,15 +1738,26 @@
 			    ifp->if_hwassist & CSUM_TSO)
 				*flags |= CSUM_TSO;
 		}
+		if (tx_tso_mss != NULL) {
+			if (ifp->if_capabilities & IFCAP_TSO_MSS)
+				*tx_tso_mss = ifp->if_tx_tso_mss;
+		}
 		RTFREE(sro.ro_rt);
 	}
 	return (maxmtu);
 }
+
+u_long
+tcp_maxmtu(struct in_conninfo *inc, int *flags)
+{
+
+	return (tcp_maxmtu_ext(inc, flags, NULL));
+}
 #endif /* INET */
 
 #ifdef INET6
 u_long
-tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+tcp_maxmtu6_ext(struct in_conninfo *inc, int *flags, int * tx_tso_mss)
 {
 	struct route_in6 sro6;
 	struct ifnet *ifp;
@@ -1775,11 +1786,22 @@
 			    ifp->if_hwassist & CSUM_TSO)
 				*flags |= CSUM_TSO;
 		}
+		if (tx_tso_mss != NULL) {
+			if (ifp->if_capabilities & IFCAP_TSO_MSS)
+				*tx_tso_mss = ifp->if_tx_tso_mss;
+		}
 		RTFREE(sro6.ro_rt);
 	}
 
 	return (maxmtu);
 }
+
+u_long
+tcp_maxmtu6(struct in_conninfo *inc, int *flags)
+{
+
+	return (tcp_maxmtu6_ext(inc, flags, NULL));
+}
 #endif /* INET6 */
 
 #ifdef IPSEC
Index: netinet/tcp_var.h
===================================================================
--- netinet/tcp_var.h	(revision 235780)
+++ netinet/tcp_var.h	(working copy)
@@ -208,7 +208,8 @@
 	u_int	t_keepintvl;		/* interval between keepalives */
 	u_int	t_keepcnt;		/* number of keepalives before close */
 
-	uint32_t t_ispare[8];		/* 5 UTO, 3 TBD */
+	uint32_t t_tx_tso_mss;		/* max segment size for TSO offload */
+	uint32_t t_ispare[7];		/* 5 UTO, 3 TBD */
 	void	*t_pspare2[4];		/* 4 TBD */
 	uint64_t _pad[6];		/* 6 TBD (1-2 CC/RTT?) */
 };
@@ -674,7 +675,9 @@
 #endif
 void	 tcp_input(struct mbuf *, int);
 u_long	 tcp_maxmtu(struct in_conninfo *, int *);
+u_long	 tcp_maxmtu_ext(struct in_conninfo *, int *, int *);
 u_long	 tcp_maxmtu6(struct in_conninfo *, int *);
+u_long	 tcp_maxmtu6_ext(struct in_conninfo *, int *, int *);
 void	 tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
 	    int *);
 void	 tcp_mss(struct tcpcb *, int);
Index: netinet/tcp_output.c
===================================================================
--- netinet/tcp_output.c	(revision 235780)
+++ netinet/tcp_output.c	(working copy)
@@ -748,6 +748,15 @@
 			}
 
 			/*
+			 * Some drivers want an even shorter limit to the
+			 * length sent via TSO; respect their wishes.
+			 */
+			if (tp->t_tx_tso_mss != 0 && len > tp->t_tx_tso_mss) {
+				len = tp->t_tx_tso_mss;
+				sendalot = 1;
+			}
+
+			/*
 			 * Prevent the last segment from being
 			 * fractional unless the send sockbuf can
 			 * be emptied.
Index: dev/xen/netfront/netfront.c
===================================================================
--- dev/xen/netfront/netfront.c	(revision 235780)
+++ dev/xen/netfront/netfront.c	(working copy)
@@ -135,6 +135,7 @@
  * to mirror the Linux MAX_SKB_FRAGS constant.
  */
 #define	MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
+#define TX_TSO_MSS (MAX_TX_REQ_FRAGS - 2) * MCLBYTES
 
 #define RX_COPY_THRESHOLD 256
 
@@ -2040,6 +2041,9 @@
 	if (val) {
 		np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO;
 		printf(" feature-gso-tcp4");
+
+		np->xn_ifp->if_capabilities |= IFCAP_TSO_MSS;
+		np->xn_ifp->if_tx_tso_mss = TX_TSO_MSS;
 	}
 
 	printf("\n");
Index: net/if.h
===================================================================
--- net/if.h	(revision 235780)
+++ net/if.h	(working copy)
@@ -230,6 +230,7 @@
 #define	IFCAP_VLAN_HWTSO	0x40000 /* can do IFCAP_TSO on VLANs */
 #define	IFCAP_LINKSTATE		0x80000 /* the runtime link state is dynamic */
 #define	IFCAP_NETMAP		0x100000 /* netmap mode supported/enabled */
+#define	IFCAP_TSO_MSS		0x200000 /* TSO size is limited */
 
 #define IFCAP_HWCSUM	(IFCAP_RXCSUM | IFCAP_TXCSUM)
 #define	IFCAP_TSO	(IFCAP_TSO4 | IFCAP_TSO6)
Index: net/if_var.h
===================================================================
--- net/if_var.h	(revision 235780)
+++ net/if_var.h	(working copy)
@@ -205,7 +205,8 @@
 	 * be used with care where binary compatibility is required.
 	 */
 	char	if_cspare[3];
-	int	if_ispare[4];
+	int	if_tx_tso_mss;		/* if IFCAP_TSO_MSS */
+	int	if_ispare[3];
 	void	*if_pspare[8];		/* 1 netmap, 7 TDB */
 };
 

--------------010705070900080106050908--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4FCB95F6.30204>