Date: Sun, 03 Jun 2012 09:51:02 -0700 From: Colin Percival <cperciva@freebsd.org> To: Andrew Gallatin <gallatin@cs.duke.edu> Cc: freebsd-net@freebsd.org Subject: Re: [please review] TSO mbuf chain length limiting patch Message-ID: <4FCB95F6.30204@freebsd.org> In-Reply-To: <4FC63D27.70807@cs.duke.edu> References: <4FC635CC.5030608@freebsd.org> <4FC63D27.70807@cs.duke.edu>
next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format. --------------010705070900080106050908 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit On 05/30/12 08:30, Andrew Gallatin wrote: > On 05/30/12 10:59, Colin Percival wrote: >> The Xen virtual network interface has an issue (ok, really the issue is with >> the linux back-end, but that's what most people are using) where it can't >> handle scatter-gather writes with lots of pieces, aka. long mbuf chains. >> This currently bites us hard with TSO enabled, since it produces said long >> mbuf chains. > > I think a better approach would be to have a limit on the size of the > pre-segmented TCP payload size sent to the driver. I tend to think > that this would be more generically useful, and it is a better match > for the NDIS APIs, where a driver must specify the max TSO size. I > think the changes to the TCP stack might be simpler (eg, they > would seem to jive better with the existing "maxmtu" approach). > > I think this could work for you as well. You could set the Xen max > tso size to be 32K (derived from 18 pages/skb, multiplied by a typical > 2KB mbuf size, with some slack built in). If the chain was too large, > you could m_defrag it down to size. I've attached a new patch which: 1. adds a IFCAP_TSO_MSS "capability" and a if_tx_tso_mss field to struct ifnet, 2. sets these in netfront when the IFCAP_TSO4 flag is set, 3. extends tcp_maxmtu to read this value, 4. adds a tx_tso_mss field to struct tcpcb, 5. makes tcp_mss_update set tx_tso_mss using tcp_maxmtu, and 6. limits TSO lengths to tx_tso_mss in tcp_output. -- Colin Percival Security Officer Emeritus, FreeBSD | The power to serve Founder, Tarsnap | www.tarsnap.com | Online backups for the truly paranoid --------------010705070900080106050908 Content-Type: text/plain; charset=us-ascii; name="tx_tso_mss.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="tx_tso_mss.patch" Index: netinet/tcp_input.c =================================================================== --- netinet/tcp_input.c (revision 235780) +++ netinet/tcp_input.c (working copy) @@ -3298,6 +3298,7 @@ { int mss = 0; u_long maxmtu = 0; + int tx_tso_mss = 0; struct inpcb *inp = tp->t_inpcb; struct hc_metrics_lite metrics; int origoffer; @@ -3321,8 +3322,10 @@ /* Initialize. */ #ifdef INET6 if (isipv6) { - maxmtu = tcp_maxmtu6(&inp->inp_inc, mtuflags); + maxmtu = tcp_maxmtu6_ext(&inp->inp_inc, mtuflags, + &tx_tso_mss); tp->t_maxopd = tp->t_maxseg = V_tcp_v6mssdflt; + tp->t_tx_tso_mss = tx_tso_mss; } #endif #if defined(INET) && defined(INET6) @@ -3330,8 +3333,10 @@ #endif #ifdef INET { - maxmtu = tcp_maxmtu(&inp->inp_inc, mtuflags); + maxmtu = tcp_maxmtu_ext(&inp->inp_inc, mtuflags, + &tx_tso_mss); tp->t_maxopd = tp->t_maxseg = V_tcp_mssdflt; + tp->t_tx_tso_mss = tx_tso_mss; } #endif Index: netinet/tcp_subr.c =================================================================== --- netinet/tcp_subr.c (revision 235780) +++ netinet/tcp_subr.c (working copy) @@ -1708,7 +1708,7 @@ * tcp_mss_update to get the peer/interface MTU. */ u_long -tcp_maxmtu(struct in_conninfo *inc, int *flags) +tcp_maxmtu_ext(struct in_conninfo *inc, int *flags, int * tx_tso_mss) { struct route sro; struct sockaddr_in *dst; @@ -1738,15 +1738,26 @@ ifp->if_hwassist & CSUM_TSO) *flags |= CSUM_TSO; } + if (tx_tso_mss != NULL) { + if (ifp->if_capabilities & IFCAP_TSO_MSS) + *tx_tso_mss = ifp->if_tx_tso_mss; + } RTFREE(sro.ro_rt); } return (maxmtu); } + +u_long +tcp_maxmtu(struct in_conninfo *inc, int *flags) +{ + + return (tcp_maxmtu_ext(inc, flags, NULL)); +} #endif /* INET */ #ifdef INET6 u_long -tcp_maxmtu6(struct in_conninfo *inc, int *flags) +tcp_maxmtu6_ext(struct in_conninfo *inc, int *flags, int * tx_tso_mss) { struct route_in6 sro6; struct ifnet *ifp; @@ -1775,11 +1786,22 @@ ifp->if_hwassist & CSUM_TSO) *flags |= CSUM_TSO; } + if (tx_tso_mss != NULL) { + if (ifp->if_capabilities & IFCAP_TSO_MSS) + *tx_tso_mss = ifp->if_tx_tso_mss; + } RTFREE(sro6.ro_rt); } return (maxmtu); } + +u_long +tcp_maxmtu6(struct in_conninfo *inc, int *flags) +{ + + return (tcp_maxmtu6_ext(inc, flags, NULL)); +} #endif /* INET6 */ #ifdef IPSEC Index: netinet/tcp_var.h =================================================================== --- netinet/tcp_var.h (revision 235780) +++ netinet/tcp_var.h (working copy) @@ -208,7 +208,8 @@ u_int t_keepintvl; /* interval between keepalives */ u_int t_keepcnt; /* number of keepalives before close */ - uint32_t t_ispare[8]; /* 5 UTO, 3 TBD */ + uint32_t t_tx_tso_mss; /* max segment size for TSO offload */ + uint32_t t_ispare[7]; /* 5 UTO, 3 TBD */ void *t_pspare2[4]; /* 4 TBD */ uint64_t _pad[6]; /* 6 TBD (1-2 CC/RTT?) */ }; @@ -674,7 +675,9 @@ #endif void tcp_input(struct mbuf *, int); u_long tcp_maxmtu(struct in_conninfo *, int *); +u_long tcp_maxmtu_ext(struct in_conninfo *, int *, int *); u_long tcp_maxmtu6(struct in_conninfo *, int *); +u_long tcp_maxmtu6_ext(struct in_conninfo *, int *, int *); void tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *, int *); void tcp_mss(struct tcpcb *, int); Index: netinet/tcp_output.c =================================================================== --- netinet/tcp_output.c (revision 235780) +++ netinet/tcp_output.c (working copy) @@ -748,6 +748,15 @@ } /* + * Some drivers want an even shorter limit to the + * length sent via TSO; respect their wishes. + */ + if (tp->t_tx_tso_mss != 0 && len > tp->t_tx_tso_mss) { + len = tp->t_tx_tso_mss; + sendalot = 1; + } + + /* * Prevent the last segment from being * fractional unless the send sockbuf can * be emptied. Index: dev/xen/netfront/netfront.c =================================================================== --- dev/xen/netfront/netfront.c (revision 235780) +++ dev/xen/netfront/netfront.c (working copy) @@ -135,6 +135,7 @@ * to mirror the Linux MAX_SKB_FRAGS constant. */ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) +#define TX_TSO_MSS (MAX_TX_REQ_FRAGS - 2) * MCLBYTES #define RX_COPY_THRESHOLD 256 @@ -2040,6 +2041,9 @@ if (val) { np->xn_ifp->if_capabilities |= IFCAP_TSO4|IFCAP_LRO; printf(" feature-gso-tcp4"); + + np->xn_ifp->if_capabilities |= IFCAP_TSO_MSS; + np->xn_ifp->if_tx_tso_mss = TX_TSO_MSS; } printf("\n"); Index: net/if.h =================================================================== --- net/if.h (revision 235780) +++ net/if.h (working copy) @@ -230,6 +230,7 @@ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ +#define IFCAP_TSO_MSS 0x200000 /* TSO size is limited */ #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) Index: net/if_var.h =================================================================== --- net/if_var.h (revision 235780) +++ net/if_var.h (working copy) @@ -205,7 +205,8 @@ * be used with care where binary compatibility is required. */ char if_cspare[3]; - int if_ispare[4]; + int if_tx_tso_mss; /* if IFCAP_TSO_MSS */ + int if_ispare[3]; void *if_pspare[8]; /* 1 netmap, 7 TDB */ }; --------------010705070900080106050908--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?4FCB95F6.30204>