Date: Sat, 13 Sep 2014 08:26:09 +0000 (UTC) From: Hans Petter Selasky <hselasky@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r271504 - in head/sys: dev/oce dev/vmware/vmxnet3 dev/xen/netfront net netinet ofed/drivers/net/mlx4 Message-ID: <201409130826.s8D8Q9Wx078339@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: hselasky Date: Sat Sep 13 08:26:09 2014 New Revision: 271504 URL: http://svnweb.freebsd.org/changeset/base/271504 Log: Improve transmit sending offload, TSO, algorithm in general. The current TSO limitation feature only takes the total number of bytes in an mbuf chain into account and does not limit by the number of mbufs in a chain. Some kinds of hardware is limited by two factors. One is the fragment length and the second is the fragment count. Both of these limits need to be taken into account when doing TSO. Else some kinds of hardware might have to drop completely valid mbuf chains because they cannot loaded into the given hardware's DMA engine. The new way of doing TSO limitation has been made backwards compatible as input from other FreeBSD developers and will use defaults for values not set. MFC after: 1 week Sponsored by: Mellanox Technologies Modified: head/sys/dev/oce/oce_if.c head/sys/dev/oce/oce_if.h head/sys/dev/vmware/vmxnet3/if_vmx.c head/sys/dev/vmware/vmxnet3/if_vmxvar.h head/sys/dev/xen/netfront/netfront.c head/sys/net/if.c head/sys/net/if_lagg.c head/sys/net/if_var.h head/sys/net/if_vlan.c head/sys/netinet/tcp_output.c head/sys/ofed/drivers/net/mlx4/en_netdev.c Modified: head/sys/dev/oce/oce_if.c ============================================================================== --- head/sys/dev/oce/oce_if.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/dev/oce/oce_if.c Sat Sep 13 08:26:09 2014 (r271504) @@ -1731,7 +1731,10 @@ oce_attach_ifp(POCE_SOFTC sc) sc->ifp->if_baudrate = IF_Gbps(10); #if __FreeBSD_version >= 1000000 - sc->ifp->if_hw_tsomax = OCE_MAX_TSO_SIZE; + sc->ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */, + OCE_MAX_TX_ELEMENTS /* maximum frag count */, + 12 /* 4K frag size */); #endif ether_ifattach(sc->ifp, sc->macaddr.mac_addr); Modified: head/sys/dev/oce/oce_if.h ============================================================================== --- head/sys/dev/oce/oce_if.h Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/dev/oce/oce_if.h Sat Sep 13 08:26:09 2014 (r271504) @@ -152,7 +152,6 @@ extern int mp_ncpus; /* system's total #define OCE_MAX_TX_ELEMENTS 29 #define OCE_MAX_TX_DESC 1024 #define OCE_MAX_TX_SIZE 65535 -#define OCE_MAX_TSO_SIZE (65535 - ETHER_HDR_LEN) #define OCE_MAX_RX_SIZE 4096 #define OCE_MAX_RQ_POSTS 255 #define OCE_DEFAULT_PROMISCUOUS 0 Modified: head/sys/dev/vmware/vmxnet3/if_vmx.c ============================================================================== --- head/sys/dev/vmware/vmxnet3/if_vmx.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/dev/vmware/vmxnet3/if_vmx.c Sat Sep 13 08:26:09 2014 (r271504) @@ -1722,7 +1722,11 @@ vmxnet3_setup_interface(struct vmxnet3_s ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = vmxnet3_init; ifp->if_ioctl = vmxnet3_ioctl; - ifp->if_hw_tsomax = VMXNET3_TSO_MAXSIZE; + + ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - sizeof(struct ether_vlan_header) /* bytes */, + VMXNET3_TX_MAXSEGS /* maximum frag count */, + VMXNET3_TX_MAXSEGSHIFT /* frag size */); #ifdef VMXNET3_LEGACY_TX ifp->if_start = vmxnet3_start; Modified: head/sys/dev/vmware/vmxnet3/if_vmxvar.h ============================================================================== --- head/sys/dev/vmware/vmxnet3/if_vmxvar.h Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/dev/vmware/vmxnet3/if_vmxvar.h Sat Sep 13 08:26:09 2014 (r271504) @@ -277,14 +277,13 @@ struct vmxnet3_softc { */ #define VMXNET3_TX_MAXSEGS 32 #define VMXNET3_TX_MAXSIZE (VMXNET3_TX_MAXSEGS * MCLBYTES) -#define VMXNET3_TSO_MAXSIZE \ - (VMXNET3_TX_MAXSIZE - sizeof(struct ether_vlan_header)) /* * Maximum support Tx segments size. The length field in the * Tx descriptor is 14 bits. */ -#define VMXNET3_TX_MAXSEGSIZE (1 << 14) +#define VMXNET3_TX_MAXSEGSHIFT 14 +#define VMXNET3_TX_MAXSEGSIZE (1 << VMXNET3_TX_MAXSEGSHIFT) /* * The maximum number of Rx segments we accept. When LRO is enabled, Modified: head/sys/dev/xen/netfront/netfront.c ============================================================================== --- head/sys/dev/xen/netfront/netfront.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/dev/xen/netfront/netfront.c Sat Sep 13 08:26:09 2014 (r271504) @@ -134,7 +134,6 @@ static const int MODPARM_rx_flip = 0; * to mirror the Linux MAX_SKB_FRAGS constant. */ #define MAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) -#define NF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES) #define RX_COPY_THRESHOLD 256 @@ -2102,7 +2101,10 @@ create_netdev(device_t dev) ifp->if_hwassist = XN_CSUM_FEATURES; ifp->if_capabilities = IFCAP_HWCSUM; - ifp->if_hw_tsomax = NF_TSO_MAXBURST; + ifp->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* bytes */, + MAX_TX_REQ_FRAGS /* maximum frag count */, + PAGE_SHIFT /* PAGE_SIZE frag size */); ether_ifattach(ifp, np->mac); callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); Modified: head/sys/net/if.c ============================================================================== --- head/sys/net/if.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/net/if.c Sat Sep 13 08:26:09 2014 (r271504) @@ -423,6 +423,52 @@ if_grow(void) } /* + * Compute the least common value of two "if_hw_tsomax" values: + */ +u_int +if_hw_tsomax_common(u_int a, u_int b) +{ + u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a); + u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a); + u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a); + u_int b_bytes = IF_HW_TSOMAX_GET_BYTES(b); + u_int b_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(b); + u_int b_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(b); + + return (IF_HW_TSOMAX_BUILD_VALUE(min(a_bytes, b_bytes), + min(a_frag_count, b_frag_count), + min(a_frag_size, b_frag_size))); +} + +/* + * Range check the "if_hw_tsomax" value: + */ +u_int +if_hw_tsomax_range_check(u_int a) +{ + u_int a_bytes = IF_HW_TSOMAX_GET_BYTES(a); + u_int a_frag_count = IF_HW_TSOMAX_GET_FRAG_COUNT(a); + u_int a_frag_size = IF_HW_TSOMAX_GET_FRAG_SIZE(a); + + /* round down to nearest 4 bytes */ + a_bytes &= 0xFFFC; + + /* use default, if zero */ + if (a_bytes == 0) + a_bytes = IF_HW_TSOMAX_DEFAULT_BYTES; + + /* use default, if zero */ + if (a_frag_count == 0) + a_frag_count = IF_HW_TSOMAX_DEFAULT_FRAG_COUNT; + + /* use default, if zero */ + if (a_frag_size == 0) + a_frag_size = IF_HW_TSOMAX_DEFAULT_FRAG_SIZE; + + return (IF_HW_TSOMAX_BUILD_VALUE(a_bytes, a_frag_count, a_frag_size)); +} + +/* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. @@ -445,6 +491,7 @@ if_alloc(u_char type) ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; + ifp->if_hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE(); if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { @@ -657,16 +704,9 @@ if_attach_internal(struct ifnet *ifp, in TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; - -#if defined(INET) || defined(INET6) - /* Initialize to max value. */ - if (ifp->if_hw_tsomax == 0) - ifp->if_hw_tsomax = min(IP_MAXPACKET, 32 * MCLBYTES - - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); - KASSERT(ifp->if_hw_tsomax <= IP_MAXPACKET && - ifp->if_hw_tsomax >= IP_MAXPACKET / 8, - ("%s: tsomax outside of range", __func__)); -#endif + /* range check TSO value */ + ifp->if_hw_tsomax = + if_hw_tsomax_range_check(ifp->if_hw_tsomax); } #ifdef VIMAGE else { Modified: head/sys/net/if_lagg.c ============================================================================== --- head/sys/net/if_lagg.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/net/if_lagg.c Sat Sep 13 08:26:09 2014 (r271504) @@ -445,11 +445,7 @@ lagg_capabilities(struct lagg_softc *sc) struct lagg_port *lp; int cap = ~0, ena = ~0; u_long hwa = ~0UL; -#if defined(INET) || defined(INET6) - u_int hw_tsomax = IP_MAXPACKET; /* Initialize to the maximum value. */ -#else - u_int hw_tsomax = ~0; /* if_hw_tsomax is only for INET/INET6, but.. */ -#endif + u_int hw_tsomax = IF_HW_TSOMAX_DEFAULT_VALUE(); LAGG_WLOCK_ASSERT(sc); @@ -458,10 +454,9 @@ lagg_capabilities(struct lagg_softc *sc) cap &= lp->lp_ifp->if_capabilities; ena &= lp->lp_ifp->if_capenable; hwa &= lp->lp_ifp->if_hwassist; - /* Set to the minimum value of the lagg ports. */ - if (lp->lp_ifp->if_hw_tsomax < hw_tsomax && - lp->lp_ifp->if_hw_tsomax > 0) - hw_tsomax = lp->lp_ifp->if_hw_tsomax; + /* Set to the common value of the lagg ports. */ + hw_tsomax = if_hw_tsomax_common(hw_tsomax, + lp->lp_ifp->if_hw_tsomax); } cap = (cap == ~0 ? 0 : cap); ena = (ena == ~0 ? 0 : ena); Modified: head/sys/net/if_var.h ============================================================================== --- head/sys/net/if_var.h Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/net/if_var.h Sat Sep 13 08:26:09 2014 (r271504) @@ -120,6 +120,43 @@ typedef int (*if_transmit_fn_t)(if_t, st typedef uint64_t (*if_get_counter_t)(if_t, ifnet_counter); /* + * Macros defining how to decode the "if_hw_tsomax" field: + */ +#define IF_HW_TSOMAX_GET_BYTES(x) \ + ((uint16_t)(x)) /* 32..65535 */ + +#define IF_HW_TSOMAX_GET_FRAG_COUNT(x) \ + ((uint8_t)((x) >> 16)) /* 1..255 */ + +#define IF_HW_TSOMAX_GET_FRAG_SIZE(x) \ + ((uint8_t)((x) >> 24)) /* 12..16 */ + +/* + * The following macro defines how to build the "if_hw_tsomax" + * field. The "bytes" field has unit 1 bytes and declares the maximum + * number of bytes which can be transferred by a single transmit + * offload, TSO, job. The "bytes" field is rounded down to the neares + * 4 bytes to avoid having the hardware do unaligned memory + * accesses. The "frag_count" field has unit 1 fragment and declares + * the maximum number of fragments a TSO job can contain. The + * "frag_size" field has unit logarithm in base 2 of the actual value + * in bytes and declares the maximum size of a fragment. + */ +#define IF_HW_TSOMAX_BUILD_VALUE(bytes, frag_count, frag_size) \ + (((bytes) & 0xFFFC) | (((frag_count) & 0xFF) << 16) | \ + (((frag_size) & 0xFF) << 24)) + +#define IF_HW_TSOMAX_DEFAULT_BYTES (65536 - 4) +#define IF_HW_TSOMAX_DEFAULT_FRAG_COUNT 255 +#define IF_HW_TSOMAX_DEFAULT_FRAG_SIZE 16 + +#define IF_HW_TSOMAX_DEFAULT_VALUE() \ + IF_HW_TSOMAX_BUILD_VALUE( \ + IF_HW_TSOMAX_DEFAULT_BYTES, \ + IF_HW_TSOMAX_DEFAULT_FRAG_COUNT, \ + IF_HW_TSOMAX_DEFAULT_FRAG_SIZE) + +/* * Structure defining a network interface. * * Size ILP32: 592 (approx) @@ -222,8 +259,7 @@ struct ifnet { if_get_counter_t if_get_counter; /* get counter values */ /* Stuff that's only temporary and doesn't belong here. */ - u_int if_hw_tsomax; /* tso burst length limit, the minimum - * is (IP_MAXPACKET / 8). + u_int if_hw_tsomax; /* TSO burst length limits. * XXXAO: Have to find a better place * for it eventually. */ /* @@ -608,6 +644,10 @@ void if_setioctlfn(if_t ifp, int (*)(if_ void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); + +/* "if_hw_tsomax" related functions */ +u_int if_hw_tsomax_common(u_int, u_int); +u_int if_hw_tsomax_range_check(u_int); /* Revisit the below. These are inline functions originally */ int drbr_inuse_drv(if_t ifp, struct buf_ring *br); Modified: head/sys/net/if_vlan.c ============================================================================== --- head/sys/net/if_vlan.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/net/if_vlan.c Sat Sep 13 08:26:09 2014 (r271504) @@ -1511,8 +1511,8 @@ vlan_capabilities(struct ifvlan *ifv) * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ - if (p->if_hw_tsomax > 0) - ifp->if_hw_tsomax = p->if_hw_tsomax; + ifp->if_hw_tsomax = if_hw_tsomax_common(ifp->if_hw_tsomax, + p->if_hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) ifp->if_capabilities |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { Modified: head/sys/netinet/tcp_output.c ============================================================================== --- head/sys/netinet/tcp_output.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/netinet/tcp_output.c Sat Sep 13 08:26:09 2014 (r271504) @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <net/if.h> +#include <net/if_var.h> #include <net/route.h> #include <net/vnet.h> @@ -767,18 +768,88 @@ send: flags &= ~TH_FIN; if (tso) { + u_int if_hw_tsomax_bytes; + u_int if_hw_tsomax_frag_count; + u_int if_hw_tsomax_frag_size; + struct mbuf *mb; + u_int moff; + int max_len; + + /* extract TSO information */ + if_hw_tsomax_bytes = + IF_HW_TSOMAX_GET_BYTES(tp->t_tsomax); + if_hw_tsomax_frag_count = + IF_HW_TSOMAX_GET_FRAG_COUNT(tp->t_tsomax); + if_hw_tsomax_frag_size = + IF_HW_TSOMAX_GET_FRAG_SIZE(tp->t_tsomax); + + /* compute maximum TSO length */ + max_len = (if_hw_tsomax_bytes - hdrlen); + + /* clamp maximum length value */ + if (max_len > IP_MAXPACKET) + max_len = IP_MAXPACKET; + else if (max_len < 0) + max_len = 0; + + /* get smallest length */ + if (len > (u_int)max_len) { + if (max_len != 0) + sendalot = 1; + len = (u_int)max_len; + } + KASSERT(ipoptlen == 0, ("%s: TSO can't do IP options", __func__)); + max_len = 0; + mb = sbsndptr(&so->so_snd, off, len, &moff); + + /* now make sure the number of fragments fit too */ + while (mb != NULL && (u_int)max_len < len) { + u_int cur_length; + u_int cur_frags; + + /* + * Get length of mbuf fragment and how + * many hardware frags, rounded up, it + * would use: + */ + cur_length = (mb->m_len - moff); + cur_frags = (cur_length + + (1 << if_hw_tsomax_frag_size) - 1) >> + if_hw_tsomax_frag_size; + + /* Handle special case: Zero Length Mbuf */ + if (cur_frags == 0) + cur_frags = 1; + + /* + * Check if the fragment limit will be + * reached or exceeded: + */ + if (cur_frags >= if_hw_tsomax_frag_count) { + max_len += min(cur_length, + if_hw_tsomax_frag_count << + if_hw_tsomax_frag_size); + break; + } + max_len += cur_length; + if_hw_tsomax_frag_count -= cur_frags; + moff = 0; + mb = mb->m_next; + } + /* * Limit a burst to t_tsomax minus IP, * TCP and options length to keep ip->ip_len * from overflowing or exceeding the maximum * length allowed by the network interface. */ - if (len > tp->t_tsomax - hdrlen) { - len = tp->t_tsomax - hdrlen; - sendalot = 1; + if (len > (u_int)max_len) { + if (max_len != 0) + sendalot = 1; + len = (u_int)max_len; } /* Modified: head/sys/ofed/drivers/net/mlx4/en_netdev.c ============================================================================== --- head/sys/ofed/drivers/net/mlx4/en_netdev.c Sat Sep 13 07:45:03 2014 (r271503) +++ head/sys/ofed/drivers/net/mlx4/en_netdev.c Sat Sep 13 08:26:09 2014 (r271504) @@ -673,6 +673,12 @@ int mlx4_en_do_start_port(struct net_dev else priv->rx_csum = 0; + /* set TSO limits so that we don't have to drop TX packets */ + dev->if_hw_tsomax = IF_HW_TSOMAX_BUILD_VALUE( + 65535 - sizeof(struct ether_vlan_header) /* bytes */, + 16 /* maximum frag count */, + 16 /* can do up to 4GByte */); + err = mlx4_wol_read(priv->mdev->dev, &config, priv->port); if (err) { en_err(priv, "Failed to get WoL info, unable to modify\n");
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201409130826.s8D8Q9Wx078339>