Date: Fri, 15 May 2009 17:58:06 +0900 From: Pyun YongHyeon <pyunyh@gmail.com> To: Lars Eggert <lars.eggert@nokia.com> Cc: "d@delphij.net" <d@delphij.net>, "freebsd-stable@freebsd.org" <freebsd-stable@freebsd.org>, "nigel@eyede.com" <nigel@eyede.com> Subject: Re: TCP differences in 7.2 vs 7.1 Message-ID: <20090515085806.GX65350@michelle.cdnetworks.co.kr> In-Reply-To: <310A73CC-A32D-4794-BF23-A49715AFCF99@nokia.com> References: <guccc2$8b4$1@ger.gmane.org> <4A09DEF1.2010202@delphij.net> <4A09FDB2.5080307@eyede.com> <20090513004131.GP65350@michelle.cdnetworks.co.kr> <DAF693BD-D7B0-49FA-97EF-41C1EA1FAF84@nokia.com> <20090514082750.GU65350@michelle.cdnetworks.co.kr> <310A73CC-A32D-4794-BF23-A49715AFCF99@nokia.com>
next in thread | previous in thread | raw e-mail | index | archive | help
--YD3LsXFS42OYHhNZ Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Thu, May 14, 2009 at 11:28:43AM +0300, Lars Eggert wrote: > Hi, > > On 2009-5-14, at 11:27, Pyun YongHyeon wrote: > >Then you're seeing different problem on em(4). Last time I checked > >em(4) TSO code in em(4) didn't use m_pullup and just returned > >ENXIO to caller. I'm not sure that is related with your issue but > >would you tell us your network configuration? > > this box is a Dell 2950 server/router running 7.2-STABLE. It has an > onboard bce interface and four dual-port Intel PRO/1000 NICs, giving > it 8 em interfaces. (Let me know if you want the boot dmesg.) > > >If you can easily > >reproduce the issue would you let us know? > > Reproducing the issue is as easy as setting net.inet.tcp.tso=1. > > What's interesting is that I only see the issue on one of the eight em > interfaces. That interface is connected to a D-Link DIR-655 WLAN > router. When I tcpdump on the other interfaces with TSO enabled, I see > no "IP bad-len 0" messages. > Would you try attached patch? I'm using the patch on my development box. Originally the patch was written to address checksum offload breakage on multicast packets(r182463). However I didn't encounter TSO issue without the patch. Note, the patch was not heavily tested so it may have uncovered bugs. --YD3LsXFS42OYHhNZ Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="em.csum_tso.patch" Index: sys/dev/e1000/if_em.c =================================================================== --- sys/dev/e1000/if_em.c (revision 192130) +++ sys/dev/e1000/if_em.c (working copy) @@ -270,7 +270,7 @@ static void em_transmit_checksum_setup(struct adapter *, struct mbuf *, u32 *, u32 *); #if __FreeBSD_version >= 700000 -static bool em_tso_setup(struct adapter *, struct mbuf *, +static void em_tso_setup(struct adapter *, struct mbuf *, u32 *, u32 *); #endif /* FreeBSD_version >= 700000 */ static void em_set_promisc(struct adapter *); @@ -369,7 +369,6 @@ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) -#define M_TSO_LEN 66 /* Allow common code without TSO */ #ifndef CSUM_TSO @@ -2104,15 +2103,78 @@ /* + * Docuemtation explicitly recommends entire header section + * to be coalesced into a single buffer and described in a + * single data descriptor. + * * TSO workaround: * If an mbuf is only header we need * to pull 4 bytes of data into it. */ - if (do_tso && (m_head->m_len <= M_TSO_LEN)) { - m_head = m_pullup(m_head, M_TSO_LEN + 4); + if (do_tso || (m_head->m_pkthdr.csum_flags & + (CSUM_IP | CSUM_TCP | CSUM_UDP)) != 0) { + struct ether_header *eh; + struct ip *ip; + struct tcphdr *tcp; + uint32_t poff; + + if (M_WRITABLE(m_head) == 0) { + m_head = m_dup(*m_headp, M_DONTWAIT); + m_freem(*m_headp); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + *m_headp = m_head; + } + + poff = sizeof(struct ether_header); + m_head = m_pullup(m_head, poff); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + eh = mtod(m_head, struct ether_header *); + if (eh->ether_type == htons(ETHERTYPE_VLAN)) { + poff = sizeof(struct ether_vlan_header); + m_head = m_pullup(m_head, poff); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } + m_head = m_pullup(m_head, poff + sizeof(struct ip)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + ip = (struct ip *)(mtod(m_head, char *) + poff); + poff += (ip->ip_hl << 2); + + if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP) != 0) { + m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + tcp = (struct tcphdr *)(mtod(m_head, char *) + poff); + poff += (tcp->th_off << 2); + /* Pull 4 bytes of payload into the first mbuf. */ + if (do_tso) + poff += 4; + m_head = m_pullup(m_head, poff); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } else if ((m_head->m_pkthdr.csum_flags & (CSUM_UDP)) != 0) { + m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } *m_headp = m_head; - if (m_head == NULL) - return (ENOBUFS); } /* @@ -2143,7 +2205,7 @@ if (error == EFBIG) { struct mbuf *m; - m = m_defrag(*m_headp, M_DONTWAIT); + m = m_collapse(*m_headp, M_DONTWAIT, EM_MAX_SCATTER); if (m == NULL) { adapter->mbuf_alloc_failed++; m_freem(*m_headp); @@ -2189,9 +2251,7 @@ /* Do hardware assists */ #if __FreeBSD_version >= 700000 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { - error = em_tso_setup(adapter, m_head, &txd_upper, &txd_lower); - if (error != TRUE) - return (ENXIO); /* something foobar */ + em_tso_setup(adapter, m_head, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else @@ -3836,7 +3896,7 @@ * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ -static bool +static void em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper, u32 *txd_lower) { @@ -3868,10 +3928,6 @@ ehdrlen = ETHER_HDR_LEN; } - /* Ensure we have at least the IP+TCP header in the first mbuf. */ - if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr)) - return FALSE; /* -1 */ - /* * We only support TCP for IPv4 and IPv6 (notyet) for the moment. * TODO: Support SCTP too when it hits the tree. @@ -3880,31 +3936,24 @@ case ETHERTYPE_IP: isip6 = 0; ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return FALSE; /* 0 */ ip->ip_len = 0; ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; - if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr)) - return FALSE; /* -1 */ th = (struct tcphdr *)((caddr_t)ip + ip_hlen); -#if 1 + /* Controller expects a psuedo checksum without TCP length. */ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); -#else - th->th_sum = mp->m_pkthdr.csum_data; -#endif break; case ETHERTYPE_IPV6: isip6 = 1; - return FALSE; /* Not supported yet. */ + return; /* Not supported yet. */ ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); if (ip6->ip6_nxt != IPPROTO_TCP) - return FALSE; /* 0 */ + return; /* 0 */ ip6->ip6_plen = 0; ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */ if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr)) - return FALSE; /* -1 */ + return; /* -1 */ th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); #if 0 th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst, @@ -3914,7 +3963,7 @@ #endif break; default: - return FALSE; + return; } hdr_len = ehdrlen + ip_hlen + (th->th_off << 2); @@ -3976,8 +4025,6 @@ adapter->num_tx_desc_avail--; adapter->next_avail_tx_desc = curr_txd; adapter->tx_tso = TRUE; - - return TRUE; } #endif /* __FreeBSD_version >= 700000 */ Index: sys/dev/e1000/if_em.h =================================================================== --- sys/dev/e1000/if_em.h (revision 192130) +++ sys/dev/e1000/if_em.h (working copy) @@ -231,7 +231,7 @@ #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) -#define EM_MAX_SCATTER 64 +#define EM_MAX_SCATTER 32 #define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ --YD3LsXFS42OYHhNZ--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20090515085806.GX65350>