From owner-freebsd-stable@FreeBSD.ORG Fri May 15 08:49:15 2009 Return-Path: Delivered-To: freebsd-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id CB94E106566C for ; Fri, 15 May 2009 08:49:15 +0000 (UTC) (envelope-from pyunyh@gmail.com) Received: from rv-out-0506.google.com (rv-out-0506.google.com [209.85.198.228]) by mx1.freebsd.org (Postfix) with ESMTP id 904638FC08 for ; Fri, 15 May 2009 08:49:15 +0000 (UTC) (envelope-from pyunyh@gmail.com) Received: by rv-out-0506.google.com with SMTP id k40so1098703rvb.43 for ; Fri, 15 May 2009 01:49:15 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=gamma; h=domainkey-signature:received:received:received:from:date:to:cc :subject:message-id:reply-to:references:mime-version:content-type :content-disposition:in-reply-to:user-agent; bh=29o6Hfg6X7hStMOC2BGUeHfLE9w7u1FAxlCXr3oXeaA=; b=sgU2bd43RIIuztyQRYNMb1Khx7Kh0BW4e7yCWud8051dk/oqHitTc2kVhrvfdmDwJo tXQjdD4pskxOwNFbtK2SfcyAkYAfgo4YLMRHl+9fpVXSG0pVdaRREE0MFSeFhe3LyMOz noDzTmzLWYiTaB3OZMBv/nRKcDbRC1tQG/eqY= DomainKey-Signature: a=rsa-sha1; c=nofws; d=gmail.com; s=gamma; h=from:date:to:cc:subject:message-id:reply-to:references:mime-version :content-type:content-disposition:in-reply-to:user-agent; b=hGvWlKFU6dnaqcwNSeLNHY3aaBZyRHZkMRbMfTXtmgouJfOZfZaTA8f+wG85z9f8tj 0pBisX8BxqWfxmBsHd94uJDyFEpZrrCi7i5c5NrtWjlHcPAtiH0Z6Jsm+ND6uT7+sqqJ 8EPUOt/7j1Dr2Z2SboA81I8V85qO6y438z5hQ= Received: by 10.141.123.17 with SMTP id a17mr1126455rvn.89.1242377355255; Fri, 15 May 2009 01:49:15 -0700 (PDT) Received: from michelle.cdnetworks.co.kr ([114.111.62.249]) by mx.google.com with ESMTPS id c20sm2779209rvf.0.2009.05.15.01.49.12 (version=SSLv3 cipher=RC4-MD5); Fri, 15 May 2009 01:49:13 -0700 (PDT) Received: by michelle.cdnetworks.co.kr (sSMTP sendmail emulation); Fri, 15 May 2009 17:58:06 +0900 From: Pyun YongHyeon Date: Fri, 15 May 2009 17:58:06 +0900 To: Lars Eggert Message-ID: <20090515085806.GX65350@michelle.cdnetworks.co.kr> References: <4A09DEF1.2010202@delphij.net> <4A09FDB2.5080307@eyede.com> <20090513004131.GP65350@michelle.cdnetworks.co.kr> <20090514082750.GU65350@michelle.cdnetworks.co.kr> <310A73CC-A32D-4794-BF23-A49715AFCF99@nokia.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="YD3LsXFS42OYHhNZ" Content-Disposition: inline In-Reply-To: <310A73CC-A32D-4794-BF23-A49715AFCF99@nokia.com> User-Agent: Mutt/1.4.2.3i Cc: "d@delphij.net" , "freebsd-stable@freebsd.org" , "nigel@eyede.com" Subject: Re: TCP differences in 7.2 vs 7.1 X-BeenThere: freebsd-stable@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list Reply-To: pyunyh@gmail.com List-Id: Production branch of FreeBSD source code List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 15 May 2009 08:49:16 -0000 --YD3LsXFS42OYHhNZ Content-Type: text/plain; charset=us-ascii Content-Disposition: inline On Thu, May 14, 2009 at 11:28:43AM +0300, Lars Eggert wrote: > Hi, > > On 2009-5-14, at 11:27, Pyun YongHyeon wrote: > >Then you're seeing different problem on em(4). Last time I checked > >em(4) TSO code in em(4) didn't use m_pullup and just returned > >ENXIO to caller. I'm not sure that is related with your issue but > >would you tell us your network configuration? > > this box is a Dell 2950 server/router running 7.2-STABLE. It has an > onboard bce interface and four dual-port Intel PRO/1000 NICs, giving > it 8 em interfaces. (Let me know if you want the boot dmesg.) > > >If you can easily > >reproduce the issue would you let us know? > > Reproducing the issue is as easy as setting net.inet.tcp.tso=1. > > What's interesting is that I only see the issue on one of the eight em > interfaces. That interface is connected to a D-Link DIR-655 WLAN > router. When I tcpdump on the other interfaces with TSO enabled, I see > no "IP bad-len 0" messages. > Would you try attached patch? I'm using the patch on my development box. Originally the patch was written to address checksum offload breakage on multicast packets(r182463). However I didn't encounter TSO issue without the patch. Note, the patch was not heavily tested so it may have uncovered bugs. --YD3LsXFS42OYHhNZ Content-Type: text/x-diff; charset=us-ascii Content-Disposition: attachment; filename="em.csum_tso.patch" Index: sys/dev/e1000/if_em.c =================================================================== --- sys/dev/e1000/if_em.c (revision 192130) +++ sys/dev/e1000/if_em.c (working copy) @@ -270,7 +270,7 @@ static void em_transmit_checksum_setup(struct adapter *, struct mbuf *, u32 *, u32 *); #if __FreeBSD_version >= 700000 -static bool em_tso_setup(struct adapter *, struct mbuf *, +static void em_tso_setup(struct adapter *, struct mbuf *, u32 *, u32 *); #endif /* FreeBSD_version >= 700000 */ static void em_set_promisc(struct adapter *); @@ -369,7 +369,6 @@ #define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000) #define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024) -#define M_TSO_LEN 66 /* Allow common code without TSO */ #ifndef CSUM_TSO @@ -2104,15 +2103,78 @@ /* + * Docuemtation explicitly recommends entire header section + * to be coalesced into a single buffer and described in a + * single data descriptor. + * * TSO workaround: * If an mbuf is only header we need * to pull 4 bytes of data into it. */ - if (do_tso && (m_head->m_len <= M_TSO_LEN)) { - m_head = m_pullup(m_head, M_TSO_LEN + 4); + if (do_tso || (m_head->m_pkthdr.csum_flags & + (CSUM_IP | CSUM_TCP | CSUM_UDP)) != 0) { + struct ether_header *eh; + struct ip *ip; + struct tcphdr *tcp; + uint32_t poff; + + if (M_WRITABLE(m_head) == 0) { + m_head = m_dup(*m_headp, M_DONTWAIT); + m_freem(*m_headp); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + *m_headp = m_head; + } + + poff = sizeof(struct ether_header); + m_head = m_pullup(m_head, poff); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + eh = mtod(m_head, struct ether_header *); + if (eh->ether_type == htons(ETHERTYPE_VLAN)) { + poff = sizeof(struct ether_vlan_header); + m_head = m_pullup(m_head, poff); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } + m_head = m_pullup(m_head, poff + sizeof(struct ip)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + ip = (struct ip *)(mtod(m_head, char *) + poff); + poff += (ip->ip_hl << 2); + + if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP) != 0) { + m_head = m_pullup(m_head, poff + sizeof(struct tcphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + tcp = (struct tcphdr *)(mtod(m_head, char *) + poff); + poff += (tcp->th_off << 2); + /* Pull 4 bytes of payload into the first mbuf. */ + if (do_tso) + poff += 4; + m_head = m_pullup(m_head, poff); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } else if ((m_head->m_pkthdr.csum_flags & (CSUM_UDP)) != 0) { + m_head = m_pullup(m_head, poff + sizeof(struct udphdr)); + if (m_head == NULL) { + *m_headp = NULL; + return (ENOBUFS); + } + } *m_headp = m_head; - if (m_head == NULL) - return (ENOBUFS); } /* @@ -2143,7 +2205,7 @@ if (error == EFBIG) { struct mbuf *m; - m = m_defrag(*m_headp, M_DONTWAIT); + m = m_collapse(*m_headp, M_DONTWAIT, EM_MAX_SCATTER); if (m == NULL) { adapter->mbuf_alloc_failed++; m_freem(*m_headp); @@ -2189,9 +2251,7 @@ /* Do hardware assists */ #if __FreeBSD_version >= 700000 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { - error = em_tso_setup(adapter, m_head, &txd_upper, &txd_lower); - if (error != TRUE) - return (ENXIO); /* something foobar */ + em_tso_setup(adapter, m_head, &txd_upper, &txd_lower); /* we need to make a final sentinel transmit desc */ tso_desc = TRUE; } else @@ -3836,7 +3896,7 @@ * Setup work for hardware segmentation offload (TSO) * **********************************************************************/ -static bool +static void em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper, u32 *txd_lower) { @@ -3868,10 +3928,6 @@ ehdrlen = ETHER_HDR_LEN; } - /* Ensure we have at least the IP+TCP header in the first mbuf. */ - if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr)) - return FALSE; /* -1 */ - /* * We only support TCP for IPv4 and IPv6 (notyet) for the moment. * TODO: Support SCTP too when it hits the tree. @@ -3880,31 +3936,24 @@ case ETHERTYPE_IP: isip6 = 0; ip = (struct ip *)(mp->m_data + ehdrlen); - if (ip->ip_p != IPPROTO_TCP) - return FALSE; /* 0 */ ip->ip_len = 0; ip->ip_sum = 0; ip_hlen = ip->ip_hl << 2; - if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr)) - return FALSE; /* -1 */ th = (struct tcphdr *)((caddr_t)ip + ip_hlen); -#if 1 + /* Controller expects a psuedo checksum without TCP length. */ th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); -#else - th->th_sum = mp->m_pkthdr.csum_data; -#endif break; case ETHERTYPE_IPV6: isip6 = 1; - return FALSE; /* Not supported yet. */ + return; /* Not supported yet. */ ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); if (ip6->ip6_nxt != IPPROTO_TCP) - return FALSE; /* 0 */ + return; /* 0 */ ip6->ip6_plen = 0; ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */ if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr)) - return FALSE; /* -1 */ + return; /* -1 */ th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen); #if 0 th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst, @@ -3914,7 +3963,7 @@ #endif break; default: - return FALSE; + return; } hdr_len = ehdrlen + ip_hlen + (th->th_off << 2); @@ -3976,8 +4025,6 @@ adapter->num_tx_desc_avail--; adapter->next_avail_tx_desc = curr_txd; adapter->tx_tso = TRUE; - - return TRUE; } #endif /* __FreeBSD_version >= 700000 */ Index: sys/dev/e1000/if_em.h =================================================================== --- sys/dev/e1000/if_em.h (revision 192130) +++ sys/dev/e1000/if_em.h (working copy) @@ -231,7 +231,7 @@ #define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A) #define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B) -#define EM_MAX_SCATTER 64 +#define EM_MAX_SCATTER 32 #define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header)) #define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */ #define EM_MSIX_MASK 0x01F00000 /* For 82574 use */ --YD3LsXFS42OYHhNZ--