Date: Fri, 15 May 2009 17:58:06 +0900 From: Pyun YongHyeon <pyunyh@gmail.com> To: Lars Eggert <lars.eggert@nokia.com> Cc: "d@delphij.net" <d@delphij.net>, "freebsd-stable@freebsd.org" <freebsd-stable@freebsd.org>, "nigel@eyede.com" <nigel@eyede.com> Subject: Re: TCP differences in 7.2 vs 7.1 Message-ID: <20090515085806.GX65350@michelle.cdnetworks.co.kr> In-Reply-To: <310A73CC-A32D-4794-BF23-A49715AFCF99@nokia.com> References: <guccc2$8b4$1@ger.gmane.org> <4A09DEF1.2010202@delphij.net> <4A09FDB2.5080307@eyede.com> <20090513004131.GP65350@michelle.cdnetworks.co.kr> <DAF693BD-D7B0-49FA-97EF-41C1EA1FAF84@nokia.com> <20090514082750.GU65350@michelle.cdnetworks.co.kr> <310A73CC-A32D-4794-BF23-A49715AFCF99@nokia.com>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
On Thu, May 14, 2009 at 11:28:43AM +0300, Lars Eggert wrote:
> Hi,
>
> On 2009-5-14, at 11:27, Pyun YongHyeon wrote:
> >Then you're seeing different problem on em(4). Last time I checked
> >em(4) TSO code in em(4) didn't use m_pullup and just returned
> >ENXIO to caller. I'm not sure that is related with your issue but
> >would you tell us your network configuration?
>
> this box is a Dell 2950 server/router running 7.2-STABLE. It has an
> onboard bce interface and four dual-port Intel PRO/1000 NICs, giving
> it 8 em interfaces. (Let me know if you want the boot dmesg.)
>
> >If you can easily
> >reproduce the issue would you let us know?
>
> Reproducing the issue is as easy as setting net.inet.tcp.tso=1.
>
> What's interesting is that I only see the issue on one of the eight em
> interfaces. That interface is connected to a D-Link DIR-655 WLAN
> router. When I tcpdump on the other interfaces with TSO enabled, I see
> no "IP bad-len 0" messages.
>
Would you try attached patch? I'm using the patch on my development
box. Originally the patch was written to address checksum offload
breakage on multicast packets(r182463).
However I didn't encounter TSO issue without the patch. Note, the
patch was not heavily tested so it may have uncovered bugs.
[-- Attachment #2 --]
Index: sys/dev/e1000/if_em.c
===================================================================
--- sys/dev/e1000/if_em.c (revision 192130)
+++ sys/dev/e1000/if_em.c (working copy)
@@ -270,7 +270,7 @@
static void em_transmit_checksum_setup(struct adapter *, struct mbuf *,
u32 *, u32 *);
#if __FreeBSD_version >= 700000
-static bool em_tso_setup(struct adapter *, struct mbuf *,
+static void em_tso_setup(struct adapter *, struct mbuf *,
u32 *, u32 *);
#endif /* FreeBSD_version >= 700000 */
static void em_set_promisc(struct adapter *);
@@ -369,7 +369,6 @@
#define EM_TICKS_TO_USECS(ticks) ((1024 * (ticks) + 500) / 1000)
#define EM_USECS_TO_TICKS(usecs) ((1000 * (usecs) + 512) / 1024)
-#define M_TSO_LEN 66
/* Allow common code without TSO */
#ifndef CSUM_TSO
@@ -2104,15 +2103,78 @@
/*
+ * Docuemtation explicitly recommends entire header section
+ * to be coalesced into a single buffer and described in a
+ * single data descriptor.
+ *
* TSO workaround:
* If an mbuf is only header we need
* to pull 4 bytes of data into it.
*/
- if (do_tso && (m_head->m_len <= M_TSO_LEN)) {
- m_head = m_pullup(m_head, M_TSO_LEN + 4);
+ if (do_tso || (m_head->m_pkthdr.csum_flags &
+ (CSUM_IP | CSUM_TCP | CSUM_UDP)) != 0) {
+ struct ether_header *eh;
+ struct ip *ip;
+ struct tcphdr *tcp;
+ uint32_t poff;
+
+ if (M_WRITABLE(m_head) == 0) {
+ m_head = m_dup(*m_headp, M_DONTWAIT);
+ m_freem(*m_headp);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ *m_headp = m_head;
+ }
+
+ poff = sizeof(struct ether_header);
+ m_head = m_pullup(m_head, poff);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ eh = mtod(m_head, struct ether_header *);
+ if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
+ poff = sizeof(struct ether_vlan_header);
+ m_head = m_pullup(m_head, poff);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ }
+ m_head = m_pullup(m_head, poff + sizeof(struct ip));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ ip = (struct ip *)(mtod(m_head, char *) + poff);
+ poff += (ip->ip_hl << 2);
+
+ if (do_tso || (m_head->m_pkthdr.csum_flags & CSUM_TCP) != 0) {
+ m_head = m_pullup(m_head, poff + sizeof(struct tcphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ tcp = (struct tcphdr *)(mtod(m_head, char *) + poff);
+ poff += (tcp->th_off << 2);
+ /* Pull 4 bytes of payload into the first mbuf. */
+ if (do_tso)
+ poff += 4;
+ m_head = m_pullup(m_head, poff);
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ } else if ((m_head->m_pkthdr.csum_flags & (CSUM_UDP)) != 0) {
+ m_head = m_pullup(m_head, poff + sizeof(struct udphdr));
+ if (m_head == NULL) {
+ *m_headp = NULL;
+ return (ENOBUFS);
+ }
+ }
*m_headp = m_head;
- if (m_head == NULL)
- return (ENOBUFS);
}
/*
@@ -2143,7 +2205,7 @@
if (error == EFBIG) {
struct mbuf *m;
- m = m_defrag(*m_headp, M_DONTWAIT);
+ m = m_collapse(*m_headp, M_DONTWAIT, EM_MAX_SCATTER);
if (m == NULL) {
adapter->mbuf_alloc_failed++;
m_freem(*m_headp);
@@ -2189,9 +2251,7 @@
/* Do hardware assists */
#if __FreeBSD_version >= 700000
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
- error = em_tso_setup(adapter, m_head, &txd_upper, &txd_lower);
- if (error != TRUE)
- return (ENXIO); /* something foobar */
+ em_tso_setup(adapter, m_head, &txd_upper, &txd_lower);
/* we need to make a final sentinel transmit desc */
tso_desc = TRUE;
} else
@@ -3836,7 +3896,7 @@
* Setup work for hardware segmentation offload (TSO)
*
**********************************************************************/
-static bool
+static void
em_tso_setup(struct adapter *adapter, struct mbuf *mp, u32 *txd_upper,
u32 *txd_lower)
{
@@ -3868,10 +3928,6 @@
ehdrlen = ETHER_HDR_LEN;
}
- /* Ensure we have at least the IP+TCP header in the first mbuf. */
- if (mp->m_len < ehdrlen + sizeof(struct ip) + sizeof(struct tcphdr))
- return FALSE; /* -1 */
-
/*
* We only support TCP for IPv4 and IPv6 (notyet) for the moment.
* TODO: Support SCTP too when it hits the tree.
@@ -3880,31 +3936,24 @@
case ETHERTYPE_IP:
isip6 = 0;
ip = (struct ip *)(mp->m_data + ehdrlen);
- if (ip->ip_p != IPPROTO_TCP)
- return FALSE; /* 0 */
ip->ip_len = 0;
ip->ip_sum = 0;
ip_hlen = ip->ip_hl << 2;
- if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
- return FALSE; /* -1 */
th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
-#if 1
+ /* Controller expects a psuedo checksum without TCP length. */
th->th_sum = in_pseudo(ip->ip_src.s_addr,
ip->ip_dst.s_addr, htons(IPPROTO_TCP));
-#else
- th->th_sum = mp->m_pkthdr.csum_data;
-#endif
break;
case ETHERTYPE_IPV6:
isip6 = 1;
- return FALSE; /* Not supported yet. */
+ return; /* Not supported yet. */
ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
if (ip6->ip6_nxt != IPPROTO_TCP)
- return FALSE; /* 0 */
+ return; /* 0 */
ip6->ip6_plen = 0;
ip_hlen = sizeof(struct ip6_hdr); /* XXX: no header stacking. */
if (mp->m_len < ehdrlen + ip_hlen + sizeof(struct tcphdr))
- return FALSE; /* -1 */
+ return; /* -1 */
th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
#if 0
th->th_sum = in6_pseudo(ip6->ip6_src, ip->ip6_dst,
@@ -3914,7 +3963,7 @@
#endif
break;
default:
- return FALSE;
+ return;
}
hdr_len = ehdrlen + ip_hlen + (th->th_off << 2);
@@ -3976,8 +4025,6 @@
adapter->num_tx_desc_avail--;
adapter->next_avail_tx_desc = curr_txd;
adapter->tx_tso = TRUE;
-
- return TRUE;
}
#endif /* __FreeBSD_version >= 700000 */
Index: sys/dev/e1000/if_em.h
===================================================================
--- sys/dev/e1000/if_em.h (revision 192130)
+++ sys/dev/e1000/if_em.h (working copy)
@@ -231,7 +231,7 @@
#define HW_DEBUGOUT1(S, A) if (DEBUG_HW) printf(S "\n", A)
#define HW_DEBUGOUT2(S, A, B) if (DEBUG_HW) printf(S "\n", A, B)
-#define EM_MAX_SCATTER 64
+#define EM_MAX_SCATTER 32
#define EM_TSO_SIZE (65535 + sizeof(struct ether_vlan_header))
#define EM_TSO_SEG_SIZE 4096 /* Max dma segment size */
#define EM_MSIX_MASK 0x01F00000 /* For 82574 use */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20090515085806.GX65350>
