Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 17 Jun 2004 15:30:22 +0900
From:      Pyun YongHyeon <yongari@kt-is.co.kr>
To:        Thomas Moestl <t.moestl@tu-bs.de>
Cc:        freebsd-sparc64@freebsd.org
Subject:   Re: TCP/UDP cksum offload on hme(4)
Message-ID:  <20040617063022.GA11797@kt-is.co.kr>
In-Reply-To: <20040617024759.GA5610@timesink.dyndns.org>
References:  <20040616034520.GB7887@kt-is.co.kr> <20040617024759.GA5610@timesink.dyndns.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--9jxsPFA5p3P2qPhR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Thu, Jun 17, 2004 at 04:48:00AM +0200, Thomas Moestl wrote:
 > Hello,
 > 
 > On Wed, 2004/06/16 at 12:45:20 +0900, Pyun YongHyeon wrote:
 > >  1. UDP TX cksum offload has an issue. The hardware doesn't flip the
 > >     cksum bits when the computed cksum is 0x0000. I have no idea this
 > >     is the reason why STP2002QFP says it supports only TCP RX/TX cksum.
 > >     (pp. 29, pp. 40, pp. 42)
 > 
 > It would probably be better not to advertise CSUM_UDP by default then,
 > so that the checksum will never be unset unless that is desired. One
 > of the IFF_LINK* flags or a sysctl could be used to allow the user to
 > enable it anyway.
 > 
Ok. Patch was modified to use IFF_LINK0. Now UDP TX cksumming is
disabled by default.

 > > +
 > > +	for(; m && m->m_len == 0; m = m->m_next)
 > > +		;
 > > +	if (m == NULL || m->m_len < ETHER_HDR_LEN) {
 > > +		printf("hme_txcksum: m_len < ETHER_HDR_LEN\n");
 > > +		return; /* cksum will be corrupted */
 > > +	}
 > 
 > It would probably be best to try as hard as possible to not fail
 > here. Since the conditions in which we would fail because of mbuf
 > sizes should rarely be true, maybe a good option would be to just
 > sacrifice some CPU time and do an m_pullup() then to rectify the mbuf
 > chain according to our needs (hme_txcksum would need to return a
 > struct mbuf * for that)?
 > 
Because we will have ALTQ feature in near future, the driver
can't call IF_PREPEND anymore. So if m_pullup() failed it would
be dropped. I thought, we may not encounter such mbuf fragmentation
in real environments.(I didn't see any of these fragment condition
on my setup.)
If we use m_pullup() it will make ALTQ-enabled hme driver useless.

 > > +
 > > +	hlen = ip->ip_hl << 2;
 > > +	pktlen -= sizeof(struct ether_header);
 > > +	if (hlen < sizeof(struct ip))
 > > +		return;
 > > +	if (ntohs(ip->ip_len) < hlen)
 > > +		return;
 > 
 > This test is a bit redundant with the TCP/UDP ones below.
 > 
When we have a packet that contains IP header with options plus some
corrupted TCP/UDP header(i.e. less than the size of the header),
should the check be performed?

 > > +	if (ntohs(ip->ip_len) != pktlen)
 > > +		return;
 > > +	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
 > > +		return;	/* can't handle fragmented packet */
 > > +
 > > +	switch (ip->ip_p) {
 > > +	case IPPROTO_TCP:
 > > +		if (pktlen < (hlen + sizeof(struct tcphdr)))
 > > +			return;
 > > +		break;
 > > +	case IPPROTO_UDP:
 > > +		if (pktlen < (hlen + sizeof(struct udphdr)))
 > > +			return;
 > > +		uh = (struct udphdr *)((caddr_t)ip + hlen);
 > > +		if (uh->uh_sum == 0)
 > > +			return; /* no checksum */
 > > +		break;
 > > +	default:
 > > +		return;
 > > +	}

Thank you very much for your detailed explanation and suggestions.
New patch is available at:
http://www.kr.freebsd.org/~yongari/hme.freebsd.diff2

Thanks again.
Rgeards,
Pyun YongHyeon
-- 
Pyun YongHyeon <http://www.kr.freebsd.org/~yongari>;

--9jxsPFA5p3P2qPhR
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="hme.freebsd.diff2"

--- if_hme.c.orig	Tue Jun 15 17:05:24 2004
+++ if_hme.c	Thu Jun 17 14:55:36 2004
@@ -54,9 +54,15 @@
  * maximum packet size (this is not verified). Buffers starting on odd
  * boundaries must be mapped so that the burst can start on a natural boundary.
  *
- * Checksumming is not yet supported.
+ * STP2002QFP-UG says that Ethernet hardware supports TCP checksum offload.
+ * In reality, we can do the same technique for UDP datagram too. However,
+ * the hardware doesn't compensate the cksum for UDP datagram which can yield
+ * to 0x0.
+ * When the UDP datagram with cksum 0 sent to a system, it would think it
+ * received a datagram with 'no cksum'. I don't know this is compulsory reason
+ * to disable UDP cksum offload capability.
  */
-
+#define HME_CSUM_FEATURES	(CSUM_TCP)
 #define HMEDEBUG
 #define	KTR_HME		KTR_CT2		/* XXX */
 
@@ -80,6 +86,12 @@
 #include <net/if_media.h>
 #include <net/if_vlan_var.h>
 
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
 #include <dev/mii/mii.h>
 #include <dev/mii/miivar.h>
 
@@ -106,10 +118,12 @@
 static void	hme_mediastatus(struct ifnet *, struct ifmediareq *);
 
 static int	hme_load_txmbuf(struct hme_softc *, struct mbuf *);
-static void	hme_read(struct hme_softc *, int, int);
+static void	hme_read(struct hme_softc *, int, int, u_int32_t);
 static void	hme_eint(struct hme_softc *, u_int);
 static void	hme_rint(struct hme_softc *);
 static void	hme_tint(struct hme_softc *);
+static void	hme_txcksum(struct mbuf *, u_int32_t *);
+static void	hme_rxcksum(struct mbuf *, u_int32_t);
 
 static void	hme_cdma_callback(void *, bus_dma_segment_t *, int, int);
 static void	hme_rxdma_callback(void *, bus_dma_segment_t *, int,
@@ -120,6 +134,7 @@
 devclass_t hme_devclass;
 
 static int hme_nerr;
+static int hme_csum_features = HME_CSUM_FEATURES;
 
 DRIVER_MODULE(miibus, hme, miibus_driver, miibus_devclass, 0, 0);
 MODULE_DEPEND(hme, miibus, 1, 1, 1);
@@ -316,11 +331,12 @@
 	ether_ifattach(ifp, sc->sc_arpcom.ac_enaddr);
 
 	/*
-	 * Tell the upper layer(s) we support long frames.
+	 * Tell the upper layer(s) we support long frames and cksum offloads.
 	 */
 	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
-	ifp->if_capabilities |= IFCAP_VLAN_MTU;
-	ifp->if_capenable |= IFCAP_VLAN_MTU;
+	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
+	ifp->if_hwassist |= hme_csum_features;
+	ifp->if_capenable |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
 
 	callout_init(&sc->sc_tick_ch, 0);
 	return (0);
@@ -656,7 +672,7 @@
 	struct hme_softc *sc = (struct hme_softc *)xsc;
 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
 	u_int8_t *ea;
-	u_int32_t v;
+	u_int32_t n, v;
 
 	/*
 	 * Initialization sequence. The numbered steps below correspond
@@ -740,6 +756,15 @@
 		v = HME_SEB_CFG_BURST64;
 		break;
 	}
+	/*
+	 * Blindly setting 64bit transfers may hang PCI cards(Cheerio?).
+	 * Allowing 64bit transfers breaks TX checksum offload as well.
+	 * Don't know this comes from hardware bug or driver's DMAing
+	 * scheme.
+	 *
+	 * if (sc->sc_pci == 0)
+ 	 *	v |= HME_SEB_CFG_64BIT;
+	 */
 	HME_SEB_WRITE_4(sc, HME_SEBI_CFG, v);
 
 	/* step 9. ETX Configuration: use mostly default values */
@@ -775,6 +800,10 @@
 	/* Enable DMA, fix RX first byte offset. */
 	v &= ~HME_ERX_CFG_FBO_MASK;
 	v |= HME_ERX_CFG_DMAENABLE | (HME_RXOFFS << HME_ERX_CFG_FBO_SHIFT);
+	/* RX TCP/UDP cksum offset */
+	n = (ETHER_HDR_LEN + sizeof(struct ip)) / 2;
+	n = (n << HME_ERX_CFG_CSUMSTART_SHIFT) & HME_ERX_CFG_CSUMSTART_MASK;
+	v |= n;
 	CTR1(KTR_HME, "hme_init: programming ERX_CFG to %x", (u_int)v);
 	HME_ERX_WRITE_4(sc, HME_ERXI_CFG, v);
 
@@ -893,6 +922,46 @@
 	    ("hme_txdma_callback: missed end of packet!"));
 }
 
+/* TX TCP/UDP cksum */
+static void
+hme_txcksum(struct mbuf *m, u_int32_t *cflags)
+{
+	struct ip *ip;
+	u_int32_t offset, offset2;
+	caddr_t p;
+
+	if ((m->m_pkthdr.csum_flags & hme_csum_features) == 0)
+		return;
+
+	for(; m && m->m_len == 0; m = m->m_next)
+		;
+	if (m == NULL || m->m_len < ETHER_HDR_LEN) {
+		printf("hme_txcksum: m_len < ETHER_HDR_LEN\n");
+		return; /* cksum will be corrupted */
+	}
+	if (m->m_len < ETHER_HDR_LEN + sizeof(u_int32_t)) {
+		if (m->m_len != ETHER_HDR_LEN) {
+			printf("hme_txcksum: m_len != ETHER_HDR_LEN\n");
+			return;	/* cksum will be corrupted */
+		}
+		/* XXX */
+		for(m = m->m_next; m && m->m_len == 0; m = m->m_next)
+			;
+		if (m == NULL)
+			return; /* cksum will be corrupted */
+		ip = mtod(m, struct ip *);
+	} else {
+		p = mtod(m, caddr_t);
+		p += ETHER_HDR_LEN;
+		ip = (struct ip *)p;
+	}
+	offset2 = m->m_pkthdr.csum_data;
+	offset = (ip->ip_hl << 2) + ETHER_HDR_LEN;
+	*cflags = offset << HME_XD_TXCKSUM_SSHIFT;
+	*cflags |= ((offset + offset2) << HME_XD_TXCKSUM_OSHIFT); 
+	*cflags |= HME_XD_TXCKSUM;
+}
+
 /*
  * Routine to dma map an mbuf chain, set up the descriptor rings accordingly and
  * start the transmission.
@@ -905,11 +974,12 @@
 	struct hme_txdma_arg cba;
 	struct hme_txdesc *td;
 	int error, si, ri;
-	u_int32_t flags;
+	u_int32_t flags, cflags = 0;
 
 	si = sc->sc_rb.rb_tdhead;
 	if ((td = STAILQ_FIRST(&sc->sc_rb.rb_txfreeq)) == NULL)
 		return (-1);
+	hme_txcksum(m0, &cflags);
 	td->htx_m = m0;
 	cba.hta_sc = sc;
 	cba.hta_htx = td;
@@ -933,7 +1003,7 @@
 	do {
 		ri = (ri + HME_NTXDESC - 1) % HME_NTXDESC;
 		flags = HME_XD_GETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri) |
-		    HME_XD_OWN;
+		    HME_XD_OWN | cflags;
 		CTR3(KTR_HME, "hme_load_mbuf: activating ri %d, si %d (%#x)",
 		    ri, si, flags);
 		HME_XD_SETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri, flags);
@@ -951,7 +1021,7 @@
  * Pass a packet to the higher levels.
  */
 static void
-hme_read(struct hme_softc *sc, int ix, int len)
+hme_read(struct hme_softc *sc, int ix, int len, u_int32_t flags)
 {
 	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
 	struct mbuf *m;
@@ -986,6 +1056,9 @@
 	m->m_pkthdr.rcvif = ifp;
 	m->m_pkthdr.len = m->m_len = len + HME_RXOFFS;
 	m_adj(m, HME_RXOFFS);
+	/* RX TCP/UDP cksum */
+	if (ifp->if_capenable & IFCAP_RXCSUM)
+		hme_rxcksum(m, flags);
 	/* Pass the packet up. */
 	(*ifp->if_input)(ifp, m);
 }
@@ -1108,6 +1181,71 @@
 }
 
 /*
+ * RX TCP/UDP cksumming 
+ */
+static void
+hme_rxcksum(struct mbuf *m, u_int32_t flags)
+{
+	struct ether_header *eh;
+	struct ip *ip;
+	struct udphdr *uh;
+	int32_t hlen, len, pktlen;
+	u_int16_t cksum, *opts;
+	u_int32_t temp32;
+
+	pktlen = m->m_pkthdr.len;
+	if (pktlen < sizeof(struct ether_header) + sizeof(struct ip))
+		return;
+	eh = mtod(m, struct ether_header *);
+	if (eh->ether_type != htons(ETHERTYPE_IP))
+		return;
+	ip = (struct ip *)(eh + 1);
+	if (ip->ip_v != IPVERSION)
+		return;
+
+	hlen = ip->ip_hl << 2;
+	pktlen -= sizeof(struct ether_header);
+	if (hlen < sizeof(struct ip))
+		return;
+	if (ntohs(ip->ip_len) < hlen)
+		return;
+	if (ntohs(ip->ip_len) != pktlen)
+		return;
+	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
+		return;	/* can't handle fragmented packet */
+
+	switch (ip->ip_p) {
+	case IPPROTO_TCP:
+		if (pktlen < (hlen + sizeof(struct tcphdr)))
+			return;
+		break;
+	case IPPROTO_UDP:
+		if (pktlen < (hlen + sizeof(struct udphdr)))
+			return;
+		uh = (struct udphdr *)((caddr_t)ip + hlen);
+		if (uh->uh_sum == 0)
+			return; /* no checksum */
+		break;
+	default:
+		return;
+	}
+
+	cksum = ~(flags & HME_XD_RXCKSUM);
+	/* cksum fixup for IP options */
+	len = hlen - sizeof(struct ip);
+	if (len > 0) {
+		opts = (u_int16_t *)(ip + 1);
+		for (; len > 0; len -= sizeof(u_int16_t), opts++) {
+			temp32 = cksum - *opts;
+			temp32 = (temp32 >> 16) + (temp32 & 65535);
+			cksum = temp32 & 65535;
+		}
+	}
+	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
+	m->m_pkthdr.csum_data = cksum;
+}
+
+/*
  * Receive interrupt.
  */
 static void
@@ -1137,7 +1275,7 @@
 			hme_discard_rxbuf(sc, ri);
 		} else {
 			len = HME_XD_DECODE_RSIZE(flags);
-			hme_read(sc, ri, len);
+			hme_read(sc, ri, len, flags);
 		}
 	}
 	if (progress) {
@@ -1373,6 +1511,10 @@
 			 */
 			hme_init(sc);
 		}
+		if ((ifp->if_flags & IFF_LINK0) != 0)
+			hme_csum_features |= CSUM_UDP;
+		else
+			hme_csum_features &= ~CSUM_UDP;
 #ifdef HMEDEBUG
 		sc->sc_debug = (ifp->if_flags & IFF_DEBUG) != 0 ? 1 : 0;
 #endif
@@ -1386,6 +1528,13 @@
 	case SIOCGIFMEDIA:
 	case SIOCSIFMEDIA:
 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
+		break;
+	case SIOCSIFCAP:
+		ifp->if_capenable = ifr->ifr_reqcap;
+		if (ifp->if_capenable & IFCAP_TXCSUM)
+			ifp->if_hwassist = hme_csum_features;
+		else
+			ifp->if_hwassist = 0;
 		break;
 	default:
 		error = ether_ioctl(ifp, cmd, data);
--- if_hme_sbus.c.orig	Tue Jun 15 17:05:24 2004
+++ if_hme_sbus.c	Tue Jun 15 17:05:24 2004
@@ -244,8 +244,14 @@
 
 	burst = sbus_get_burstsz(dev);
 	/* Translate into plain numerical format */
-	sc->sc_burst =  (burst & SBUS_BURST_32) ? 32 :
-	    (burst & SBUS_BURST_16) ? 16 : 0;
+	if ((burst & SBUS_BURST_64))
+		sc->sc_burst = 64;
+	else if ((burst & SBUS_BURST_32))
+		sc->sc_burst = 32;
+	else if ((burst & SBUS_BURST_16))
+		sc->sc_burst = 16;
+	else
+		 sc->sc_burst = 0;
 
 	sc->sc_pci = 0;	/* XXX: should all be done in bus_dma. */
 	sc->sc_dev = dev;
--- if_hmereg.h.orig	Tue Jun 15 17:05:24 2004
+++ if_hmereg.h	Thu Jun 17 14:23:02 2004
@@ -54,8 +54,8 @@
 #define HME_SEB_CFG_BURST16	0x00000000	/* 16 byte bursts */
 #define HME_SEB_CFG_BURST32	0x00000001	/* 32 byte bursts */
 #define HME_SEB_CFG_BURST64	0x00000002	/* 64 byte bursts */
-#define HME_SEB_CFG_64BIT	0x00000004	/* ? */
-#define HME_SEB_CFG_PARITY	0x00000008	/* ? */
+#define HME_SEB_CFG_64BIT	0x00000004	/* extended transfer mode */
+#define HME_SEB_CFG_PARITY	0x00000008	/* parity check for DVMA/PIO */
 
 #define HME_SEB_STAT_GOTFRAME	0x00000001	/* frame received */
 #define HME_SEB_STAT_RCNTEXP	0x00000002	/* rx frame count expired */
@@ -154,7 +154,7 @@
 #define HME_ERXI_FIFO_WPTR	(3*4)		/* FIFO write pointer */
 #define HME_ERXI_FIFO_SWPTR	(4*4)		/* FIFO shadow write pointer */
 #define HME_ERXI_FIFO_RPTR	(5*4)		/* FIFO read pointer */
-#define HME_ERXI_FIFO_SRPTR	(6*4)		/* FIFO shadow read pointer */
+#define HME_ERXI_FIFO_PKTCNT	(6*4)		/* FIFO packet counter */
 #define HME_ERXI_STATEMACHINE	(7*4)		/* State machine */
 
 /* RXI_CFG bits */
@@ -166,7 +166,8 @@
 #define HME_ERX_CFG_RINGSIZE128	0x00000400	/* Descriptor ring size: 128 */
 #define HME_ERX_CFG_RINGSIZE256	0x00000600	/* Descriptor ring size: 256 */
 #define HME_ERX_CFG_RINGSIZEMSK	0x00000600	/* Descriptor ring size: 256 */
-#define HME_ERX_CFG_CSUMSTART	0x007f0000	/* cksum offset */
+#define HME_ERX_CFG_CSUMSTART_MASK 0x007f0000	/* cksum offset mask */
+#define HME_ERX_CFG_CSUMSTART_SHIFT	16
 
 /*
  * HME MAC-core register offsets
@@ -289,6 +290,8 @@
 #define HME_XD_RXLENMSK	0x3fff0000	/* packet length mask (rx) */
 #define HME_XD_RXLENSHIFT	16
 #define HME_XD_TXLENMSK	0x00003fff	/* packet length mask (tx) */
+#define HME_XD_TXCKSUM_SSHIFT	14
+#define HME_XD_TXCKSUM_OSHIFT	20
 #define HME_XD_RXCKSUM	0x0000ffff	/* packet checksum (rx) */
 
 /* Macros to encode/decode the receive buffer size from the flags field */

--9jxsPFA5p3P2qPhR--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20040617063022.GA11797>