Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 17 Jun 2004 04:48:00 +0200
From:      Thomas Moestl <t.moestl@tu-bs.de>
To:        Pyun YongHyeon <yongari@kt-is.co.kr>
Cc:        freebsd-sparc64@freebsd.org
Subject:   Re: TCP/UDP cksum offload on hme(4)
Message-ID:  <20040617024759.GA5610@timesink.dyndns.org>
In-Reply-To: <20040616034520.GB7887@kt-is.co.kr>
References:  <20040616034520.GB7887@kt-is.co.kr>

next in thread | previous in thread | raw e-mail | index | archive | help
Hello,

On Wed, 2004/06/16 at 12:45:20 +0900, Pyun YongHyeon wrote:
>  1. UDP TX cksum offload has an issue. The hardware doesn't flip the
>     cksum bits when the computed cksum is 0x0000. I have no idea this
>     is the reason why STP2002QFP says it supports only TCP RX/TX cksum.
>     (pp. 29, pp. 40, pp. 42)

It would probably be better not to advertise CSUM_UDP by default then,
so that the checksum will never be unset unless that is desired. One
of the IFF_LINK* flags or a sysctl could be used to allow the user to
enable it anyway.

> Corrections, suggestions welcome.

Great work! I had long been contemplating to implement this, but
always was too lazy :)

I have some comments regarding the patch, mostly about minor style issues
and oversights:

> --- if_hme.c.orig	Wed Jun 16 12:16:56 2004
> +++ if_hme.c	Wed Jun 16 12:16:56 2004
> @@ -54,9 +54,15 @@
>   * maximum packet size (this is not verified). Buffers starting on odd
>   * boundaries must be mapped so that the burst can start on a natural boundary.
>   *
> - * Checksumming is not yet supported.
> + * STP2002QFP-UG says that Ethernet hardware supports TCP checksum offload.
> + * In reality, we can do the same technique for UDP datagram too. However,
> + * the hardware doesn't compensate the cksum for UDP datagram which can yield
> + * to 0x0.
> + * When the UDP datagram with cksum 0 sent to a system, it would think it
> + * received a datagram with 'no cksum'. I don't know this is compulsory reason
> + * to disable UDP cksum offload capability.
>   */
> -
> +#define HME_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP)
>  #define HMEDEBUG
>  #define	KTR_HME		KTR_CT2		/* XXX */
>  
> @@ -80,6 +86,12 @@
>  #include <net/if_media.h>
>  #include <net/if_vlan_var.h>
>  
> +#include <netinet/in.h>
> +#include <netinet/in_systm.h>
> +#include <netinet/ip.h>
> +#include <netinet/tcp.h>
> +#include <netinet/udp.h>
> +
>  #include <dev/mii/mii.h>
>  #include <dev/mii/miivar.h>
>  
> @@ -106,10 +118,12 @@
>  static void	hme_mediastatus(struct ifnet *, struct ifmediareq *);
>  
>  static int	hme_load_txmbuf(struct hme_softc *, struct mbuf *);
> -static void	hme_read(struct hme_softc *, int, int);
> +static void	hme_read(struct hme_softc *, int, int, u_int32_t);
>  static void	hme_eint(struct hme_softc *, u_int);
>  static void	hme_rint(struct hme_softc *);
>  static void	hme_tint(struct hme_softc *);
> +static void	hme_txcksum(struct mbuf *, u_int32_t *);
> +static void	hme_rxcksum(struct mbuf *, u_int32_t);
>  
>  static void	hme_cdma_callback(void *, bus_dma_segment_t *, int, int);
>  static void	hme_rxdma_callback(void *, bus_dma_segment_t *, int,
> @@ -316,11 +330,12 @@
>  	ether_ifattach(ifp, sc->sc_arpcom.ac_enaddr);
>  
>  	/*
> -	 * Tell the upper layer(s) we support long frames.
> +	 * Tell the upper layer(s) we support long frames and cksum offloads.
>  	 */
>  	ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
> -	ifp->if_capabilities |= IFCAP_VLAN_MTU;
> -	ifp->if_capenable |= IFCAP_VLAN_MTU;
> +	ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
> +	ifp->if_hwassist |= HME_CSUM_FEATURES;
> +	ifp->if_capenable |= IFCAP_VLAN_MTU | IFCAP_HWCSUM;
>  
>  	callout_init(&sc->sc_tick_ch, 0);
>  	return (0);
> @@ -656,7 +671,7 @@
>  	struct hme_softc *sc = (struct hme_softc *)xsc;
>  	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
>  	u_int8_t *ea;
> -	u_int32_t v;
> +	u_int32_t n, v;
>  
>  	/*
>  	 * Initialization sequence. The numbered steps below correspond
> @@ -740,6 +755,15 @@
>  		v = HME_SEB_CFG_BURST64;
>  		break;
>  	}
> +	/*
> +	 * Blindly setting 64bit transfers may hang PCI cards(Cheerio?).
> +	 * Allowing 64bit transfers breaks TX checksum offload as well.
> +	 * Don't know this comes from hardware bug or driver's DMAing
> +	 * scheme.
> +	 *
> +	 * if (sc->sc_pci == 0)
> + 	 *	v |= HME_SEB_CFG_64BIT;
> +	 */
>  	HME_SEB_WRITE_4(sc, HME_SEBI_CFG, v);
>  
>  	/* step 9. ETX Configuration: use mostly default values */
> @@ -775,6 +799,12 @@
>  	/* Enable DMA, fix RX first byte offset. */
>  	v &= ~HME_ERX_CFG_FBO_MASK;
>  	v |= HME_ERX_CFG_DMAENABLE | (HME_RXOFFS << HME_ERX_CFG_FBO_SHIFT);
> +	/* RX TCP/UDP cksum offset */
> +	if (ifp->if_capenable & IFCAP_TXCSUM) {
> +		n = (ETHER_HDR_LEN + sizeof(struct ip)) / 2;
> +		n = (n << HME_ERX_CFG_CSUM_SHIFT) & HME_ERX_CFG_CSUMSTART;
> +		v |= n;
> +	}

Should this test not be for the IFCAP_RXCSUM bit? Also, a minor style
nit: please add a comparison against 0 to such bit tests, e.g.:
	if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) {

>  	CTR1(KTR_HME, "hme_init: programming ERX_CFG to %x", (u_int)v);
>  	HME_ERX_WRITE_4(sc, HME_ERXI_CFG, v);
>  
> @@ -893,6 +923,55 @@
>  	    ("hme_txdma_callback: missed end of packet!"));
>  }
>  
> +/* TX TCP/UDP cksum */
> +static void
> +hme_txcksum(struct mbuf *m, u_int32_t *cflags)
> +{
> +	struct ip *ip;
> +	u_int32_t offset, offset2, csumflag;
> +	caddr_t p;
> +
> +	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
> +		offset2 = offsetof(struct tcphdr, th_sum); 
> +		csumflag = HME_XD_TCPCKSUM;
> +	} else if((m->m_pkthdr.csum_flags & CSUM_UDP)) {
> +		offset2 = offsetof(struct udphdr, uh_sum);
> +		csumflag = HME_XD_UDPCKSUM;
> +	} else
> +		return;

(It seems that csum_data contains the offset of the field that the
checksum should be written into, relative to the start of the IP
payload, for outgoing packets; this might save some code here, but
that is of course not that important.)

> +
> +	for(; m && m->m_len == 0; m = m->m_next)
> +		;
> +	if (m == NULL || m->m_len < ETHER_HDR_LEN) {
> +		printf("hme_txcksum: m_len < ETHER_HDR_LEN\n");
> +		return; /* cksum will be corrupted */
> +	}

It would probably be best to try as hard as possible to not fail
here. Since the conditions in which we would fail because of mbuf
sizes should rarely be true, maybe a good option would be to just
sacrifice some CPU time and do an m_pullup() then to rectify the mbuf
chain according to our needs (hme_txcksum would need to return a
struct mbuf * for that)?

> +	if (m->m_len < ETHER_HDR_LEN + sizeof(u_int32_t)) {
> +		if (m->m_len != ETHER_HDR_LEN) {
> +			printf("hme_txcksum: m_len != ETHER_HDR_LEN\n");
> +			return;	/* cksum will be corrupted */
> +		}
> +		/* XXX */
> +		for(m = m->m_next; m && m->m_len == 0; m = m->m_next)
> +			;
> +		if (m == NULL)
> +			return; /* cksum will be corrupted */
> +		ip = mtod(m, struct ip *);
> +	} else {
> +		p = mtod(m, caddr_t);
> +		p += ETHER_HDR_LEN;
> +		ip = (struct ip *)p;
> +	}
> +	if ((ip->ip_hl << 2) == sizeof(*ip))
> +		*cflags = csumflag;
> +	else {
> +		offset = (ip->ip_hl << 2) + ETHER_HDR_LEN;
> +		*cflags = offset << HME_XD_TXCKSUM_SSHIFT;
> +		*cflags |= ((offset + offset2) << HME_XD_TXCKSUM_OSHIFT); 
> +		*cflags |= HME_XD_TXCKSUM;
> +	}

I would suggest to always use the code in the else branch of the
if. The time needed to compute the flags is negligible, and it would
make the code a bit clearer.

> +}
> +
>  /*
>   * Routine to dma map an mbuf chain, set up the descriptor rings accordingly and
>   * start the transmission.
> @@ -905,11 +984,12 @@
>  	struct hme_txdma_arg cba;
>  	struct hme_txdesc *td;
>  	int error, si, ri;
> -	u_int32_t flags;
> +	u_int32_t flags, cflags = 0;
>  
>  	si = sc->sc_rb.rb_tdhead;
>  	if ((td = STAILQ_FIRST(&sc->sc_rb.rb_txfreeq)) == NULL)
>  		return (-1);
> +	hme_txcksum(m0, &cflags);
>  	td->htx_m = m0;
>  	cba.hta_sc = sc;
>  	cba.hta_htx = td;
> @@ -933,7 +1013,7 @@
>  	do {
>  		ri = (ri + HME_NTXDESC - 1) % HME_NTXDESC;
>  		flags = HME_XD_GETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri) |
> -		    HME_XD_OWN;
> +		    HME_XD_OWN | cflags;
>  		CTR3(KTR_HME, "hme_load_mbuf: activating ri %d, si %d (%#x)",
>  		    ri, si, flags);
>  		HME_XD_SETFLAGS(sc->sc_pci, sc->sc_rb.rb_txd, ri, flags);
> @@ -951,7 +1031,7 @@
>   * Pass a packet to the higher levels.
>   */
>  static void
> -hme_read(struct hme_softc *sc, int ix, int len)
> +hme_read(struct hme_softc *sc, int ix, int len, u_int32_t flags)
>  {
>  	struct ifnet *ifp = &sc->sc_arpcom.ac_if;
>  	struct mbuf *m;
> @@ -986,6 +1066,9 @@
>  	m->m_pkthdr.rcvif = ifp;
>  	m->m_pkthdr.len = m->m_len = len + HME_RXOFFS;
>  	m_adj(m, HME_RXOFFS);
> +	/* RX TCP/UDP cksum */
> +	if (ifp->if_capenable & IFCAP_RXCSUM)
> +		hme_rxcksum(m, flags);
>  	/* Pass the packet up. */
>  	(*ifp->if_input)(ifp, m);
>  }
> @@ -1108,6 +1191,71 @@
>  }
>  
>  /*
> + * RX TCP/UDP cksumming 
> + */
> +static void
> +hme_rxcksum(struct mbuf *m, u_int32_t flags)
> +{
> +	struct ether_header *eh;
> +	struct ip *ip;
> +	struct udphdr *uh;
> +	int32_t hlen, len, pktlen;
> +	u_int16_t cksum, *opts;
> +	u_int32_t temp32;
> +
> +	pktlen = m->m_pkthdr.len;
> +	if (pktlen < sizeof(struct ether_header))
> +		return;
> +	eh = mtod(m, struct ether_header *);
> +	if (eh->ether_type != htons(ETHERTYPE_IP))
> +		return;
> +	ip = (struct ip *)(eh + 1);
> +	if (ip->ip_v != IPVERSION)
> +		return;

For paranoia reasons, it would be nice to check whether the packet is
large enough to contain an ip header before we access its fields, too;
I guess that could be conveniently combined with the sizeof(struct
ether_header) check above.

> +
> +	hlen = ip->ip_hl << 2;
> +	pktlen -= sizeof(struct ether_header);
> +	if (hlen < sizeof(struct ip))
> +		return;
> +	if (ntohs(ip->ip_len) < hlen)
> +		return;

This test is a bit redundant with the TCP/UDP ones below.

> +	if (ntohs(ip->ip_len) != pktlen)
> +		return;
> +	if (ip->ip_off & htons(IP_MF | IP_OFFMASK))
> +		return;	/* can't handle fragmented packet */
> +
> +	switch (ip->ip_p) {
> +	case IPPROTO_TCP:
> +		if (pktlen < (hlen + sizeof(struct tcphdr)))
> +			return;
> +		break;
> +	case IPPROTO_UDP:
> +		if (pktlen < (hlen + sizeof(struct udphdr)))
> +			return;
> +		uh = (struct udphdr *)((caddr_t)ip + hlen);
> +		if (uh->uh_sum == 0)
> +			return; /* no checksum */
> +		break;
> +	default:
> +		return;
> +	}
> +
> +	cksum = ~(flags & HME_XD_RXCKSUM);
> +	/* cksum fixup for IP options */
> +	len = hlen - sizeof(struct ip);
> +	if (len > 0) {
> +		opts = (u_int16_t *)(ip + 1);
> +		for (; len > 0; len -= sizeof(u_int16_t), opts++) {
> +			temp32 = cksum - *opts;
> +			temp32 = (temp32 >> 16) + (temp32 & 65535);
> +			cksum = temp32 & 65535;
> +		}
> +	}
> +	m->m_pkthdr.csum_flags |= CSUM_DATA_VALID;
> +	m->m_pkthdr.csum_data = cksum;
> +}
> +
> +/*
>   * Receive interrupt.
>   */
>  static void
> @@ -1137,7 +1285,7 @@
>  			hme_discard_rxbuf(sc, ri);
>  		} else {
>  			len = HME_XD_DECODE_RSIZE(flags);
> -			hme_read(sc, ri, len);
> +			hme_read(sc, ri, len, flags);
>  		}
>  	}
>  	if (progress) {
> @@ -1386,6 +1534,15 @@
>  	case SIOCGIFMEDIA:
>  	case SIOCSIFMEDIA:
>  		error = ifmedia_ioctl(ifp, ifr, &sc->sc_mii->mii_media, cmd);
> +		break;
> +	case SIOCSIFCAP:
> +		ifp->if_capenable = ifr->ifr_reqcap;
> +		if (ifp->if_capenable & IFCAP_HWCSUM)
> +			ifp->if_hwassist = HME_CSUM_FEATURES;
> +		else
> +			ifp->if_hwassist = 0;
> +		if (ifp->if_flags & IFF_RUNNING)
> +			hme_init(sc);

As I understand it, if_hwassist reflects only the types of checksums
for outgoing packets that we are able to do, so it should only be set
if IFCAP_TXCSUM is enabled, not either TXCSUM or RXCSUM. The network
stack will check only if_hwassist, not if_capenable, to determine
which TX checksums it may offload.

>  		break;
>  	default:
>  		error = ether_ioctl(ifp, cmd, data);
> --- if_hme_sbus.c.orig	Wed Jun 16 12:16:56 2004
> +++ if_hme_sbus.c	Wed Jun 16 12:16:56 2004
> @@ -244,8 +244,14 @@
>  
>  	burst = sbus_get_burstsz(dev);
>  	/* Translate into plain numerical format */
> -	sc->sc_burst =  (burst & SBUS_BURST_32) ? 32 :
> -	    (burst & SBUS_BURST_16) ? 16 : 0;
> +	if ((burst & SBUS_BURST_64))
> +		sc->sc_burst = 64;
> +	else if ((burst & SBUS_BURST_32))
> +		sc->sc_burst = 32;
> +	else if ((burst & SBUS_BURST_16))
> +		sc->sc_burst = 16;
> +	else
> +		 sc->sc_burst = 0;
>  
>  	sc->sc_pci = 0;	/* XXX: should all be done in bus_dma. */
>  	sc->sc_dev = dev;
> --- if_hmereg.h.orig	Wed Jun 16 12:16:56 2004
> +++ if_hmereg.h	Wed Jun 16 12:16:56 2004
> @@ -54,8 +54,8 @@
>  #define HME_SEB_CFG_BURST16	0x00000000	/* 16 byte bursts */
>  #define HME_SEB_CFG_BURST32	0x00000001	/* 32 byte bursts */
>  #define HME_SEB_CFG_BURST64	0x00000002	/* 64 byte bursts */
> -#define HME_SEB_CFG_64BIT	0x00000004	/* ? */
> -#define HME_SEB_CFG_PARITY	0x00000008	/* ? */
> +#define HME_SEB_CFG_64BIT	0x00000004	/* extended transfer mode */
> +#define HME_SEB_CFG_PARITY	0x00000008	/* parity check for DVMA/PIO */
>  
>  #define HME_SEB_STAT_GOTFRAME	0x00000001	/* frame received */
>  #define HME_SEB_STAT_RCNTEXP	0x00000002	/* rx frame count expired */
> @@ -154,7 +154,7 @@
>  #define HME_ERXI_FIFO_WPTR	(3*4)		/* FIFO write pointer */
>  #define HME_ERXI_FIFO_SWPTR	(4*4)		/* FIFO shadow write pointer */
>  #define HME_ERXI_FIFO_RPTR	(5*4)		/* FIFO read pointer */
> -#define HME_ERXI_FIFO_SRPTR	(6*4)		/* FIFO shadow read pointer */
> +#define HME_ERXI_FIFO_PKTCNT	(6*4)		/* FIFO packet counter */
>  #define HME_ERXI_STATEMACHINE	(7*4)		/* State machine */
>  
>  /* RXI_CFG bits */
> @@ -167,6 +167,7 @@
>  #define HME_ERX_CFG_RINGSIZE256	0x00000600	/* Descriptor ring size: 256 */
>  #define HME_ERX_CFG_RINGSIZEMSK	0x00000600	/* Descriptor ring size: 256 */
>  #define HME_ERX_CFG_CSUMSTART	0x007f0000	/* cksum offset */
> +#define HME_ERX_CFG_CSUM_SHIFT	16

Please name the constants consistently in the way similar ones are
(..._CSUMSTART was alread in error), e.g. ..._CSUMSTART_MASK and
..._CSUMSTART_SHIFT, or an abbreviation thereof if that is too long.

>  
>  /*
>   * HME MAC-core register offsets
> @@ -289,7 +290,11 @@
>  #define HME_XD_RXLENMSK	0x3fff0000	/* packet length mask (rx) */
>  #define HME_XD_RXLENSHIFT	16
>  #define HME_XD_TXLENMSK	0x00003fff	/* packet length mask (tx) */
> +#define HME_XD_TXCKSUM_SSHIFT	14
> +#define HME_XD_TXCKSUM_OSHIFT	20
>  #define HME_XD_RXCKSUM	0x0000ffff	/* packet checksum (rx) */
> +#define HME_XD_TCPCKSUM 0x13288000	/* precomputed tcp cksum */
> +#define HME_XD_UDPCKSUM 0x12888000	/* precomputed udp cksum */

If these two constants are still needed (one of my suggestion above
would eliminate them), please use the shift and bit constants defined
above, together with the actual offset values, to assemble them in
order to make them more readable and seem less like "magic values".

>  /* Macros to encode/decode the receive buffer size from the flags field */
>  #define HME_XD_ENCODE_RSIZE(sz)		\


Thanks,

	- Thomas

-- 
Thomas Moestl	<t.moestl@tu-bs.de>	http://www.tu-bs.de/~y0015675/
		<tmm@FreeBSD.org>	http://people.FreeBSD.org/~tmm/
"Reality continues to ruin my life."		-- Calvin and Hobbes



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20040617024759.GA5610>