Date: Wed, 22 Jun 2011 00:48:14 +0000 (UTC) From: Pyun YongHyeon <yongari@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org Subject: svn commit: r223394 - stable/8/sys/dev/msk Message-ID: <201106220048.p5M0mEct024285@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: yongari Date: Wed Jun 22 00:48:13 2011 New Revision: 223394 URL: http://svn.freebsd.org/changeset/base/223394 Log: MFC r205091,216860: r205091: Implement Rx checksum offloading for Yukon EC, Yukon Ultra, Yukon FE and Yukon Ultra2. These controllers provide very simple checksum computation mechanism and it requires additional pseudo header checksum computation in upper stack. Even though I couldn't see much performance difference with/without Rx checksum offloading it may help notebook based controllers. Actually controller can compute two checksum value by giving different starting position of checksum computation on received frame. However, for long time, Marvell's checksum offloading engine have been known to have several silicon bugs so don't blindly trust computed partial checksum value. Instead, compute partial checksum twice by giving the same checksum computation position and compare the result. If the value is different it's clear indication of hardware bug. This configuration lose IP checksum offloading capability but I think it's better to take safe route. Note, Rx checksum offloading for Yukon XL was still disabled due to known silicon bug. r216860: Fix endianness bug introduced in r205091. After controller updates control word in a RX LE, driver converts it to host byte order. The checksum value in the control word is stored in big endian form by controller. r205091 didn't account for the host byte order conversion such that the checksum value was incorrectly interpreted on big endian architectures which in turn made all TCP/UDP frames dropped. Make RX checksum offload work on any architectures by swapping the checksum value. Reported by: Sreekanth M. ( kanthms <> netlogicmicro dot com ) Tested by: Sreekanth M. ( kanthms <> netlogicmicro dot com ) Modified: stable/8/sys/dev/msk/if_msk.c stable/8/sys/dev/msk/if_mskreg.h Directory Properties: stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) Modified: stable/8/sys/dev/msk/if_msk.c ============================================================================== --- stable/8/sys/dev/msk/if_msk.c Wed Jun 22 00:39:36 2011 (r223393) +++ stable/8/sys/dev/msk/if_msk.c Wed Jun 22 00:48:13 2011 (r223394) @@ -270,6 +270,7 @@ static void msk_intr_hwerr(struct msk_so #ifndef __NO_STRICT_ALIGNMENT static __inline void msk_fixup_rx(struct mbuf *); #endif +static __inline void msk_rxcsum(struct msk_if_softc *, uint32_t, struct mbuf *); static void msk_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_jumbo_rxeof(struct msk_if_softc *, uint32_t, uint32_t, int); static void msk_txeof(struct msk_if_softc *, int); @@ -294,6 +295,7 @@ static int msk_txrx_dma_alloc(struct msk static int msk_rx_dma_jalloc(struct msk_if_softc *); static void msk_txrx_dma_free(struct msk_if_softc *); static void msk_rx_dma_jfree(struct msk_if_softc *); +static int msk_rx_fill(struct msk_if_softc *, int); static int msk_init_rx_ring(struct msk_if_softc *); static int msk_init_jumbo_rx_ring(struct msk_if_softc *); static void msk_init_tx_ring(struct msk_if_softc *); @@ -642,6 +644,54 @@ msk_setvlan(struct msk_if_softc *sc_if, } static int +msk_rx_fill(struct msk_if_softc *sc_if, int jumbo) +{ + uint16_t idx; + int i; + + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && + (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { + /* Wait until controller executes OP_TCPSTART command. */ + for (i = 10; i > 0; i--) { + DELAY(10); + idx = CSR_READ_2(sc_if->msk_softc, + Y2_PREF_Q_ADDR(sc_if->msk_rxq, + PREF_UNIT_GET_IDX_REG)); + if (idx != 0) + break; + } + if (i == 0) { + device_printf(sc_if->msk_if_dev, + "prefetch unit stuck?\n"); + return (ETIMEDOUT); + } + /* + * Fill consumed LE with free buffer. This can be done + * in Rx handler but we don't want to add special code + * in fast handler. + */ + if (jumbo > 0) { + if (msk_jumbo_newbuf(sc_if, 0) != 0) + return (ENOBUFS); + bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag, + sc_if->msk_cdata.msk_jumbo_rx_ring_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + } else { + if (msk_newbuf(sc_if, 0) != 0) + return (ENOBUFS); + bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag, + sc_if->msk_cdata.msk_rx_ring_map, + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); + } + sc_if->msk_cdata.msk_rx_prod = 0; + CSR_WRITE_2(sc_if->msk_softc, + Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), + sc_if->msk_cdata.msk_rx_prod); + } + return (0); +} + +static int msk_init_rx_ring(struct msk_if_softc *sc_if) { struct msk_ring_data *rd; @@ -657,7 +707,21 @@ msk_init_rx_ring(struct msk_if_softc *sc rd = &sc_if->msk_rdata; bzero(rd->msk_rx_ring, sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT); prod = sc_if->msk_cdata.msk_rx_prod; - for (i = 0; i < MSK_RX_RING_CNT; i++) { + i = 0; + /* Have controller know how to compute Rx checksum. */ + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && + (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { + rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; + rxd->rx_m = NULL; + rxd->rx_le = &rd->msk_rx_ring[prod]; + rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | + ETHER_HDR_LEN); + rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); + MSK_INC(prod, MSK_RX_RING_CNT); + MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT); + i++; + } + for (; i < MSK_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_rx_ring[prod]; @@ -675,7 +739,8 @@ msk_init_rx_ring(struct msk_if_softc *sc CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); - + if (msk_rx_fill(sc_if, 0) != 0) + return (ENOBUFS); return (0); } @@ -696,7 +761,21 @@ msk_init_jumbo_rx_ring(struct msk_if_sof bzero(rd->msk_jumbo_rx_ring, sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT); prod = sc_if->msk_cdata.msk_rx_prod; - for (i = 0; i < MSK_JUMBO_RX_RING_CNT; i++) { + i = 0; + /* Have controller know how to compute Rx checksum. */ + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && + (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) { + rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; + rxd->rx_m = NULL; + rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; + rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 | + ETHER_HDR_LEN); + rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER); + MSK_INC(prod, MSK_JUMBO_RX_RING_CNT); + MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT); + i++; + } + for (; i < MSK_JUMBO_RX_RING_CNT; i++) { rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod]; rxd->rx_m = NULL; rxd->rx_le = &rd->msk_jumbo_rx_ring[prod]; @@ -713,7 +792,8 @@ msk_init_jumbo_rx_ring(struct msk_if_sof CSR_WRITE_2(sc_if->msk_softc, Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG), sc_if->msk_cdata.msk_rx_prod); - + if (msk_rx_fill(sc_if, 1) != 0) + return (ENOBUFS); return (0); } @@ -922,7 +1002,7 @@ msk_ioctl(struct ifnet *ifp, u_long comm struct msk_if_softc *sc_if; struct ifreq *ifr; struct mii_data *mii; - int error, mask; + int error, mask, reinit; sc_if = ifp->if_softc; ifr = (struct ifreq *)data; @@ -981,6 +1061,7 @@ msk_ioctl(struct ifnet *ifp, u_long comm error = ifmedia_ioctl(ifp, ifr, &mii->mii_media, command); break; case SIOCSIFCAP: + reinit = 0; MSK_IF_LOCK(sc_if); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if ((mask & IFCAP_TXCSUM) != 0 && @@ -992,8 +1073,11 @@ msk_ioctl(struct ifnet *ifp, u_long comm ifp->if_hwassist &= ~MSK_CSUM_FEATURES; } if ((mask & IFCAP_RXCSUM) != 0 && - (IFCAP_RXCSUM & ifp->if_capabilities) != 0) + (IFCAP_RXCSUM & ifp->if_capabilities) != 0) { ifp->if_capenable ^= IFCAP_RXCSUM; + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0) + reinit = 1; + } if ((mask & IFCAP_VLAN_HWCSUM) != 0 && (IFCAP_VLAN_HWCSUM & ifp->if_capabilities) != 0) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; @@ -1021,8 +1105,11 @@ msk_ioctl(struct ifnet *ifp, u_long comm ifp->if_hwassist &= ~(MSK_CSUM_FEATURES | CSUM_TSO); ifp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); } - VLAN_CAPABILITIES(ifp); + if (reinit > 0 && (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + msk_init_locked(sc_if); + } MSK_IF_UNLOCK(sc_if); break; default: @@ -1485,23 +1572,14 @@ msk_attach(device_t dev) if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - /* - * IFCAP_RXCSUM capability is intentionally disabled as the hardware - * has serious bug in Rx checksum offload for all Yukon II family - * hardware. It seems there is a workaround to make it work somtimes. - * However, the workaround also have to check OP code sequences to - * verify whether the OP code is correct. Sometimes it should compute - * IP/TCP/UDP checksum in driver in order to verify correctness of - * checksum computed by hardware. If you have to compute checksum - * with software to verify the hardware's checksum why have hardware - * compute the checksum? I think there is no reason to spend time to - * make Rx checksum offload work on Yukon II hardware. - */ ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO4; /* - * Enable Rx checksum offloading if controller support new - * descriptor format. + * Enable Rx checksum offloading if controller supports + * new descriptor formant and controller is not Yukon XL. */ + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && + sc->msk_hw_id != CHIP_ID_YUKON_XL) + ifp->if_capabilities |= IFCAP_RXCSUM; if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0 && (sc_if->msk_flags & MSK_FLAG_NORX_CSUM) == 0) ifp->if_capabilities |= IFCAP_RXCSUM; @@ -2919,6 +2997,96 @@ msk_fixup_rx(struct mbuf *m) } #endif +static __inline void +msk_rxcsum(struct msk_if_softc *sc_if, uint32_t control, struct mbuf *m) +{ + struct ether_header *eh; + struct ip *ip; + struct udphdr *uh; + int32_t hlen, len, pktlen, temp32; + uint16_t csum, *opts; + + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) != 0) { + if ((control & (CSS_IPV4 | CSS_IPFRAG)) == CSS_IPV4) { + m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; + if ((control & CSS_IPV4_CSUM_OK) != 0) + m->m_pkthdr.csum_flags |= CSUM_IP_VALID; + if ((control & (CSS_TCP | CSS_UDP)) != 0 && + (control & (CSS_TCPUDP_CSUM_OK)) != 0) { + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | + CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xffff; + } + } + return; + } + /* + * Marvell Yukon controllers that support OP_RXCHKS has known + * to have various Rx checksum offloading bugs. These + * controllers can be configured to compute simple checksum + * at two different positions. So we can compute IP and TCP/UDP + * checksum at the same time. We intentionally have controller + * compute TCP/UDP checksum twice by specifying the same + * checksum start position and compare the result. If the value + * is different it would indicate the hardware logic was wrong. + */ + if ((sc_if->msk_csum & 0xFFFF) != (sc_if->msk_csum >> 16)) { + if (bootverbose) + device_printf(sc_if->msk_if_dev, + "Rx checksum value mismatch!\n"); + return; + } + pktlen = m->m_pkthdr.len; + if (pktlen < sizeof(struct ether_header) + sizeof(struct ip)) + return; + eh = mtod(m, struct ether_header *); + if (eh->ether_type != htons(ETHERTYPE_IP)) + return; + ip = (struct ip *)(eh + 1); + if (ip->ip_v != IPVERSION) + return; + + hlen = ip->ip_hl << 2; + pktlen -= sizeof(struct ether_header); + if (hlen < sizeof(struct ip)) + return; + if (ntohs(ip->ip_len) < hlen) + return; + if (ntohs(ip->ip_len) != pktlen) + return; + if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) + return; /* can't handle fragmented packet. */ + + switch (ip->ip_p) { + case IPPROTO_TCP: + if (pktlen < (hlen + sizeof(struct tcphdr))) + return; + break; + case IPPROTO_UDP: + if (pktlen < (hlen + sizeof(struct udphdr))) + return; + uh = (struct udphdr *)((caddr_t)ip + hlen); + if (uh->uh_sum == 0) + return; /* no checksum */ + break; + default: + return; + } + csum = bswap16(sc_if->msk_csum & 0xFFFF); + /* Checksum fixup for IP options. */ + len = hlen - sizeof(struct ip); + if (len > 0) { + opts = (uint16_t *)(ip + 1); + for (; len > 0; len -= sizeof(uint16_t), opts++) { + temp32 = csum - *opts; + temp32 = (temp32 >> 16) + (temp32 & 65535); + csum = temp32 & 65535; + } + } + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID; + m->m_pkthdr.csum_data = csum; +} + static void msk_rxeof(struct msk_if_softc *sc_if, uint32_t status, uint32_t control, int len) @@ -2973,18 +3141,8 @@ msk_rxeof(struct msk_if_softc *sc_if, ui msk_fixup_rx(m); #endif ifp->if_ipackets++; - if ((ifp->if_capenable & IFCAP_RXCSUM) != 0 && - (control & (CSS_IPV4 | CSS_IPFRAG)) == CSS_IPV4) { - m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; - if ((control & CSS_IPV4_CSUM_OK) != 0) - m->m_pkthdr.csum_flags |= CSUM_IP_VALID; - if ((control & (CSS_TCP | CSS_UDP)) != 0 && - (control & (CSS_TCPUDP_CSUM_OK)) != 0) { - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | - CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; - } - } + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { @@ -3043,18 +3201,8 @@ msk_jumbo_rxeof(struct msk_if_softc *sc_ msk_fixup_rx(m); #endif ifp->if_ipackets++; - if ((ifp->if_capenable & IFCAP_RXCSUM) != 0 && - (control & (CSS_IPV4 | CSS_IPFRAG)) == CSS_IPV4) { - m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; - if ((control & CSS_IPV4_CSUM_OK) != 0) - m->m_pkthdr.csum_flags |= CSUM_IP_VALID; - if ((control & (CSS_TCP | CSS_UDP)) != 0 && - (control & (CSS_TCPUDP_CSUM_OK)) != 0) { - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | - CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; - } - } + if ((ifp->if_capenable & IFCAP_RXCSUM) != 0) + msk_rxcsum(sc_if, control, m); /* Check for VLAN tagged packets. */ if ((status & GMR_FS_VLAN) != 0 && (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) { @@ -3371,6 +3519,9 @@ msk_handle_events(struct msk_softc *sc) break; case OP_RXCHKSVLAN: sc_if->msk_vtag = ntohs(len); + /* FALLTHROUGH */ + case OP_RXCHKS: + sc_if->msk_csum = status; break; case OP_RXSTAT: if (!(sc_if->msk_ifp->if_drv_flags & IFF_DRV_RUNNING)) @@ -3758,8 +3909,13 @@ msk_init_locked(struct msk_if_softc *sc_ msk_init_tx_ring(sc_if); /* Disable Rx checksum offload and RSS hash. */ - CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), - BMU_DIS_RX_CHKSUM | BMU_DIS_RX_RSS_HASH); + reg = BMU_DIS_RX_RSS_HASH; + if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 && + (ifp->if_capenable & IFCAP_RXCSUM) != 0) + reg |= BMU_ENA_RX_CHKSUM; + else + reg |= BMU_DIS_RX_CHKSUM; + CSR_WRITE_4(sc, Q_ADDR(sc_if->msk_rxq, Q_CSR), reg); if (sc_if->msk_framesize > (MCLBYTES - MSK_RX_BUF_ALIGN)) { msk_set_prefetch(sc, sc_if->msk_rxq, sc_if->msk_rdata.msk_jumbo_rx_ring_paddr, Modified: stable/8/sys/dev/msk/if_mskreg.h ============================================================================== --- stable/8/sys/dev/msk/if_mskreg.h Wed Jun 22 00:39:36 2011 (r223393) +++ stable/8/sys/dev/msk/if_mskreg.h Wed Jun 22 00:48:13 2011 (r223394) @@ -2555,6 +2555,7 @@ struct msk_if_softc { struct msk_hw_stats msk_stats; int msk_if_flags; uint16_t msk_vtag; /* VLAN tag id. */ + uint32_t msk_csum; }; #define MSK_TIMEOUT 1000
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201106220048.p5M0mEct024285>