From owner-svn-src-stable@FreeBSD.ORG Tue Feb 26 15:51:47 2013 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by hub.freebsd.org (Postfix) with ESMTP id EE10413A; Tue, 26 Feb 2013 15:51:47 +0000 (UTC) (envelope-from gallatin@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) by mx1.freebsd.org (Postfix) with ESMTP id D1A4BB68; Tue, 26 Feb 2013 15:51:47 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.5/8.14.5) with ESMTP id r1QFplT6066402; Tue, 26 Feb 2013 15:51:47 GMT (envelope-from gallatin@svn.freebsd.org) Received: (from gallatin@localhost) by svn.freebsd.org (8.14.5/8.14.5/Submit) id r1QFplDV066399; Tue, 26 Feb 2013 15:51:47 GMT (envelope-from gallatin@svn.freebsd.org) Message-Id: <201302261551.r1QFplDV066399@svn.freebsd.org> From: Andrew Gallatin Date: Tue, 26 Feb 2013 15:51:47 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r247320 - in stable/9/sys: dev/mxge modules/mxge/mxge X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 26 Feb 2013 15:51:48 -0000 Author: gallatin Date: Tue Feb 26 15:51:46 2013 New Revision: 247320 URL: http://svnweb.freebsd.org/changeset/base/247320 Log: MFC r247011: Add support to mxge for IPv6 TX csum offload & IPv6 TSO. Sponsored by: Myricom, Inc. Modified: stable/9/sys/dev/mxge/if_mxge.c stable/9/sys/dev/mxge/if_mxge_var.h stable/9/sys/modules/mxge/mxge/Makefile Directory Properties: stable/9/sys/ (props changed) stable/9/sys/dev/ (props changed) stable/9/sys/modules/ (props changed) Modified: stable/9/sys/dev/mxge/if_mxge.c ============================================================================== --- stable/9/sys/dev/mxge/if_mxge.c Tue Feb 26 15:25:43 2013 (r247319) +++ stable/9/sys/dev/mxge/if_mxge.c Tue Feb 26 15:51:46 2013 (r247320) @@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include +#include #include #include @@ -91,6 +93,7 @@ __FBSDID("$FreeBSD$"); #endif #include "opt_inet.h" +#include "opt_inet6.h" /* tunable params */ static int mxge_nvidia_ecrc_enable = 1; @@ -1809,21 +1812,99 @@ mxge_submit_req(mxge_tx_ring_t *tx, mcp_ wmb(); } +static int +mxge_parse_tx(struct mxge_slice_state *ss, struct mbuf *m, + struct mxge_pkt_info *pi) +{ + struct ether_vlan_header *eh; + uint16_t etype; + int tso = m->m_pkthdr.csum_flags & (CSUM_TSO); +#if IFCAP_TSO6 && defined(INET6) + int nxt; +#endif + + eh = mtod(m, struct ether_vlan_header *); + if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + etype = ntohs(eh->evl_proto); + pi->ip_off = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; + } else { + etype = ntohs(eh->evl_encap_proto); + pi->ip_off = ETHER_HDR_LEN; + } + + switch (etype) { + case ETHERTYPE_IP: + /* + * ensure ip header is in first mbuf, copy it to a + * scratch buffer if not + */ + pi->ip = (struct ip *)(m->m_data + pi->ip_off); + pi->ip6 = NULL; + if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip))) { + m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip), + ss->scratch); + pi->ip = (struct ip *)(ss->scratch + pi->ip_off); + } + pi->ip_hlen = pi->ip->ip_hl << 2; + if (!tso) + return 0; + + if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + + sizeof(struct tcphdr))) { + m_copydata(m, 0, pi->ip_off + pi->ip_hlen + + sizeof(struct tcphdr), ss->scratch); + pi->ip = (struct ip *)(ss->scratch + pi->ip_off); + } + pi->tcp = (struct tcphdr *)((char *)pi->ip + pi->ip_hlen); + break; +#if IFCAP_TSO6 && defined(INET6) + case ETHERTYPE_IPV6: + pi->ip6 = (struct ip6_hdr *)(m->m_data + pi->ip_off); + if (__predict_false(m->m_len < pi->ip_off + sizeof(*pi->ip6))) { + m_copydata(m, 0, pi->ip_off + sizeof(*pi->ip6), + ss->scratch); + pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); + } + nxt = 0; + pi->ip_hlen = ip6_lasthdr(m, pi->ip_off, IPPROTO_IPV6, &nxt); + pi->ip_hlen -= pi->ip_off; + if (nxt != IPPROTO_TCP && nxt != IPPROTO_UDP) + return EINVAL; + + if (!tso) + return 0; + + if (pi->ip_off + pi->ip_hlen > ss->sc->max_tso6_hlen) + return EINVAL; + + if (__predict_false(m->m_len < pi->ip_off + pi->ip_hlen + + sizeof(struct tcphdr))) { + m_copydata(m, 0, pi->ip_off + pi->ip_hlen + + sizeof(struct tcphdr), ss->scratch); + pi->ip6 = (struct ip6_hdr *)(ss->scratch + pi->ip_off); + } + pi->tcp = (struct tcphdr *)((char *)pi->ip6 + pi->ip_hlen); + break; +#endif + default: + return EINVAL; + } + return 0; +} + #if IFCAP_TSO4 static void mxge_encap_tso(struct mxge_slice_state *ss, struct mbuf *m, - int busdma_seg_cnt, int ip_off) + int busdma_seg_cnt, struct mxge_pkt_info *pi) { mxge_tx_ring_t *tx; mcp_kreq_ether_send_t *req; bus_dma_segment_t *seg; - struct ip *ip; - struct tcphdr *tcp; uint32_t low, high_swapped; int len, seglen, cum_len, cum_len_next; int next_is_first, chop, cnt, rdma_count, small; - uint16_t pseudo_hdr_offset, cksum_offset, mss; + uint16_t pseudo_hdr_offset, cksum_offset, mss, sum; uint8_t flags, flags_next; static int once; @@ -1834,38 +1915,33 @@ mxge_encap_tso(struct mxge_slice_state * * header portion of the TSO packet. */ - /* ensure we have the ethernet, IP and TCP - header together in the first mbuf, copy - it to a scratch buffer if not */ - if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { - m_copydata(m, 0, ip_off + sizeof (*ip), - ss->scratch); - ip = (struct ip *)(ss->scratch + ip_off); - } else { - ip = (struct ip *)(mtod(m, char *) + ip_off); - } - if (__predict_false(m->m_len < ip_off + (ip->ip_hl << 2) - + sizeof (*tcp))) { - m_copydata(m, 0, ip_off + (ip->ip_hl << 2) - + sizeof (*tcp), ss->scratch); - ip = (struct ip *)(mtod(m, char *) + ip_off); - } - - tcp = (struct tcphdr *)((char *)ip + (ip->ip_hl << 2)); - cum_len = -(ip_off + ((ip->ip_hl + tcp->th_off) << 2)); - cksum_offset = ip_off + (ip->ip_hl << 2); + cksum_offset = pi->ip_off + pi->ip_hlen; + cum_len = -(cksum_offset + (pi->tcp->th_off << 2)); /* TSO implies checksum offload on this hardware */ - if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP)) == 0)) { + if (__predict_false((m->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) == 0)) { /* * If packet has full TCP csum, replace it with pseudo hdr * sum that the NIC expects, otherwise the NIC will emit * packets with bad TCP checksums. */ - m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); - tcp->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(IPPROTO_TCP + (m->m_pkthdr.len - cksum_offset))); + if (pi->ip6) { +#if (CSUM_TCP_IPV6 != 0) && defined(INET6) + m->m_pkthdr.csum_flags |= CSUM_TCP_IPV6; + sum = in6_cksum_pseudo(pi->ip6, + m->m_pkthdr.len - cksum_offset, + IPPROTO_TCP, 0); +#endif + } else { + m->m_pkthdr.csum_flags |= CSUM_TCP; + sum = in_pseudo(pi->ip->ip_src.s_addr, + pi->ip->ip_dst.s_addr, + htons(IPPROTO_TCP + (m->m_pkthdr.len - + cksum_offset))); + } + m_copyback(m, offsetof(struct tcphdr, th_sum) + + cksum_offset, sizeof(sum), (caddr_t)&sum); } flags = MXGEFW_FLAGS_TSO_HDR | MXGEFW_FLAGS_FIRST; @@ -1875,6 +1951,14 @@ mxge_encap_tso(struct mxge_slice_state * * the checksum by parsing the header. */ pseudo_hdr_offset = htobe16(mss); + if (pi->ip6) { + /* + * for IPv6 TSO, the "checksum offset" is re-purposed + * to store the TCP header len + */ + cksum_offset = (pi->tcp->th_off << 2); + } + tx = &ss->tx; req = tx->req_list; seg = tx->seg_list; @@ -1946,10 +2030,12 @@ mxge_encap_tso(struct mxge_slice_state * req++; cnt++; rdma_count++; - if (__predict_false(cksum_offset > seglen)) - cksum_offset -= seglen; - else - cksum_offset = 0; + if (cksum_offset != 0 && !pi->ip6) { + if (__predict_false(cksum_offset > seglen)) + cksum_offset -= seglen; + else + cksum_offset = 0; + } if (__predict_false(cnt > tx->max_desc)) goto drop; } @@ -2029,14 +2115,14 @@ mxge_vlan_tag_insert(struct mbuf *m) static void mxge_encap(struct mxge_slice_state *ss, struct mbuf *m) { + struct mxge_pkt_info pi = {0,0,0,0}; mxge_softc_t *sc; mcp_kreq_ether_send_t *req; bus_dma_segment_t *seg; struct mbuf *m_tmp; struct ifnet *ifp; mxge_tx_ring_t *tx; - struct ip *ip; - int cnt, cum_len, err, i, idx, odd_flag, ip_off; + int cnt, cum_len, err, i, idx, odd_flag; uint16_t pseudo_hdr_offset; uint8_t flags, cksum_offset; @@ -2045,15 +2131,19 @@ mxge_encap(struct mxge_slice_state *ss, ifp = sc->ifp; tx = &ss->tx; - ip_off = sizeof (struct ether_header); #ifdef MXGE_NEW_VLAN_API if (m->m_flags & M_VLANTAG) { m = mxge_vlan_tag_insert(m); if (__predict_false(m == NULL)) - goto drop; - ip_off += ETHER_VLAN_ENCAP_LEN; + goto drop_without_m; } #endif + if (m->m_pkthdr.csum_flags & + (CSUM_TSO | CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { + if (mxge_parse_tx(ss, m, &pi)) + goto drop; + } + /* (try to) map the frame for DMA */ idx = tx->req & tx->mask; err = bus_dmamap_load_mbuf_sg(tx->dmat, tx->info[idx].map, @@ -2085,7 +2175,7 @@ mxge_encap(struct mxge_slice_state *ss, #if IFCAP_TSO4 /* TSO is different enough, we handle it in another routine */ if (m->m_pkthdr.csum_flags & (CSUM_TSO)) { - mxge_encap_tso(ss, m, cnt, ip_off); + mxge_encap_tso(ss, m, cnt, &pi); return; } #endif @@ -2096,17 +2186,11 @@ mxge_encap(struct mxge_slice_state *ss, flags = MXGEFW_FLAGS_NO_TSO; /* checksum offloading? */ - if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA)) { + if (m->m_pkthdr.csum_flags & + (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6)) { /* ensure ip header is in first mbuf, copy it to a scratch buffer if not */ - if (__predict_false(m->m_len < ip_off + sizeof (*ip))) { - m_copydata(m, 0, ip_off + sizeof (*ip), - ss->scratch); - ip = (struct ip *)(ss->scratch + ip_off); - } else { - ip = (struct ip *)(mtod(m, char *) + ip_off); - } - cksum_offset = ip_off + (ip->ip_hl << 2); + cksum_offset = pi.ip_off + pi.ip_hlen; pseudo_hdr_offset = cksum_offset + m->m_pkthdr.csum_data; pseudo_hdr_offset = htobe16(pseudo_hdr_offset); req->cksum_offset = cksum_offset; @@ -2189,6 +2273,7 @@ mxge_encap(struct mxge_slice_state *ss, drop: m_freem(m); +drop_without_m: ss->oerrors++; return; } @@ -4125,8 +4210,7 @@ mxge_ioctl(struct ifnet *ifp, u_long com if (mask & IFCAP_TXCSUM) { if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); - ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP - | CSUM_TSO); + ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP); } else { ifp->if_capenable |= IFCAP_TXCSUM; ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); @@ -4143,7 +4227,6 @@ mxge_ioctl(struct ifnet *ifp, u_long com if (mask & IFCAP_TSO4) { if (IFCAP_TSO4 & ifp->if_capenable) { ifp->if_capenable &= ~IFCAP_TSO4; - ifp->if_hwassist &= ~CSUM_TSO; } else if (IFCAP_TXCSUM & ifp->if_capenable) { ifp->if_capenable |= IFCAP_TSO4; ifp->if_hwassist |= CSUM_TSO; @@ -4153,6 +4236,43 @@ mxge_ioctl(struct ifnet *ifp, u_long com err = EINVAL; } } +#if IFCAP_TSO6 + if (mask & IFCAP_TXCSUM_IPV6) { + if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { + ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 + | IFCAP_TSO6); + ifp->if_hwassist &= ~(CSUM_TCP_IPV6 + | CSUM_UDP); + } else { + ifp->if_capenable |= IFCAP_TXCSUM_IPV6; + ifp->if_hwassist |= (CSUM_TCP_IPV6 + | CSUM_UDP_IPV6); + } +#ifdef NOTYET + } else if (mask & IFCAP_RXCSUM6) { + if (IFCAP_RXCSUM6 & ifp->if_capenable) { + ifp->if_capenable &= ~IFCAP_RXCSUM6; + sc->csum_flag = 0; + } else { + ifp->if_capenable |= IFCAP_RXCSUM6; + sc->csum_flag = 1; + } +#endif + } + if (mask & IFCAP_TSO6) { + if (IFCAP_TSO6 & ifp->if_capenable) { + ifp->if_capenable &= ~IFCAP_TSO6; + } else if (IFCAP_TXCSUM_IPV6 & ifp->if_capenable) { + ifp->if_capenable |= IFCAP_TSO6; + ifp->if_hwassist |= CSUM_TSO; + } else { + printf("mxge requires tx checksum offload" + " be enabled to use TSO\n"); + err = EINVAL; + } + } +#endif /*IFCAP_TSO6 */ + if (mask & IFCAP_LRO) { if (IFCAP_LRO & ifp->if_capenable) err = mxge_change_lro_locked(sc, 0); @@ -4645,6 +4765,7 @@ mxge_add_irq(mxge_softc_t *sc) static int mxge_attach(device_t dev) { + mxge_cmd_t cmd; mxge_softc_t *sc = device_get_softc(dev); struct ifnet *ifp; int err, rid; @@ -4775,7 +4896,7 @@ mxge_attach(device_t dev) ifp->if_baudrate = IF_Gbps(10UL); ifp->if_capabilities = IFCAP_RXCSUM | IFCAP_TXCSUM | IFCAP_TSO4 | - IFCAP_VLAN_MTU | IFCAP_LINKSTATE; + IFCAP_VLAN_MTU | IFCAP_LINKSTATE | IFCAP_TXCSUM_IPV6; #ifdef INET ifp->if_capabilities |= IFCAP_LRO; #endif @@ -4788,7 +4909,6 @@ mxge_attach(device_t dev) sc->fw_ver_tiny >= 32) ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif - sc->max_mtu = mxge_max_mtu(sc); if (sc->max_mtu >= 9000) ifp->if_capabilities |= IFCAP_JUMBO_MTU; @@ -4797,6 +4917,14 @@ mxge_attach(device_t dev) "latest firmware for 9000 byte jumbo support\n", sc->max_mtu - ETHER_HDR_LEN); ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_TSO; + ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; + /* check to see if f/w supports TSO for IPv6 */ + if (!mxge_send_cmd(sc, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, &cmd)) { + if (CSUM_TCP_IPV6) + ifp->if_capabilities |= IFCAP_TSO6; + sc->max_tso6_hlen = min(cmd.data0, + sizeof (sc->ss[0].scratch)); + } ifp->if_capenable = ifp->if_capabilities; if (sc->lro_cnt == 0) ifp->if_capenable &= ~IFCAP_LRO; Modified: stable/9/sys/dev/mxge/if_mxge_var.h ============================================================================== --- stable/9/sys/dev/mxge/if_mxge_var.h Tue Feb 26 15:25:43 2013 (r247319) +++ stable/9/sys/dev/mxge/if_mxge_var.h Tue Feb 26 15:51:46 2013 (r247320) @@ -50,6 +50,19 @@ $FreeBSD$ #define IFNET_BUF_RING 1 #endif +#if (__FreeBSD_version < 1000020) +#undef IF_Kbps +#undef IF_Mbps +#undef IF_Gbps +#define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ +#define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ +#define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ +static __inline void +if_initbaudrate(struct ifnet *ifp, uintmax_t baud) +{ + ifp->if_baudrate = baud; +} +#endif #ifndef VLAN_CAPABILITIES #define VLAN_CAPABILITIES(ifp) #define mxge_vlans_active(sc) (sc)->ifp->if_nvlans @@ -73,10 +86,33 @@ $FreeBSD$ #define IFCAP_TSO4 0 #endif +#ifndef IFCAP_TSO6 +#define IFCAP_TSO6 0 +#endif + +#ifndef IFCAP_TXCSUM_IPV6 +#define IFCAP_TXCSUM_IPV6 0 +#endif + +#ifndef IFCAP_RXCSUM_IPV6 +#define IFCAP_RXCSUM_IPV6 0 +#endif + #ifndef CSUM_TSO #define CSUM_TSO 0 #endif +#ifndef CSUM_TCP_IPV6 +#define CSUM_TCP_IPV6 0 +#endif + +#ifndef CSUM_UDP_IPV6 +#define CSUM_UDP_IPV6 0 +#endif + +#ifndef CSUM_DELAY_DATA_IPV6 +#define CSUM_DELAY_DATA_IPV6 0 +#endif typedef struct { void *addr; @@ -270,6 +306,7 @@ struct mxge_softc { int dying; int connector; int current_media; + int max_tso6_hlen; mxge_dma_t dmabench_dma; struct callout co_hdl; struct taskqueue *tq; @@ -312,6 +349,15 @@ struct mxge_media_type char *name; }; +struct mxge_pkt_info { + int ip_off; + int ip_hlen; + struct ip *ip; + struct ip6_hdr *ip6; + struct tcphdr *tcp; +}; + + /* implement our own memory barriers, since bus_space_barrier cannot handle write-combining regions */ Modified: stable/9/sys/modules/mxge/mxge/Makefile ============================================================================== --- stable/9/sys/modules/mxge/mxge/Makefile Tue Feb 26 15:25:43 2013 (r247319) +++ stable/9/sys/modules/mxge/mxge/Makefile Tue Feb 26 15:51:46 2013 (r247320) @@ -3,6 +3,6 @@ .PATH: ${.CURDIR}/../../../dev/mxge KMOD= if_mxge -SRCS= if_mxge.c mxge_lro.c device_if.h bus_if.h pci_if.h opt_inet.h +SRCS= if_mxge.c mxge_lro.c device_if.h bus_if.h pci_if.h opt_inet.h opt_inet6.h .include