Date: Thu, 24 May 2018 10:18:14 +0000 (UTC)
From: Navdeep Parhar <np@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r334143 - head/sys/dev/cxgbe
Message-ID: <201805241018.w4OAIEgO076752@repo.freebsd.org>
Author: np Date: Thu May 24 10:18:14 2018 New Revision: 334143 URL: https://svnweb.freebsd.org/changeset/base/334143 Log: cxgbe(4): Data path for rate-limited tx. This is hardware support for the SO_MAX_PACING_RATE sockopt (see setsockopt(2)), which is available in kernels built with "options RATELIMIT". Relnotes: Yes Sponsored by: Chelsio Communications Modified: head/sys/dev/cxgbe/adapter.h head/sys/dev/cxgbe/offload.h head/sys/dev/cxgbe/t4_main.c head/sys/dev/cxgbe/t4_sched.c head/sys/dev/cxgbe/t4_sge.c Modified: head/sys/dev/cxgbe/adapter.h ============================================================================== --- head/sys/dev/cxgbe/adapter.h Thu May 24 10:17:49 2018 (r334142) +++ head/sys/dev/cxgbe/adapter.h Thu May 24 10:18:14 2018 (r334143) @@ -1217,6 +1217,10 @@ void t4_register_an_handler(an_handler_t); void t4_register_fw_msg_handler(int, fw_msg_handler_t); void t4_register_cpl_handler(int, cpl_handler_t); void t4_register_shared_cpl_handler(int, cpl_handler_t, int); +#ifdef RATELIMIT +int ethofld_transmit(struct ifnet *, struct mbuf *); +void send_etid_flush_wr(struct cxgbe_snd_tag *); +#endif /* t4_tracer.c */ struct t4_tracer; @@ -1239,11 +1243,13 @@ void t4_release_cl_rl_kbps(struct adapter *, int, int) #ifdef RATELIMIT void t4_init_etid_table(struct adapter *); void t4_free_etid_table(struct adapter *); +struct cxgbe_snd_tag *lookup_etid(struct adapter *, int); int cxgbe_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); int cxgbe_snd_tag_modify(struct m_snd_tag *, union if_snd_tag_modify_params *); int cxgbe_snd_tag_query(struct m_snd_tag *, union if_snd_tag_query_params *); void cxgbe_snd_tag_free(struct m_snd_tag *); +void cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *); #endif /* t4_filter.c */ Modified: head/sys/dev/cxgbe/offload.h ============================================================================== --- head/sys/dev/cxgbe/offload.h Thu May 24 10:17:49 2018 (r334142) +++ head/sys/dev/cxgbe/offload.h Thu May 24 10:18:14 2018 (r334143) @@ -79,6 +79,14 @@ union aopen_entry { union aopen_entry *next; }; +/* cxgbe_snd_tag flags */ +enum { + EO_FLOWC_PENDING = (1 << 0), /* flowc needs to be sent */ + EO_FLOWC_RPL_PENDING = (1 << 1), /* flowc credits due back */ + EO_SND_TAG_REF = (1 << 2), /* kernel has a ref on us */ + EO_FLUSH_RPL_PENDING = (1 << 3), /* credit flush rpl due back */ +}; + struct cxgbe_snd_tag { struct m_snd_tag com; struct adapter *adapter; @@ -86,13 +94,13 @@ struct cxgbe_snd_tag { struct mtx lock; int port_id; int etid; + struct mbufq pending_tx, pending_fwack; + int plen; struct sge_wrq *eo_txq; + uint32_t ctrl0; uint16_t iqid; int8_t schedcl; uint64_t max_rate; /* in bytes/s */ - int8_t next_credits; /* need these many tx credits next */ - uint8_t next_nsegs; /* next WR will have these many GL segs total */ - uint8_t next_msegs; /* max segs for a single mbuf in next chain */ uint8_t tx_total; /* total tx WR credits (in 16B units) */ uint8_t tx_credits; /* tx WR credits (in 16B units) available */ uint8_t tx_nocompl; /* tx WR credits since last compl request */ Modified: head/sys/dev/cxgbe/t4_main.c ============================================================================== --- head/sys/dev/cxgbe/t4_main.c Thu May 24 10:17:49 2018 (r334142) +++ head/sys/dev/cxgbe/t4_main.c Thu May 24 10:18:14 2018 (r334143) @@ -1891,6 +1891,17 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } +#ifdef RATELIMIT + if (m->m_pkthdr.snd_tag 
!= NULL) { + /* EAGAIN tells the stack we are not the correct interface. */ + if (__predict_false(ifp != m->m_pkthdr.snd_tag->ifp)) { + m_freem(m); + return (EAGAIN); + } + + return (ethofld_transmit(ifp, m)); + } +#endif /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; Modified: head/sys/dev/cxgbe/t4_sched.c ============================================================================== --- head/sys/dev/cxgbe/t4_sched.c Thu May 24 10:17:49 2018 (r334142) +++ head/sys/dev/cxgbe/t4_sched.c Thu May 24 10:18:14 2018 (r334143) @@ -529,7 +529,6 @@ alloc_etid(struct adapter *sc, struct cxgbe_snd_tag *c return (etid); } -#ifdef notyet struct cxgbe_snd_tag * lookup_etid(struct adapter *sc, int etid) { @@ -537,7 +536,6 @@ lookup_etid(struct adapter *sc, int etid) return (t->etid_tab[etid - t->etid_base].cst); } -#endif static void free_etid(struct adapter *sc, int etid) @@ -585,14 +583,21 @@ failed: } mtx_init(&cst->lock, "cst_lock", NULL, MTX_DEF); + mbufq_init(&cst->pending_tx, INT_MAX); + mbufq_init(&cst->pending_fwack, INT_MAX); cst->com.ifp = ifp; + cst->flags |= EO_FLOWC_PENDING | EO_SND_TAG_REF; cst->adapter = sc; cst->port_id = pi->port_id; cst->schedcl = schedcl; cst->max_rate = params->rate_limit.max_rate; - cst->next_credits = -1; cst->tx_credits = sc->params.ofldq_wr_cred; cst->tx_total = cst->tx_credits; + cst->plen = 0; + cst->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) | + V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(G_FW_VIID_PFN(vi->viid)) | + V_TXPKT_VF(G_FW_VIID_VIN(vi->viid)) | + V_TXPKT_VF_VLD(G_FW_VIID_VIVLD(vi->viid))); /* * Queues will be selected later when the connection flowid is available. @@ -616,6 +621,8 @@ cxgbe_snd_tag_modify(struct m_snd_tag *mst, /* XXX: is schedcl -1 ok here? */ MPASS(cst->schedcl >= 0 && cst->schedcl < sc->chip_params->nsched_cls); + mtx_lock(&cst->lock); + MPASS(cst->flags & EO_SND_TAG_REF); rc = t4_reserve_cl_rl_kbps(sc, cst->port_id, (params->rate_limit.max_rate * 8ULL / 1000), &schedcl); if (rc != 0) @@ -624,6 +631,7 @@ cxgbe_snd_tag_modify(struct m_snd_tag *mst, t4_release_cl_rl_kbps(sc, cst->port_id, cst->schedcl); cst->schedcl = schedcl; cst->max_rate = params->rate_limit.max_rate; + mtx_unlock(&cst->lock); return (0); } @@ -643,18 +651,53 @@ cxgbe_snd_tag_query(struct m_snd_tag *mst, return (0); } +/* + * Unlocks cst and frees it. + */ void -cxgbe_snd_tag_free(struct m_snd_tag *mst) +cxgbe_snd_tag_free_locked(struct cxgbe_snd_tag *cst) { - struct cxgbe_snd_tag *cst = mst_to_cst(mst); struct adapter *sc = cst->adapter; + mtx_assert(&cst->lock, MA_OWNED); + MPASS((cst->flags & EO_SND_TAG_REF) == 0); + MPASS(cst->tx_credits == cst->tx_total); + MPASS(cst->plen == 0); + MPASS(mbufq_first(&cst->pending_tx) == NULL); + MPASS(mbufq_first(&cst->pending_fwack) == NULL); + if (cst->etid >= 0) free_etid(sc, cst->etid); if (cst->schedcl != -1) t4_release_cl_rl_kbps(sc, cst->port_id, cst->schedcl); - if (mtx_initialized(&cst->lock)) - mtx_destroy(&cst->lock); + mtx_unlock(&cst->lock); + mtx_destroy(&cst->lock); free(cst, M_CXGBE); +} + +void +cxgbe_snd_tag_free(struct m_snd_tag *mst) +{ + struct cxgbe_snd_tag *cst = mst_to_cst(mst); + + mtx_lock(&cst->lock); + + /* The kernel is done with the snd_tag. Remove its reference. */ + MPASS(cst->flags & EO_SND_TAG_REF); + cst->flags &= ~EO_SND_TAG_REF; + + if (cst->ncompl == 0) { + /* + * No fw4_ack in flight. Free the tag right away if there are + * no outstanding credits. Request the firmware to return all + * credits for the etid otherwise. 
+ */ + if (cst->tx_credits == cst->tx_total) { + cxgbe_snd_tag_free_locked(cst); + return; /* cst is gone. */ + } + send_etid_flush_wr(cst); + } + mtx_unlock(&cst->lock); } #endif Modified: head/sys/dev/cxgbe/t4_sge.c ============================================================================== --- head/sys/dev/cxgbe/t4_sge.c Thu May 24 10:17:49 2018 (r334142) +++ head/sys/dev/cxgbe/t4_sge.c Thu May 24 10:18:14 2018 (r334143) @@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$"); #include <netinet/ip.h> #include <netinet/ip6.h> #include <netinet/tcp.h> +#include <netinet/udp.h> #include <machine/in_cksum.h> #include <machine/md_var.h> #include <vm/vm.h> @@ -153,7 +154,24 @@ TUNABLE_INT("hw.cxgbe.largest_rx_cluster", &largest_rx static int safest_rx_cluster = PAGE_SIZE; TUNABLE_INT("hw.cxgbe.safest_rx_cluster", &safest_rx_cluster); +#ifdef RATELIMIT /* + * Knob to control TCP timestamp rewriting, and the granularity of the tick used + * for rewriting. -1 and 0-3 are all valid values. + * -1: hardware should leave the TCP timestamps alone. + * 0: 1ms + * 1: 100us + * 2: 10us + * 3: 1us + */ +static int tsclk = -1; +TUNABLE_INT("hw.cxgbe.tsclk", &tsclk); + +static int eo_max_backlog = 1024 * 1024; +TUNABLE_INT("hw.cxgbe.eo_max_backlog", &eo_max_backlog); +#endif + +/* * The interrupt holdoff timers are multiplied by this value on T6+. * 1 and 3-17 (both inclusive) are legal values. */ @@ -279,6 +297,11 @@ static void drain_wrq_wr_list(struct adapter *, struct static int sysctl_uint16(SYSCTL_HANDLER_ARGS); static int sysctl_bufsizes(SYSCTL_HANDLER_ARGS); static int sysctl_tc(SYSCTL_HANDLER_ARGS); +#ifdef RATELIMIT +static inline u_int txpkt_eo_len16(u_int, u_int, u_int); +static int ethofld_fw4_ack(struct sge_iq *, const struct rss_header *, + struct mbuf *); +#endif static counter_u64_t extfree_refs; static counter_u64_t extfree_rels; @@ -515,6 +538,10 @@ t4_sge_modload(void) t4_register_cpl_handler(CPL_FW6_MSG, handle_fw_msg); t4_register_cpl_handler(CPL_SGE_EGR_UPDATE, handle_sge_egr_update); t4_register_cpl_handler(CPL_RX_PKT, t4_eth_rx); +#ifdef RATELIMIT + t4_register_shared_cpl_handler(CPL_FW4_ACK, ethofld_fw4_ack, + CPL_COOKIE_ETHOFLD); +#endif t4_register_fw_msg_handler(FW6_TYPE_CMD_RPL, t4_handle_fw_rpl); t4_register_fw_msg_handler(FW6_TYPE_WRERR_RPL, t4_handle_wrerr_rpl); } @@ -2078,7 +2105,68 @@ set_mbuf_len16(struct mbuf *m, uint8_t len16) m->m_pkthdr.PH_loc.eight[0] = len16; } +#ifdef RATELIMIT static inline int +mbuf_eo_nsegs(struct mbuf *m) +{ + + M_ASSERTPKTHDR(m); + return (m->m_pkthdr.PH_loc.eight[1]); +} + +static inline void +set_mbuf_eo_nsegs(struct mbuf *m, uint8_t nsegs) +{ + + M_ASSERTPKTHDR(m); + m->m_pkthdr.PH_loc.eight[1] = nsegs; +} + +static inline int +mbuf_eo_len16(struct mbuf *m) +{ + int n; + + M_ASSERTPKTHDR(m); + n = m->m_pkthdr.PH_loc.eight[2]; + MPASS(n > 0 && n <= SGE_MAX_WR_LEN / 16); + + return (n); +} + +static inline void +set_mbuf_eo_len16(struct mbuf *m, uint8_t len16) +{ + + M_ASSERTPKTHDR(m); + m->m_pkthdr.PH_loc.eight[2] = len16; +} + +static inline int +mbuf_eo_tsclk_tsoff(struct mbuf *m) +{ + + M_ASSERTPKTHDR(m); + return (m->m_pkthdr.PH_loc.eight[3]); +} + +static inline void +set_mbuf_eo_tsclk_tsoff(struct mbuf *m, uint8_t tsclk_tsoff) +{ + + M_ASSERTPKTHDR(m); + m->m_pkthdr.PH_loc.eight[3] = tsclk_tsoff; +} + +static inline int +needs_eo(struct mbuf *m) +{ + + return (m->m_pkthdr.snd_tag != NULL); +} +#endif + +static inline int needs_tso(struct mbuf *m) { @@ -2107,6 +2195,22 @@ needs_l4_csum(struct mbuf *m) } static inline int +needs_tcp_csum(struct 
mbuf *m) +{ + + M_ASSERTPKTHDR(m); + return (m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_TCP_IPV6 | CSUM_TSO)); +} + +static inline int +needs_udp_csum(struct mbuf *m) +{ + + M_ASSERTPKTHDR(m); + return (m->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_UDP_IPV6)); +} + +static inline int needs_vlan_insertion(struct mbuf *m) { @@ -2142,16 +2246,19 @@ m_advance(struct mbuf **pm, int *poffset, int len) /* * Can deal with empty mbufs in the chain that have m_len = 0, but the chain - * must have at least one mbuf that's not empty. + * must have at least one mbuf that's not empty. It is possible for this + * routine to return 0 if skip accounts for all the contents of the mbuf chain. */ static inline int -count_mbuf_nsegs(struct mbuf *m) +count_mbuf_nsegs(struct mbuf *m, int skip) { vm_paddr_t lastb, next; vm_offset_t va; int len, nsegs; - MPASS(m != NULL); + M_ASSERTPKTHDR(m); + MPASS(m->m_pkthdr.len > 0); + MPASS(m->m_pkthdr.len >= skip); nsegs = 0; lastb = 0; @@ -2160,15 +2267,20 @@ count_mbuf_nsegs(struct mbuf *m) len = m->m_len; if (__predict_false(len == 0)) continue; - va = mtod(m, vm_offset_t); + if (skip >= len) { + skip -= len; + continue; + } + va = mtod(m, vm_offset_t) + skip; + len -= skip; + skip = 0; next = pmap_kextract(va); - nsegs += sglist_count(m->m_data, len); + nsegs += sglist_count((void *)(uintptr_t)va, len); if (lastb + 1 == next) nsegs--; lastb = pmap_kextract(va + len - 1); } - MPASS(nsegs > 0); return (nsegs); } @@ -2204,7 +2316,7 @@ restart: */ M_ASSERTPKTHDR(m0); MPASS(m0->m_pkthdr.len > 0); - nsegs = count_mbuf_nsegs(m0); + nsegs = count_mbuf_nsegs(m0, 0); if (nsegs > (needs_tso(m0) ? TX_SGL_SEGS_TSO : TX_SGL_SEGS)) { if (defragged++ > 0 || (m = m_defrag(m0, M_NOWAIT)) == NULL) { rc = EFBIG; @@ -2230,7 +2342,20 @@ restart: else set_mbuf_len16(m0, txpkt_len16(nsegs, needs_tso(m0))); +#ifdef RATELIMIT + /* + * Ethofld is limited to TCP and UDP for now, and only when L4 hw + * checksumming is enabled. needs_l4_csum happens to check for all the + * right things. + */ + if (__predict_false(needs_eo(m0) && !needs_l4_csum(m0))) + m0->m_pkthdr.snd_tag = NULL; +#endif + if (!needs_tso(m0) && +#ifdef RATELIMIT + !needs_eo(m0) && +#endif !(sc->flags & IS_VF && (needs_l3_csum(m0) || needs_l4_csum(m0)))) return (0); @@ -2276,11 +2401,34 @@ restart: } #if defined(INET) || defined(INET6) - if (needs_tso(m0)) { + if (needs_tcp_csum(m0)) { tcp = m_advance(&m, &offset, m0->m_pkthdr.l3hlen); m0->m_pkthdr.l4hlen = tcp->th_off * 4; +#ifdef RATELIMIT + if (tsclk >= 0 && *(uint32_t *)(tcp + 1) == ntohl(0x0101080a)) { + set_mbuf_eo_tsclk_tsoff(m0, + V_FW_ETH_TX_EO_WR_TSCLK(tsclk) | + V_FW_ETH_TX_EO_WR_TSOFF(sizeof(*tcp) / 2 + 1)); + } else + set_mbuf_eo_tsclk_tsoff(m0, 0); + } else if (needs_udp_csum(m)) { + m0->m_pkthdr.l4hlen = sizeof(struct udphdr); +#endif } +#ifdef RATELIMIT + if (needs_eo(m0)) { + u_int immhdrs; + + /* EO WRs have the headers in the WR and not the GL. */ + immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + + m0->m_pkthdr.l4hlen; + nsegs = count_mbuf_nsegs(m0, immhdrs); + set_mbuf_eo_nsegs(m0, nsegs); + set_mbuf_eo_len16(m0, + txpkt_eo_len16(nsegs, immhdrs, needs_tso(m0))); + } #endif +#endif MPASS(m0 == *mp); return (0); } @@ -5302,3 +5450,416 @@ done: mtx_unlock(&sc->tc_lock); return (rc); } + +#ifdef RATELIMIT +/* + * len16 for a txpkt WR with a GL. Includes the firmware work request header. 
+ */ +static inline u_int +txpkt_eo_len16(u_int nsegs, u_int immhdrs, u_int tso) +{ + u_int n; + + MPASS(immhdrs > 0); + + n = roundup2(sizeof(struct fw_eth_tx_eo_wr) + + sizeof(struct cpl_tx_pkt_core) + immhdrs, 16); + if (__predict_false(nsegs == 0)) + goto done; + + nsegs--; /* first segment is part of ulptx_sgl */ + n += sizeof(struct ulptx_sgl) + 8 * ((3 * nsegs) / 2 + (nsegs & 1)); + if (tso) + n += sizeof(struct cpl_tx_pkt_lso_core); + +done: + return (howmany(n, 16)); +} + +#define ETID_FLOWC_NPARAMS 6 +#define ETID_FLOWC_LEN (roundup2((sizeof(struct fw_flowc_wr) + \ + ETID_FLOWC_NPARAMS * sizeof(struct fw_flowc_mnemval)), 16)) +#define ETID_FLOWC_LEN16 (howmany(ETID_FLOWC_LEN, 16)) + +static int +send_etid_flowc_wr(struct cxgbe_snd_tag *cst, struct port_info *pi, + struct vi_info *vi) +{ + struct wrq_cookie cookie; + u_int pfvf = G_FW_VIID_PFN(vi->viid) << S_FW_VIID_PFN; + struct fw_flowc_wr *flowc; + + mtx_assert(&cst->lock, MA_OWNED); + MPASS((cst->flags & (EO_FLOWC_PENDING | EO_FLOWC_RPL_PENDING)) == + EO_FLOWC_PENDING); + + flowc = start_wrq_wr(cst->eo_txq, ETID_FLOWC_LEN16, &cookie); + if (__predict_false(flowc == NULL)) + return (ENOMEM); + + bzero(flowc, ETID_FLOWC_LEN); + flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | + V_FW_FLOWC_WR_NPARAMS(ETID_FLOWC_NPARAMS) | V_FW_WR_COMPL(0)); + flowc->flowid_len16 = htonl(V_FW_WR_LEN16(ETID_FLOWC_LEN16) | + V_FW_WR_FLOWID(cst->etid)); + flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN; + flowc->mnemval[0].val = htobe32(pfvf); + flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH; + flowc->mnemval[1].val = htobe32(pi->tx_chan); + flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT; + flowc->mnemval[2].val = htobe32(pi->tx_chan); + flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID; + flowc->mnemval[3].val = htobe32(cst->iqid); + flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_EOSTATE; + flowc->mnemval[4].val = htobe32(FW_FLOWC_MNEM_EOSTATE_ESTABLISHED); + flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS; + flowc->mnemval[5].val = htobe32(cst->schedcl); + + commit_wrq_wr(cst->eo_txq, flowc, &cookie); + + cst->flags &= ~EO_FLOWC_PENDING; + cst->flags |= EO_FLOWC_RPL_PENDING; + MPASS(cst->tx_credits >= ETID_FLOWC_LEN16); /* flowc is first WR. */ + cst->tx_credits -= ETID_FLOWC_LEN16; + + return (0); +} + +#define ETID_FLUSH_LEN16 (howmany(sizeof (struct fw_flowc_wr), 16)) + +void +send_etid_flush_wr(struct cxgbe_snd_tag *cst) +{ + struct fw_flowc_wr *flowc; + struct wrq_cookie cookie; + + mtx_assert(&cst->lock, MA_OWNED); + + flowc = start_wrq_wr(cst->eo_txq, ETID_FLUSH_LEN16, &cookie); + if (__predict_false(flowc == NULL)) + CXGBE_UNIMPLEMENTED(__func__); + + bzero(flowc, ETID_FLUSH_LEN16 * 16); + flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) | + V_FW_FLOWC_WR_NPARAMS(0) | F_FW_WR_COMPL); + flowc->flowid_len16 = htobe32(V_FW_WR_LEN16(ETID_FLUSH_LEN16) | + V_FW_WR_FLOWID(cst->etid)); + + commit_wrq_wr(cst->eo_txq, flowc, &cookie); + + cst->flags |= EO_FLUSH_RPL_PENDING; + MPASS(cst->tx_credits >= ETID_FLUSH_LEN16); + cst->tx_credits -= ETID_FLUSH_LEN16; + cst->ncompl++; +} + +static void +write_ethofld_wr(struct cxgbe_snd_tag *cst, struct fw_eth_tx_eo_wr *wr, + struct mbuf *m0, int compl) +{ + struct cpl_tx_pkt_core *cpl; + uint64_t ctrl1; + uint32_t ctrl; /* used in many unrelated places */ + int len16, pktlen, nsegs, immhdrs; + caddr_t dst; + uintptr_t p; + struct ulptx_sgl *usgl; + struct sglist sg; + struct sglist_seg segs[38]; /* XXX: find real limit. 
XXX: get off the stack */ + + mtx_assert(&cst->lock, MA_OWNED); + M_ASSERTPKTHDR(m0); + KASSERT(m0->m_pkthdr.l2hlen > 0 && m0->m_pkthdr.l3hlen > 0 && + m0->m_pkthdr.l4hlen > 0, + ("%s: ethofld mbuf %p is missing header lengths", __func__, m0)); + + if (needs_udp_csum(m0)) { + CXGBE_UNIMPLEMENTED("UDP ethofld"); + } + + len16 = mbuf_eo_len16(m0); + nsegs = mbuf_eo_nsegs(m0); + pktlen = m0->m_pkthdr.len; + ctrl = sizeof(struct cpl_tx_pkt_core); + if (needs_tso(m0)) + ctrl += sizeof(struct cpl_tx_pkt_lso_core); + immhdrs = m0->m_pkthdr.l2hlen + m0->m_pkthdr.l3hlen + m0->m_pkthdr.l4hlen; + ctrl += immhdrs; + + wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_EO_WR) | + V_FW_ETH_TX_EO_WR_IMMDLEN(ctrl) | V_FW_WR_COMPL(!!compl)); + wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(len16) | + V_FW_WR_FLOWID(cst->etid)); + wr->r3 = 0; + wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG; + wr->u.tcpseg.ethlen = m0->m_pkthdr.l2hlen; + wr->u.tcpseg.iplen = htobe16(m0->m_pkthdr.l3hlen); + wr->u.tcpseg.tcplen = m0->m_pkthdr.l4hlen; + wr->u.tcpseg.tsclk_tsoff = mbuf_eo_tsclk_tsoff(m0); + wr->u.tcpseg.r4 = 0; + wr->u.tcpseg.r5 = 0; + wr->u.tcpseg.plen = htobe32(pktlen - immhdrs); + + if (needs_tso(m0)) { + struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1); + + wr->u.tcpseg.mss = htobe16(m0->m_pkthdr.tso_segsz); + + ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE | + F_LSO_LAST_SLICE | V_LSO_IPHDR_LEN(m0->m_pkthdr.l3hlen >> 2) + | V_LSO_TCPHDR_LEN(m0->m_pkthdr.l4hlen >> 2); + if (m0->m_pkthdr.l2hlen == sizeof(struct ether_vlan_header)) + ctrl |= V_LSO_ETHHDR_LEN(1); + if (m0->m_pkthdr.l3hlen == sizeof(struct ip6_hdr)) + ctrl |= F_LSO_IPV6; + lso->lso_ctrl = htobe32(ctrl); + lso->ipid_ofst = htobe16(0); + lso->mss = htobe16(m0->m_pkthdr.tso_segsz); + lso->seqno_offset = htobe32(0); + lso->len = htobe32(pktlen); + + cpl = (void *)(lso + 1); + } else { + wr->u.tcpseg.mss = htobe16(0xffff); + cpl = (void *)(wr + 1); + } + + /* Checksum offload must be requested for ethofld. */ + ctrl1 = 0; + MPASS(needs_l4_csum(m0)); + + /* VLAN tag insertion */ + if (needs_vlan_insertion(m0)) { + ctrl1 |= F_TXPKT_VLAN_VLD | + V_TXPKT_VLAN(m0->m_pkthdr.ether_vtag); + } + + /* CPL header */ + cpl->ctrl0 = cst->ctrl0; + cpl->pack = 0; + cpl->len = htobe16(pktlen); + cpl->ctrl1 = htobe64(ctrl1); + + /* Copy Ethernet, IP & TCP hdrs as immediate data */ + p = (uintptr_t)(cpl + 1); + m_copydata(m0, 0, immhdrs, (void *)p); + + /* SGL */ + dst = (void *)(cpl + 1); + if (nsegs > 0) { + int i, pad; + + /* zero-pad upto next 16Byte boundary, if not 16Byte aligned */ + p += immhdrs; + pad = 16 - (immhdrs & 0xf); + bzero((void *)p, pad); + + usgl = (void *)(p + pad); + usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) | + V_ULPTX_NSGE(nsegs)); + + sglist_init(&sg, nitems(segs), segs); + for (; m0 != NULL; m0 = m0->m_next) { + if (__predict_false(m0->m_len == 0)) + continue; + if (immhdrs >= m0->m_len) { + immhdrs -= m0->m_len; + continue; + } + + sglist_append(&sg, mtod(m0, char *) + immhdrs, + m0->m_len - immhdrs); + immhdrs = 0; + } + MPASS(sg.sg_nseg == nsegs); + + /* + * Zero pad last 8B in case the WR doesn't end on a 16B + * boundary. 
+ */ + *(uint64_t *)((char *)wr + len16 * 16 - 8) = 0; + + usgl->len0 = htobe32(segs[0].ss_len); + usgl->addr0 = htobe64(segs[0].ss_paddr); + for (i = 0; i < nsegs - 1; i++) { + usgl->sge[i / 2].len[i & 1] = htobe32(segs[i + 1].ss_len); + usgl->sge[i / 2].addr[i & 1] = htobe64(segs[i + 1].ss_paddr); + } + if (i & 1) + usgl->sge[i / 2].len[1] = htobe32(0); + } + +} + +static void +ethofld_tx(struct cxgbe_snd_tag *cst) +{ + struct mbuf *m; + struct wrq_cookie cookie; + int next_credits, compl; + struct fw_eth_tx_eo_wr *wr; + + mtx_assert(&cst->lock, MA_OWNED); + + while ((m = mbufq_first(&cst->pending_tx)) != NULL) { + M_ASSERTPKTHDR(m); + + /* How many len16 credits do we need to send this mbuf. */ + next_credits = mbuf_eo_len16(m); + MPASS(next_credits > 0); + if (next_credits > cst->tx_credits) { + /* + * Tx will make progress eventually because there is at + * least one outstanding fw4_ack that will return + * credits and kick the tx. + */ + MPASS(cst->ncompl > 0); + return; + } + wr = start_wrq_wr(cst->eo_txq, next_credits, &cookie); + if (__predict_false(wr == NULL)) { + /* XXX: wishful thinking, not a real assertion. */ + MPASS(cst->ncompl > 0); + return; + } + cst->tx_credits -= next_credits; + cst->tx_nocompl += next_credits; + compl = cst->ncompl == 0 || cst->tx_nocompl >= cst->tx_total / 2; + ETHER_BPF_MTAP(cst->com.ifp, m); + write_ethofld_wr(cst, wr, m, compl); + commit_wrq_wr(cst->eo_txq, wr, &cookie); + if (compl) { + cst->ncompl++; + cst->tx_nocompl = 0; + } + (void) mbufq_dequeue(&cst->pending_tx); + mbufq_enqueue(&cst->pending_fwack, m); + } +} + +int +ethofld_transmit(struct ifnet *ifp, struct mbuf *m0) +{ + struct cxgbe_snd_tag *cst; + int rc; + + MPASS(m0->m_nextpkt == NULL); + MPASS(m0->m_pkthdr.snd_tag != NULL); + cst = mst_to_cst(m0->m_pkthdr.snd_tag); + + mtx_lock(&cst->lock); + MPASS(cst->flags & EO_SND_TAG_REF); + + if (__predict_false(cst->flags & EO_FLOWC_PENDING)) { + struct vi_info *vi = ifp->if_softc; + struct port_info *pi = vi->pi; + struct adapter *sc = pi->adapter; + const uint32_t rss_mask = vi->rss_size - 1; + uint32_t rss_hash; + + cst->eo_txq = &sc->sge.ofld_txq[vi->first_ofld_txq]; + if (M_HASHTYPE_ISHASH(m0)) + rss_hash = m0->m_pkthdr.flowid; + else + rss_hash = arc4random(); + /* We assume RSS hashing */ + cst->iqid = vi->rss[rss_hash & rss_mask]; + cst->eo_txq += rss_hash % vi->nofldtxq; + rc = send_etid_flowc_wr(cst, pi, vi); + if (rc != 0) + goto done; + } + + if (__predict_false(cst->plen + m0->m_pkthdr.len > eo_max_backlog)) { + rc = ENOBUFS; + goto done; + } + + mbufq_enqueue(&cst->pending_tx, m0); + cst->plen += m0->m_pkthdr.len; + + ethofld_tx(cst); + rc = 0; +done: + mtx_unlock(&cst->lock); + if (__predict_false(rc != 0)) + m_freem(m0); + return (rc); +} + +static int +ethofld_fw4_ack(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) +{ + struct adapter *sc = iq->adapter; + const struct cpl_fw4_ack *cpl = (const void *)(rss + 1); + struct mbuf *m; + u_int etid = G_CPL_FW4_ACK_FLOWID(be32toh(OPCODE_TID(cpl))); + struct cxgbe_snd_tag *cst; + uint8_t credits = cpl->credits; + + cst = lookup_etid(sc, etid); + mtx_lock(&cst->lock); + if (__predict_false(cst->flags & EO_FLOWC_RPL_PENDING)) { + MPASS(credits >= ETID_FLOWC_LEN16); + credits -= ETID_FLOWC_LEN16; + cst->flags &= ~EO_FLOWC_RPL_PENDING; + } + + KASSERT(cst->ncompl > 0, + ("%s: etid %u (%p) wasn't expecting completion.", + __func__, etid, cst)); + cst->ncompl--; + + while (credits > 0) { + m = mbufq_dequeue(&cst->pending_fwack); + if (__predict_false(m == NULL)) { + /* + 
* The remaining credits are for the final flush that + * was issued when the tag was freed by the kernel. + */ + MPASS((cst->flags & + (EO_FLUSH_RPL_PENDING | EO_SND_TAG_REF)) == + EO_FLUSH_RPL_PENDING); + MPASS(credits == ETID_FLUSH_LEN16); + MPASS(cst->tx_credits + cpl->credits == cst->tx_total); + MPASS(cst->ncompl == 0); + + cst->flags &= ~EO_FLUSH_RPL_PENDING; + cst->tx_credits += cpl->credits; +freetag: + cxgbe_snd_tag_free_locked(cst); + return (0); /* cst is gone. */ + } + KASSERT(m != NULL, + ("%s: too many credits (%u, %u)", __func__, cpl->credits, + credits)); + KASSERT(credits >= mbuf_eo_len16(m), + ("%s: too few credits (%u, %u, %u)", __func__, + cpl->credits, credits, mbuf_eo_len16(m))); + credits -= mbuf_eo_len16(m); + cst->plen -= m->m_pkthdr.len; + m_freem(m); + } + + cst->tx_credits += cpl->credits; + MPASS(cst->tx_credits <= cst->tx_total); + + m = mbufq_first(&cst->pending_tx); + if (m != NULL && cst->tx_credits >= mbuf_eo_len16(m)) + ethofld_tx(cst); + + if (__predict_false((cst->flags & EO_SND_TAG_REF) == 0) && + cst->ncompl == 0) { + if (cst->tx_credits == cst->tx_total) + goto freetag; + else { + MPASS((cst->flags & EO_FLUSH_RPL_PENDING) == 0); + send_etid_flush_wr(cst); + } + } + + mtx_unlock(&cst->lock); + + return (0); +} +#endif
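A minimal userland sketch (not part of this commit) of the sockopt the new data path serves.  The commit log points at SO_MAX_PACING_RATE and setsockopt(2); the sketch assumes a kernel built with "options RATELIMIT" and an interface whose driver allocates if_snd_tags, such as cxgbe(4) after this change.  The rate is passed as a 32-bit byte-per-second value, and the figure of 12500000 bytes/s (~100 Mb/s) is an arbitrary illustration.

/*
 * Sketch: cap the pacing rate of an already-connected TCP socket.
 * The kernel hands the rate to the driver via if_snd_tag_alloc(), which
 * this commit backs with a hardware scheduling class and the ethofld
 * tx path above.
 */
#include <sys/socket.h>

#include <err.h>
#include <stdint.h>

static void
set_pacing_rate(int fd)
{
        uint32_t rate = 12500000;               /* bytes per second */

        if (setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE, &rate,
            sizeof(rate)) == -1)
                err(1, "setsockopt(SO_MAX_PACING_RATE)");
}

On the driver side the diff also introduces two loader tunables: hw.cxgbe.tsclk picks the tick used when the hardware rewrites TCP timestamps (-1 leaves timestamps alone; 0-3 select 1ms, 100us, 10us and 1us), and hw.cxgbe.eo_max_backlog caps how many bytes may sit queued on a single send tag (1 MB by default); packets beyond that limit are dropped with ENOBUFS.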
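One more sketch (again not part of the commit), for a detail in the t4_sge.c hunk that is easy to miss: the literal 0x0101080a that gates hardware timestamp rewriting is the standard NOP/NOP/TIMESTAMP/length-10 prefix that stacks place at the start of the TCP option area.  Macro names below are from <netinet/tcp.h>:

/*
 * The four option bytes the driver expects right after the TCP header
 * when hw.cxgbe.tsclk enables timestamp rewriting:
 *   kind 1 (NOP), kind 1 (NOP), kind 8 (timestamp), length 10
 */
#include <sys/types.h>
#include <netinet/tcp.h>
#include <stdint.h>

static const uint32_t tcp_ts_prefix =
    (TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
    (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP;        /* 0x0101080a */

The driver passes the literal through ntohl() before comparing, so the check matches the option bytes exactly as they appear on the wire regardless of host byte order.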