Date: Wed, 11 Jan 2017 23:48:17 +0000 (UTC) From: Navdeep Parhar <np@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r311949 - head/sys/dev/cxgbe/tom Message-ID: <201701112348.v0BNmHWu037273@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: np Date: Wed Jan 11 23:48:17 2017 New Revision: 311949 URL: https://svnweb.freebsd.org/changeset/base/311949 Log: cxgbe/tom: Add VIMAGE support to the TOE driver. Active Open: - Save the socket's vnet at the time of the active open (t4_connect) and switch to it when processing the reply (do_act_open_rpl or do_act_establish). Passive Open: - Save the listening socket's vnet in the driver's listen_ctx and switch to it when processing incoming SYNs for the socket. - Reject SYNs that arrive on an ifnet that's not in the same vnet as the listening socket. CLIP (Compressed Local IPv6) table: - Add only those IPv6 addresses to the CLIP that are in a vnet associated with one of the card's ifnets. Misc: - Set vnet from the toepcb when processing TCP state transitions. - The kernel sets the vnet when calling the driver's output routine so t4_push_frames runs in proper vnet context already. One exception is when incoming credits trigger tx within the driver's ithread. Set the vnet explicitly in do_fw4_ack for that case. MFC after: 3 days Sponsored by: Chelsio Communications Modified: head/sys/dev/cxgbe/tom/t4_connect.c head/sys/dev/cxgbe/tom/t4_cpl_io.c head/sys/dev/cxgbe/tom/t4_ddp.c head/sys/dev/cxgbe/tom/t4_listen.c head/sys/dev/cxgbe/tom/t4_tom.c head/sys/dev/cxgbe/tom/t4_tom.h Modified: head/sys/dev/cxgbe/tom/t4_connect.c ============================================================================== --- head/sys/dev/cxgbe/tom/t4_connect.c Wed Jan 11 23:32:40 2017 (r311948) +++ head/sys/dev/cxgbe/tom/t4_connect.c Wed Jan 11 23:48:17 2017 (r311949) @@ -126,6 +126,7 @@ do_act_establish(struct sge_iq *iq, cons CTR3(KTR_CXGBE, "%s: atid %u, tid %u", __func__, atid, tid); free_atid(sc, atid); + CURVNET_SET(toep->vnet); INP_WLOCK(inp); toep->tid = tid; insert_tid(sc, tid, toep, inp->inp_vflag & INP_IPV6 ? 2 : 1); @@ -141,6 +142,7 @@ do_act_establish(struct sge_iq *iq, cons make_established(toep, cpl->snd_isn, cpl->rcv_isn, cpl->tcp_opt); done: INP_WUNLOCK(inp); + CURVNET_RESTORE(); return (0); } @@ -178,6 +180,7 @@ act_open_failure_cleanup(struct adapter free_atid(sc, atid); toep->tid = -1; + CURVNET_SET(toep->vnet); if (status != EAGAIN) INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); @@ -185,6 +188,7 @@ act_open_failure_cleanup(struct adapter final_cpl_received(toep); /* unlocks inp */ if (status != EAGAIN) INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); } /* @@ -360,6 +364,7 @@ t4_connect(struct toedev *tod, struct so if (wr == NULL) DONT_OFFLOAD_ACTIVE_OPEN(ENOMEM); + toep->vnet = so->so_vnet; if (sc->tt.ddp && (so->so_options & SO_NO_DDP) == 0) set_tcpddp_ulp_mode(toep); else Modified: head/sys/dev/cxgbe/tom/t4_cpl_io.c ============================================================================== --- head/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jan 11 23:32:40 2017 (r311948) +++ head/sys/dev/cxgbe/tom/t4_cpl_io.c Wed Jan 11 23:48:17 2017 (r311949) @@ -306,7 +306,6 @@ make_established(struct toepcb *toep, ui uint16_t tcpopt = be16toh(opt); struct flowc_tx_params ftxp; - CURVNET_SET(so->so_vnet); INP_WLOCK_ASSERT(inp); KASSERT(tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED, @@ -357,7 +356,6 @@ make_established(struct toepcb *toep, ui send_flowc_wr(toep, &ftxp); soisconnected(so); - CURVNET_RESTORE(); } static int @@ -1146,6 +1144,7 @@ do_peer_close(struct sge_iq *iq, const s KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); + CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); @@ -1191,6 +1190,7 @@ do_peer_close(struct sge_iq *iq, const s tcp_twstart(tp); INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); @@ -1203,6 +1203,7 @@ do_peer_close(struct sge_iq *iq, const s done: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); return (0); } @@ -1229,6 +1230,7 @@ do_close_con_rpl(struct sge_iq *iq, cons KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__)); KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__)); + CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = intotcpcb(inp); @@ -1248,6 +1250,7 @@ do_close_con_rpl(struct sge_iq *iq, cons release: INP_UNLOCK_ASSERT(inp); /* safe, we have a ref on the inp */ INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); INP_WLOCK(inp); final_cpl_received(toep); /* no more CPLs expected */ @@ -1272,6 +1275,7 @@ release: done: INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); return (0); } @@ -1345,6 +1349,7 @@ do_abort_req(struct sge_iq *iq, const st } inp = toep->inp; + CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); /* for tcp_close */ INP_WLOCK(inp); @@ -1380,6 +1385,7 @@ do_abort_req(struct sge_iq *iq, const st final_cpl_received(toep); done: INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); send_abort_rpl(sc, ofld_txq, tid, CPL_ABORT_NO_RST); return (0); } @@ -1501,18 +1507,21 @@ do_rx_data(struct sge_iq *iq, const stru DDP_UNLOCK(toep); INP_WUNLOCK(inp); + CURVNET_SET(toep->vnet); INP_INFO_RLOCK(&V_tcbinfo); INP_WLOCK(inp); tp = tcp_drop(tp, ECONNRESET); if (tp) INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); return (0); } /* receive buffer autosize */ - CURVNET_SET(so->so_vnet); + MPASS(toep->vnet == so->so_vnet); + CURVNET_SET(toep->vnet); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && sb->sb_hiwat < V_tcp_autorcvbuf_max && @@ -1713,10 +1722,12 @@ do_fw4_ack(struct sge_iq *iq, const stru tid); #endif toep->flags &= ~TPF_TX_SUSPENDED; + CURVNET_SET(toep->vnet); if (toep->ulp_mode == ULP_MODE_ISCSI) t4_push_pdus(sc, toep, plen); else t4_push_frames(sc, toep, plen); + CURVNET_RESTORE(); } else if (plen > 0) { struct sockbuf *sb = &so->so_snd; int sbu; @@ -2143,7 +2154,7 @@ t4_aiotx_task(void *context, int pending struct socket *so = inp->inp_socket; struct kaiocb *job; - CURVNET_SET(so->so_vnet); + CURVNET_SET(toep->vnet); SOCKBUF_LOCK(&so->so_snd); while (!TAILQ_EMPTY(&toep->aiotx_jobq) && sowriteable(so)) { job = TAILQ_FIRST(&toep->aiotx_jobq); Modified: head/sys/dev/cxgbe/tom/t4_ddp.c ============================================================================== --- head/sys/dev/cxgbe/tom/t4_ddp.c Wed Jan 11 23:32:40 2017 (r311948) +++ head/sys/dev/cxgbe/tom/t4_ddp.c Wed Jan 11 23:48:17 2017 (r311949) @@ -546,7 +546,8 @@ handle_ddp_data(struct toepcb *toep, __b #endif /* receive buffer autosize */ - CURVNET_SET(so->so_vnet); + MPASS(toep->vnet == so->so_vnet); + CURVNET_SET(toep->vnet); SOCKBUF_LOCK(sb); if (sb->sb_flags & SB_AUTOSIZE && V_tcp_do_autorcvbuf && Modified: head/sys/dev/cxgbe/tom/t4_listen.c ============================================================================== --- head/sys/dev/cxgbe/tom/t4_listen.c Wed Jan 11 23:32:40 2017 (r311948) +++ head/sys/dev/cxgbe/tom/t4_listen.c Wed Jan 11 23:48:17 2017 (r311949) @@ -222,6 +222,7 @@ alloc_lctx(struct adapter *sc, struct in TAILQ_INIT(&lctx->synq); lctx->inp = inp; + lctx->vnet = inp->inp_socket->so_vnet; in_pcbref(inp); return (lctx); @@ -1200,6 +1201,8 @@ do_pass_accept_req(struct sge_iq *iq, co pi = sc->port[G_SYN_INTF(be16toh(cpl->l2info))]; + CURVNET_SET(lctx->vnet); + /* * Use the MAC index to lookup the associated VI. If this SYN * didn't match a perfect MAC filter, punt. @@ -1274,6 +1277,13 @@ found: ntids = 1; } + /* + * Don't offload if the ifnet that the SYN came in on is not in the same + * vnet as the listening socket. + */ + if (lctx->vnet != ifp->if_vnet) + REJECT_PASS_ACCEPT(); + e = get_l2te_for_nexthop(pi, ifp, &inc); if (e == NULL) REJECT_PASS_ACCEPT(); @@ -1313,7 +1323,6 @@ found: REJECT_PASS_ACCEPT(); } so = inp->inp_socket; - CURVNET_SET(so->so_vnet); mtu_idx = find_best_mtu_idx(sc, &inc, be16toh(cpl->tcpopt.mss)); rscale = cpl->tcpopt.wsf && V_tcp_do_rfc1323 ? select_rcv_wscale() : 0; @@ -1360,7 +1369,6 @@ found: */ toe_syncache_add(&inc, &to, &th, inp, tod, synqe); INP_UNLOCK_ASSERT(inp); /* ok to assert, we have a ref on the inp */ - CURVNET_RESTORE(); /* * If we replied during syncache_add (synqe->wr has been consumed), @@ -1415,10 +1423,12 @@ found: return (__LINE__); } INP_WUNLOCK(inp); + CURVNET_RESTORE(); release_synqe(synqe); /* extra hold */ return (0); reject: + CURVNET_RESTORE(); CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid, reject_reason); @@ -1490,6 +1500,7 @@ do_pass_establish(struct sge_iq *iq, con KASSERT(synqe->flags & TPF_SYNQE, ("%s: tid %u (ctx %p) not a synqe", __func__, tid, synqe)); + CURVNET_SET(lctx->vnet); INP_INFO_RLOCK(&V_tcbinfo); /* for syncache_expand */ INP_WLOCK(inp); @@ -1507,6 +1518,7 @@ do_pass_establish(struct sge_iq *iq, con INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); return (0); } @@ -1532,6 +1544,7 @@ reset: send_reset_synqe(TOEDEV(ifp), synqe); INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); return (0); } toep->tid = tid; @@ -1568,6 +1581,8 @@ reset: /* New connection inpcb is already locked by syncache_expand(). */ new_inp = sotoinpcb(so); INP_WLOCK_ASSERT(new_inp); + MPASS(so->so_vnet == lctx->vnet); + toep->vnet = lctx->vnet; /* * This is for the unlikely case where the syncache entry that we added @@ -1591,6 +1606,7 @@ reset: if (inp != NULL) INP_WUNLOCK(inp); INP_INFO_RUNLOCK(&V_tcbinfo); + CURVNET_RESTORE(); release_synqe(synqe); return (0); Modified: head/sys/dev/cxgbe/tom/t4_tom.c ============================================================================== --- head/sys/dev/cxgbe/tom/t4_tom.c Wed Jan 11 23:32:40 2017 (r311948) +++ head/sys/dev/cxgbe/tom/t4_tom.c Wed Jan 11 23:48:17 2017 (r311949) @@ -799,74 +799,96 @@ update_clip_table(struct adapter *sc, st struct in6_addr *lip, tlip; struct clip_head stale; struct clip_entry *ce, *ce_temp; - int rc, gen = atomic_load_acq_int(&in6_ifaddr_gen); + struct vi_info *vi; + int rc, gen, i, j; + uintptr_t last_vnet; ASSERT_SYNCHRONIZED_OP(sc); IN6_IFADDR_RLOCK(&in6_ifa_tracker); mtx_lock(&td->clip_table_lock); + gen = atomic_load_acq_int(&in6_ifaddr_gen); if (gen == td->clip_gen) goto done; TAILQ_INIT(&stale); TAILQ_CONCAT(&stale, &td->clip_table, link); - TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { - lip = &ia->ia_addr.sin6_addr; + /* + * last_vnet optimizes the common cases where all if_vnet = NULL (no + * VIMAGE) or all if_vnet = vnet0. + */ + last_vnet = (uintptr_t)(-1); + for_each_port(sc, i) + for_each_vi(sc->port[i], j, vi) { + if (last_vnet == (uintptr_t)vi->ifp->if_vnet) + continue; - KASSERT(!IN6_IS_ADDR_MULTICAST(lip), - ("%s: mcast address in in6_ifaddr list", __func__)); + /* XXX: races with if_vmove */ + CURVNET_SET(vi->ifp->if_vnet); + TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { + lip = &ia->ia_addr.sin6_addr; + + KASSERT(!IN6_IS_ADDR_MULTICAST(lip), + ("%s: mcast address in in6_ifaddr list", __func__)); + + if (IN6_IS_ADDR_LOOPBACK(lip)) + continue; + if (IN6_IS_SCOPE_EMBED(lip)) { + /* Remove the embedded scope */ + tlip = *lip; + lip = &tlip; + in6_clearscope(lip); + } + /* + * XXX: how to weed out the link local address for the + * loopback interface? It's fe80::1 usually (always?). + */ + + /* + * If it's in the main list then we already know it's + * not stale. + */ + TAILQ_FOREACH(ce, &td->clip_table, link) { + if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) + goto next; + } - if (IN6_IS_ADDR_LOOPBACK(lip)) - continue; - if (IN6_IS_SCOPE_EMBED(lip)) { - /* Remove the embedded scope */ - tlip = *lip; - lip = &tlip; - in6_clearscope(lip); - } - /* - * XXX: how to weed out the link local address for the loopback - * interface? It's fe80::1 usually (always?). - */ - - /* - * If it's in the main list then we already know it's not stale. - */ - TAILQ_FOREACH(ce, &td->clip_table, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) - goto next; - } + /* + * If it's in the stale list we should move it to the + * main list. + */ + TAILQ_FOREACH(ce, &stale, link) { + if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) { + TAILQ_REMOVE(&stale, ce, link); + TAILQ_INSERT_TAIL(&td->clip_table, ce, + link); + goto next; + } + } - /* - * If it's in the stale list we should move it to the main list. - */ - TAILQ_FOREACH(ce, &stale, link) { - if (IN6_ARE_ADDR_EQUAL(&ce->lip, lip)) { - TAILQ_REMOVE(&stale, ce, link); + /* A new IP6 address; add it to the CLIP table */ + ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); + memcpy(&ce->lip, lip, sizeof(ce->lip)); + ce->refcount = 0; + rc = add_lip(sc, lip); + if (rc == 0) TAILQ_INSERT_TAIL(&td->clip_table, ce, link); - goto next; - } - } + else { + char ip[INET6_ADDRSTRLEN]; - /* A new IP6 address; add it to the CLIP table */ - ce = malloc(sizeof(*ce), M_CXGBE, M_NOWAIT); - memcpy(&ce->lip, lip, sizeof(ce->lip)); - ce->refcount = 0; - rc = add_lip(sc, lip); - if (rc == 0) - TAILQ_INSERT_TAIL(&td->clip_table, ce, link); - else { - char ip[INET6_ADDRSTRLEN]; - - inet_ntop(AF_INET6, &ce->lip, &ip[0], sizeof(ip)); - log(LOG_ERR, "%s: could not add %s (%d)\n", - __func__, ip, rc); - free(ce, M_CXGBE); - } + inet_ntop(AF_INET6, &ce->lip, &ip[0], + sizeof(ip)); + log(LOG_ERR, "%s: could not add %s (%d)\n", + __func__, ip, rc); + free(ce, M_CXGBE); + } next: - continue; + continue; + } + CURVNET_RESTORE(); + last_vnet = (uintptr_t)vi->ifp->if_vnet; } /* Modified: head/sys/dev/cxgbe/tom/t4_tom.h ============================================================================== --- head/sys/dev/cxgbe/tom/t4_tom.h Wed Jan 11 23:32:40 2017 (r311948) +++ head/sys/dev/cxgbe/tom/t4_tom.h Wed Jan 11 23:48:17 2017 (r311949) @@ -141,6 +141,7 @@ struct toepcb { int refcount; struct tom_data *td; struct inpcb *inp; /* backpointer to host stack's PCB */ + struct vnet *vnet; struct vi_info *vi; /* virtual interface */ struct sge_wrq *ofld_txq; struct sge_ofld_rxq *ofld_rxq; @@ -232,6 +233,7 @@ struct listen_ctx { struct stid_region stid_region; int flags; struct inpcb *inp; /* listening socket's inp */ + struct vnet *vnet; struct sge_wrq *ctrlq; struct sge_ofld_rxq *ofld_rxq; struct clip_entry *ce;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201701112348.v0BNmHWu037273>