Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 6 Dec 2012 18:23:23 +0000 (UTC)
From:      Navdeep Parhar <np@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r243952 - stable/9/sys/dev/cxgbe/tom
Message-ID:  <201212061823.qB6INNOd008527@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: np
Date: Thu Dec  6 18:23:23 2012
New Revision: 243952
URL: http://svnweb.freebsd.org/changeset/base/243952

Log:
  MFC r243680, r243681.
  
  r243680:
  cxgbe/tom: Add a flag to indicate that the L2 table entry for an
  embryonic connection has been setup and never attempt to abort a tid
  before this is done.  This fixes a bad race where a listening socket is
  closed when the driver is in the middle of step (b) here.  The symptom
  of this were "ARP miss" errors from the driver followed by tid leaks.
  
  A hardware-offloaded passive open works this way:
  
  a) A SYN "hits" the TCAM entry for a server tid and the chip delivers it
  to the queue associated with the server tid (say, queue A).  It waits
  for a response from the driver telling it what to do.
  
  b) The driver decides it is ok to proceed.  It adds the new tid to the
  list of embryonic connections associated with the server tid and then
  hands off the SYN to the kernel's syncache to make sure that the kernel
  okays it too.  If it does then the driver provides an L2 table entry,
  queue id (say, queue B), etc. and instructs the chip to send the SYN/ACK
  response.
  
  c) The chip delivers a status to queue B depending on how the third step
  of the 3-way handshake goes.  The driver removes the tid from its list
  of embryonic connections and either expands the syncache entry or
  destroys the tid.  In any case all subsequent messages for the new tid
  will be delivered to queue B, not queue A.  Anything running in queue B
  knows that the L2 entry has long been setup and the new flag is of no
  interest from here on.  If the listener is closed it will deal with
  so_comp as normal.
  
  r243681:
  cxgbe/tom: Handle the case where the chip falls out of DDP mode by
  itself.  The hole in the receive sequence space corresponds to the
  number of bytes placed directly up to that point.
  
  Submittey by:

Modified:
  stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c
  stable/9/sys/dev/cxgbe/tom/t4_ddp.c
  stable/9/sys/dev/cxgbe/tom/t4_listen.c
  stable/9/sys/dev/cxgbe/tom/t4_tom.h
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/dev/   (props changed)

Modified: stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c
==============================================================================
--- stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Dec  6 15:36:24 2012	(r243951)
+++ stable/9/sys/dev/cxgbe/tom/t4_cpl_io.c	Thu Dec  6 18:23:23 2012	(r243952)
@@ -786,6 +786,29 @@ do_peer_close(struct sge_iq *iq, const s
 	KASSERT(opcode == CPL_PEER_CLOSE,
 	    ("%s: unexpected opcode 0x%x", __func__, opcode));
 	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
+
+	if (__predict_false(toep->flags & TPF_SYNQE)) {
+#ifdef INVARIANTS
+		struct synq_entry *synqe = (void *)toep;
+
+		INP_WLOCK(synqe->lctx->inp);
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
+			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
+			    ("%s: listen socket closed but tid %u not aborted.",
+			    __func__, tid));
+		} else {
+			/*
+			 * do_pass_accept_req is still running and will
+			 * eventually take care of this tid.
+			 */
+		}
+		INP_WUNLOCK(synqe->lctx->inp);
+#endif
+		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
+		    toep, toep->flags);
+		return (0);
+	}
+
 	KASSERT(toep->tid == tid, ("%s: toep tid mismatch", __func__));
 
 	INP_INFO_WLOCK(&V_tcbinfo);
@@ -1090,15 +1113,27 @@ do_rx_data(struct sge_iq *iq, const stru
 	struct socket *so;
 	struct sockbuf *sb;
 	int len;
+	uint32_t ddp_placed = 0;
 
 	if (__predict_false(toep->flags & TPF_SYNQE)) {
-		/*
-		 * do_pass_establish failed and must be attempting to abort the
-		 * synqe's tid.  Meanwhile, the T4 has sent us data for such a
-		 * connection.
-		 */
-		KASSERT(toep->flags & TPF_ABORT_SHUTDOWN,
-		    ("%s: synqe and tid isn't being aborted.", __func__));
+#ifdef INVARIANTS
+		struct synq_entry *synqe = (void *)toep;
+
+		INP_WLOCK(synqe->lctx->inp);
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
+			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
+			    ("%s: listen socket closed but tid %u not aborted.",
+			    __func__, tid));
+		} else {
+			/*
+			 * do_pass_accept_req is still running and will
+			 * eventually take care of this tid.
+			 */
+		}
+		INP_WUNLOCK(synqe->lctx->inp);
+#endif
+		CTR4(KTR_CXGBE, "%s: tid %u, synqe %p (0x%x)", __func__, tid,
+		    toep, toep->flags);
 		m_freem(m);
 		return (0);
 	}
@@ -1120,13 +1155,8 @@ do_rx_data(struct sge_iq *iq, const stru
 
 	tp = intotcpcb(inp);
 
-#ifdef INVARIANTS
-	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq))) {
-		log(LOG_ERR,
-		    "%s: unexpected seq# %x for TID %u, rcv_nxt %x\n",
-		    __func__, be32toh(cpl->seq), toep->tid, tp->rcv_nxt);
-	}
-#endif
+	if (__predict_false(tp->rcv_nxt != be32toh(cpl->seq)))
+		ddp_placed = be32toh(cpl->seq) - tp->rcv_nxt;
 
 	tp->rcv_nxt += len;
 	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
@@ -1173,12 +1203,20 @@ do_rx_data(struct sge_iq *iq, const stru
 		int changed = !(toep->ddp_flags & DDP_ON) ^ cpl->ddp_off;
 
 		if (changed) {
-			if (__predict_false(!(toep->ddp_flags & DDP_SC_REQ))) {
-				/* XXX: handle this if legitimate */
-				panic("%s: unexpected DDP state change %d",
-				    __func__, cpl->ddp_off);
+			if (toep->ddp_flags & DDP_SC_REQ)
+				toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
+			else {
+				KASSERT(cpl->ddp_off == 1,
+				    ("%s: DDP switched on by itself.",
+				    __func__));
+
+				/* Fell out of DDP mode */
+				toep->ddp_flags &= ~(DDP_ON | DDP_BUF0_ACTIVE |
+				    DDP_BUF1_ACTIVE);
+
+				if (ddp_placed)
+					insert_ddp_data(toep, ddp_placed);
 			}
-			toep->ddp_flags ^= DDP_ON | DDP_SC_REQ;
 		}
 
 		if ((toep->ddp_flags & DDP_OK) == 0 &&

Modified: stable/9/sys/dev/cxgbe/tom/t4_ddp.c
==============================================================================
--- stable/9/sys/dev/cxgbe/tom/t4_ddp.c	Thu Dec  6 15:36:24 2012	(r243951)
+++ stable/9/sys/dev/cxgbe/tom/t4_ddp.c	Thu Dec  6 18:23:23 2012	(r243952)
@@ -205,6 +205,42 @@ release_ddp_resources(struct toepcb *toe
 	}
 }
 
+/* XXX: handle_ddp_data code duplication */
+void
+insert_ddp_data(struct toepcb *toep, uint32_t n)
+{
+	struct inpcb *inp = toep->inp;
+	struct tcpcb *tp = intotcpcb(inp);
+	struct sockbuf *sb = &inp->inp_socket->so_rcv;
+	struct mbuf *m;
+
+	INP_WLOCK_ASSERT(inp);
+	SOCKBUF_LOCK_ASSERT(sb);
+
+	m = m_get(M_NOWAIT, MT_DATA);
+	if (m == NULL)
+		CXGBE_UNIMPLEMENTED("mbuf alloc failure");
+	m->m_len = n;
+	m->m_flags |= M_DDP;	/* Data is already where it should be */
+	m->m_data = "nothing to see here";
+
+	tp->rcv_nxt += n;
+#ifndef USE_DDP_RX_FLOW_CONTROL
+	KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__));
+	tp->rcv_wnd -= n;
+#endif
+
+	KASSERT(toep->sb_cc >= sb->sb_cc,
+	    ("%s: sb %p has more data (%d) than last time (%d).",
+	    __func__, sb, sb->sb_cc, toep->sb_cc));
+	toep->rx_credits += toep->sb_cc - sb->sb_cc;
+#ifdef USE_DDP_RX_FLOW_CONTROL
+	toep->rx_credits -= n;	/* adjust for F_RX_FC_DDP */
+#endif
+	sbappendstream_locked(sb, m);
+	toep->sb_cc = sb->sb_cc;
+}
+
 /* SET_TCB_FIELD sent as a ULP command looks like this */
 #define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \
     sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core))

Modified: stable/9/sys/dev/cxgbe/tom/t4_listen.c
==============================================================================
--- stable/9/sys/dev/cxgbe/tom/t4_listen.c	Thu Dec  6 15:36:24 2012	(r243951)
+++ stable/9/sys/dev/cxgbe/tom/t4_listen.c	Thu Dec  6 18:23:23 2012	(r243952)
@@ -282,8 +282,8 @@ send_reset_synqe(struct toedev *tod, str
 
 	INP_WLOCK_ASSERT(synqe->lctx->inp);
 
-	CTR4(KTR_CXGBE, "%s: synqe %p, tid %d%s",
-	    __func__, synqe, synqe->tid,
+	CTR5(KTR_CXGBE, "%s: synqe %p (0x%x), tid %d%s",
+	    __func__, synqe, synqe->flags, synqe->tid,
 	    synqe->flags & TPF_ABORT_SHUTDOWN ?
 	    " (abort already in progress)" : "");
 	if (synqe->flags & TPF_ABORT_SHUTDOWN)
@@ -501,8 +501,10 @@ t4_listen_stop(struct toedev *tod, struc
 	 * socket's so_comp.  It doesn't know about the connections on the synq
 	 * so we need to take care of those.
 	 */
-	TAILQ_FOREACH(synqe, &lctx->synq, link)
-		send_reset_synqe(tod, synqe);
+	TAILQ_FOREACH(synqe, &lctx->synq, link) {
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE)
+			send_reset_synqe(tod, synqe);
+	}
 
 	destroy_server(sc, lctx);
 	return (0);
@@ -1131,8 +1133,7 @@ do_pass_accept_req(struct sge_iq *iq, co
 	synqe->lctx = lctx;
 	synqe->syn = m;
 	m = NULL;
-	refcount_init(&synqe->refcnt, 1); /* 1 so that it is held for the
-					     duration of this function */
+	refcount_init(&synqe->refcnt, 0);
 	synqe->l2e_idx = e->idx;
 	synqe->rcv_bufsize = rx_credits;
 	atomic_store_rel_ptr(&synqe->wr, (uintptr_t)wr);
@@ -1155,46 +1156,9 @@ do_pass_accept_req(struct sge_iq *iq, co
 	 * If we replied during syncache_add (synqe->wr has been consumed),
 	 * good.  Otherwise, set it to 0 so that further syncache_respond
 	 * attempts by the kernel will be ignored.
-	 *
-	 * The extra hold on the synqe makes sure that it is still around, even
-	 * if the listener has been dropped and the synqe was aborted and the
-	 * reply to the abort has removed and released the synqe from the synq
-	 * list.
 	 */
 	if (atomic_cmpset_ptr(&synqe->wr, (uintptr_t)wr, 0)) {
 
-		INP_WLOCK(inp);
-		if (__predict_false(inp->inp_flags & INP_DROPPED)) {
-			/* listener closed.  synqe must have been aborted. */
-			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
-			    ("%s: listener %p closed but synqe %p not aborted",
-			    __func__, inp, synqe));
-
-			CTR5(KTR_CXGBE,
-			    "%s: stid %u, tid %u, lctx %p, synqe %p, ABORTED",
-			    __func__, stid, tid, lctx, synqe);
-			INP_WUNLOCK(inp);
-			free(wr, M_CXGBE);
-			release_synqe(synqe);	/* about to exit function */
-			return (__LINE__);
-		}
-
-		/*
-		 * synqe aborted before TOM replied to PASS_ACCEPT_REQ.  But
-		 * that can only happen if the listener was closed and we just
-		 * checked for that.
-		 */
-		KASSERT(!(synqe->flags & TPF_ABORT_SHUTDOWN),
-		    ("%s: synqe %p aborted, but listener %p not dropped.",
-		    __func__, synqe, inp));
-
-		/* Yank the synqe out of the lctx synq. */
-		TAILQ_REMOVE(&lctx->synq, synqe, link);
-		release_synqe(synqe);	/* removed from synq list */
-		inp = release_lctx(sc, lctx);
-		if (inp)
-			INP_WUNLOCK(inp);
-
 		/*
 		 * syncache may or may not have a hold on the synqe, which may
 		 * or may not be stashed in the original SYN mbuf passed to us.
@@ -1205,13 +1169,39 @@ do_pass_accept_req(struct sge_iq *iq, co
 			m->m_pkthdr.rcvif = ifp;
 
 		remove_tid(sc, synqe->tid);
-		release_synqe(synqe);	/* about to exit function */
 		free(wr, M_CXGBE);
+
+		/* Yank the synqe out of the lctx synq. */
+		INP_WLOCK(inp);
+		TAILQ_REMOVE(&lctx->synq, synqe, link);
+		release_synqe(synqe);	/* removed from synq list */
+		inp = release_lctx(sc, lctx);
+		if (inp)
+			INP_WUNLOCK(inp);
+
 		REJECT_PASS_ACCEPT();
 	}
-	release_synqe(synqe);	/* about to exit function */
+
 	CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, SYNACK",
 	    __func__, stid, tid, lctx, synqe);
+
+	INP_WLOCK(inp);
+	synqe->flags |= TPF_SYNQE_HAS_L2TE;
+	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
+		/*
+		 * Listening socket closed but tod_listen_stop did not abort
+		 * this tid because there was no L2T entry for the tid at that
+		 * time.  Abort it now.  The reply to the abort will clean up.
+		 */
+		CTR5(KTR_CXGBE, "%s: stid %u, tid %u, lctx %p, synqe %p, ABORT",
+		    __func__, stid, tid, lctx, synqe);
+		send_reset_synqe(tod, synqe);
+		INP_WUNLOCK(inp);
+
+		return (__LINE__);
+	}
+	INP_WUNLOCK(inp);
+
 	return (0);
 reject:
 	CTR4(KTR_CXGBE, "%s: stid %u, tid %u, REJECT (%d)", __func__, stid, tid,
@@ -1293,15 +1283,12 @@ do_pass_establish(struct sge_iq *iq, con
 	    __func__, stid, tid, synqe, synqe->flags, inp->inp_flags);
 
 	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
-		/*
-		 * The listening socket has closed.  The TOM must have aborted
-		 * all the embryonic connections (including this one) that were
-		 * on the lctx's synq.  do_abort_rpl for the tid is responsible
-		 * for cleaning up.
-		 */
-		KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
-		    ("%s: listen socket dropped but tid %u not aborted.",
-		    __func__, tid));
+
+		if (synqe->flags & TPF_SYNQE_HAS_L2TE) {
+			KASSERT(synqe->flags & TPF_ABORT_SHUTDOWN,
+			    ("%s: listen socket closed but tid %u not aborted.",
+			    __func__, tid));
+		}
 
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);
@@ -1321,7 +1308,12 @@ do_pass_establish(struct sge_iq *iq, con
 	toep = alloc_toepcb(pi, txqid, rxqid, M_NOWAIT);
 	if (toep == NULL) {
 reset:
-		/* The reply to this abort will perform final cleanup */
+		/*
+		 * The reply to this abort will perform final cleanup.  There is
+		 * no need to check for HAS_L2TE here.  We can be here only if
+		 * we responded to the PASS_ACCEPT_REQ, and our response had the
+		 * L2T idx.
+		 */
 		send_reset_synqe(TOEDEV(ifp), synqe);
 		INP_WUNLOCK(inp);
 		INP_INFO_WUNLOCK(&V_tcbinfo);

Modified: stable/9/sys/dev/cxgbe/tom/t4_tom.h
==============================================================================
--- stable/9/sys/dev/cxgbe/tom/t4_tom.h	Thu Dec  6 15:36:24 2012	(r243951)
+++ stable/9/sys/dev/cxgbe/tom/t4_tom.h	Thu Dec  6 18:23:23 2012	(r243952)
@@ -67,6 +67,7 @@ enum {
 	TPF_SYNQE_NEEDFREE = (1 << 9),	/* synq_entry was malloc'd separately */
 	TPF_SYNQE_TCPDDP   = (1 << 10),	/* ulp_mode TCPDDP in toepcb */
 	TPF_SYNQE_EXPANDED = (1 << 11),	/* toepcb ready, tid context updated */
+	TPF_SYNQE_HAS_L2TE = (1 << 12),	/* we've replied to PASS_ACCEPT_REQ */
 };
 
 enum {
@@ -272,4 +273,5 @@ int t4_soreceive_ddp(struct socket *, st
     struct mbuf **, struct mbuf **, int *);
 void enable_ddp(struct adapter *, struct toepcb *toep);
 void release_ddp_resources(struct toepcb *toep);
+void insert_ddp_data(struct toepcb *, uint32_t);
 #endif



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201212061823.qB6INNOd008527>