From owner-svn-src-all@freebsd.org Thu Feb 8 14:39:08 2018
From: Navdeep Parhar <np@FreeBSD.org>
Date: Thu, 8 Feb 2018 14:39:07 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org
Subject: svn commit: r329017 - stable/11/sys/dev/cxgbe/iw_cxgbe
Message-Id: <201802081439.w18Ed7jO052547@repo.freebsd.org>

Author: np
Date: Thu Feb 8 14:39:07 2018
New Revision: 329017
URL: https://svnweb.freebsd.org/changeset/base/329017

Log:
  iw_cxgbe: Manually backport changes related to QP flush.  This fixes a
  panic where poll_cq sees an empty RQ while processing an incoming SEND
  for a QP that is being taken down.

  This is a direct commit to stable/11.

  Sponsored by:	Chelsio Communications
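The heart of the backport is new flush bookkeeping on the software send
queue: each t4_swsqe entry gains a "flushed" flag and the SQ remembers a
flush_cidx, so a flush walks from the last flushed slot up to the producer
index and completes each pending WR exactly once, instead of working from a
separately computed CQE count.  The snippet below is a minimal, self-contained
userspace model of that walk; struct swsq, flush_sq() and the ring size are
simplified stand-ins for illustration only, not the driver's real t4_wq or
c4iw_flush_sq().

/*
 * Minimal model of the flush_cidx bookkeeping (illustration only; the
 * struct names and sizes here are made up, not the driver's).
 */
#include <assert.h>
#include <stdio.h>

#define SQ_SIZE	8		/* hypothetical ring size */

struct swsqe {			/* stand-in for struct t4_swsqe */
	int flushed;		/* this slot already got a flush completion */
};

struct swsq {			/* stand-in for the driver's SQ bookkeeping */
	struct swsqe sw_sq[SQ_SIZE];
	int cidx;		/* consumer index */
	int pidx;		/* producer index */
	int flush_cidx;		/* -1 until the first flush */
};

/*
 * Walk from the last flushed slot up to the producer index and give every
 * still-pending WR a flush completion, each exactly once.
 */
static int
flush_sq(struct swsq *sq)
{
	int idx, flushed = 0;

	if (sq->flush_cidx == -1)
		sq->flush_cidx = sq->cidx;
	idx = sq->flush_cidx;
	while (idx != sq->pidx) {
		struct swsqe *e = &sq->sw_sq[idx];

		assert(!e->flushed);	/* mirrors the BUG_ON in the patch */
		e->flushed = 1;
		printf("flush completion for slot %d\n", idx);
		flushed++;
		if (++idx == SQ_SIZE)
			idx = 0;
	}
	sq->flush_cidx = idx;
	return (flushed);
}

int
main(void)
{
	/* Four WRs pending in slots 6, 7, 0, 1 (the ring wraps around). */
	struct swsq sq = { .cidx = 6, .pidx = 2, .flush_cidx = -1 };

	printf("first flush:  %d WRs\n", flush_sq(&sq));	/* prints 4 */
	printf("second flush: %d WRs\n", flush_sq(&sq));	/* prints 0 */
	return (0);
}

In the driver the same walk also emits software CQEs through insert_sq_cqe()
and advances oldest_read past flushed read requests, and __flush_qp() now
marks the WQ flushed under the CQ lock so the flush runs at most once per QP.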
Modified:
  stable/11/sys/dev/cxgbe/iw_cxgbe/cq.c
  stable/11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
  stable/11/sys/dev/cxgbe/iw_cxgbe/qp.c
  stable/11/sys/dev/cxgbe/iw_cxgbe/t4.h

Modified: stable/11/sys/dev/cxgbe/iw_cxgbe/cq.c
==============================================================================
--- stable/11/sys/dev/cxgbe/iw_cxgbe/cq.c	Thu Feb 8 13:22:40 2018	(r329016)
+++ stable/11/sys/dev/cxgbe/iw_cxgbe/cq.c	Thu Feb 8 14:39:07 2018	(r329017)
@@ -245,137 +245,82 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_
 	t4_swcq_produce(cq);
 }
 
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count)
+static void advance_oldest_read(struct t4_wq *wq);
+
+int c4iw_flush_sq(struct c4iw_qp *qhp)
 {
 	int flushed = 0;
-	struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count];
-	int in_use = wq->sq.in_use - count;
+	struct t4_wq *wq = &qhp->wq;
+	struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq);
+	struct t4_cq *cq = &chp->cq;
+	int idx;
+	struct t4_swsqe *swsqe;
 
-	BUG_ON(in_use < 0);
-	while (in_use--) {
-		swsqe->signaled = 0;
+	if (wq->sq.flush_cidx == -1)
+		wq->sq.flush_cidx = wq->sq.cidx;
+	idx = wq->sq.flush_cidx;
+	BUG_ON(idx >= wq->sq.size);
+	while (idx != wq->sq.pidx) {
+		swsqe = &wq->sq.sw_sq[idx];
+		BUG_ON(swsqe->flushed);
+		swsqe->flushed = 1;
 		insert_sq_cqe(wq, cq, swsqe);
-		swsqe++;
-		if (swsqe == (wq->sq.sw_sq + wq->sq.size))
-			swsqe = wq->sq.sw_sq;
+		if (wq->sq.oldest_read == swsqe) {
+			BUG_ON(swsqe->opcode != FW_RI_READ_REQ);
+			advance_oldest_read(wq);
+		}
 		flushed++;
+		if (++idx == wq->sq.size)
+			idx = 0;
 	}
+	wq->sq.flush_cidx += flushed;
+	if (wq->sq.flush_cidx >= wq->sq.size)
+		wq->sq.flush_cidx -= wq->sq.size;
 	return flushed;
 }
 
-/*
- * Move all CQEs from the HWCQ into the SWCQ.
- */
-void c4iw_flush_hw_cq(struct t4_cq *cq)
-{
-	struct t4_cqe *cqe = NULL, *swcqe;
-	int ret;
-
-	CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, cq, cq->cqid);
-	ret = t4_next_hw_cqe(cq, &cqe);
-	while (!ret) {
-		CTR3(KTR_IW_CXGBE, "%s flushing hwcq cidx 0x%x swcq pidx 0x%x",
-		    __func__, cq->cidx, cq->sw_pidx);
-		swcqe = &cq->sw_queue[cq->sw_pidx];
-		*swcqe = *cqe;
-		swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
-		t4_swcq_produce(cq);
-		t4_hwcq_consume(cq);
-		ret = t4_next_hw_cqe(cq, &cqe);
-	}
-}
-
-static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
-{
-	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
-		return 0;
-
-	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
-		return 0;
-
-	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
-		return 0;
-
-	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
-		return 0;
-	return 1;
-}
-
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
-{
-	struct t4_cqe *cqe;
-	u32 ptr;
-
-	*count = 0;
-	ptr = cq->sw_cidx;
-	while (ptr != cq->sw_pidx) {
-		cqe = &cq->sw_queue[ptr];
-		if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) &&
-		    wq->sq.oldest_read)) &&
-		    (CQE_QPID(cqe) == wq->sq.qid))
-			(*count)++;
-		if (++ptr == cq->size)
-			ptr = 0;
-	}
-	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
-}
-
-void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
-{
-	struct t4_cqe *cqe;
-	u32 ptr;
-
-	*count = 0;
-	CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
-	ptr = cq->sw_cidx;
-	while (ptr != cq->sw_pidx) {
-		cqe = &cq->sw_queue[ptr];
-		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
-		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
-			(*count)++;
-		if (++ptr == cq->size)
-			ptr = 0;
-	}
-	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
-}
-
 static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq)
 {
 	struct t4_swsqe *swsqe;
-	u16 ptr = wq->sq.cidx;
-	int count = wq->sq.in_use;
-	int unsignaled = 0;
+	int cidx;
 
-	swsqe = &wq->sq.sw_sq[ptr];
-	while (count--)
+	if (wq->sq.flush_cidx == -1)
+		wq->sq.flush_cidx = wq->sq.cidx;
+	cidx = wq->sq.flush_cidx;
+	BUG_ON(cidx > wq->sq.size);
+
+	while (cidx != wq->sq.pidx) {
+		swsqe = &wq->sq.sw_sq[cidx];
 		if (!swsqe->signaled) {
-			if (++ptr == wq->sq.size)
-				ptr = 0;
-			swsqe = &wq->sq.sw_sq[ptr];
-			unsignaled++;
+			if (++cidx == wq->sq.size)
+				cidx = 0;
 		} else if (swsqe->complete) {
+			BUG_ON(swsqe->flushed);
+
			/*
			 * Insert this completed cqe into the swcq.
			 */
			CTR3(KTR_IW_CXGBE,
-			    "%s moving cqe into swcq sq idx %u cq idx %u",
-			    __func__, ptr, cq->sw_pidx);
+			    "%s moving cqe into swcq sq idx %u cq idx %u\n",
+			    __func__, cidx, cq->sw_pidx);
			swsqe->cqe.header |= htonl(V_CQE_SWCQE(1));
			cq->sw_queue[cq->sw_pidx] = swsqe->cqe;
			t4_swcq_produce(cq);
-			swsqe->signaled = 0;
-			wq->sq.in_use -= unsignaled;
-			break;
+			swsqe->flushed = 1;
+			if (++cidx == wq->sq.size)
+				cidx = 0;
+			wq->sq.flush_cidx = cidx;
 		} else
 			break;
+	}
 }
 
 static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe,
 				struct t4_cqe *read_cqe)
 {
 	read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx;
-	read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len);
+	read_cqe->len = htonl(wq->sq.oldest_read->read_len);
 	read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) |
 			V_CQE_SWCQE(SW_CQE(hw_cqe)) |
 			V_CQE_OPCODE(FW_RI_READ_REQ) |
@@ -383,9 +328,6 @@ static void create_read_req_cqe(struct t4_wq *wq, stru
 	read_cqe->bits_type_ts = hw_cqe->bits_type_ts;
 }
 
-/*
- * Return a ptr to the next read wr in the SWSQ or NULL.
- */
 static void advance_oldest_read(struct t4_wq *wq)
 {
 
@@ -405,6 +347,128 @@ static void advance_oldest_read(struct t4_wq *wq)
 }
 
 /*
+ * Move all CQEs from the HWCQ into the SWCQ.
+ * Deal with out-of-order and/or completions that complete
+ * prior unsignalled WRs.
+ */
+void c4iw_flush_hw_cq(struct c4iw_cq *chp)
+{
+	struct t4_cqe *hw_cqe, *swcqe, read_cqe;
+	struct c4iw_qp *qhp;
+	struct t4_swsqe *swsqe;
+	int ret;
+
+	CTR3(KTR_IW_CXGBE, "%s c4iw_cq %p cqid 0x%x", __func__, chp,
+	    chp->cq.cqid);
+	ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+
+	/*
+	 * This logic is similar to poll_cq(), but not quite the same
+	 * unfortunately. Need to move pertinent HW CQEs to the SW CQ but
+	 * also do any translation magic that poll_cq() normally does.
+	 */
+	while (!ret) {
+		qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe));
+
+		/*
+		 * drop CQEs with no associated QP
+		 */
+		if (qhp == NULL)
+			goto next_cqe;
+
+		if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE)
+			goto next_cqe;
+
+		if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) {
+
+			/*
+			 * If we have reached here because of async
+			 * event or other error, and have egress error
+			 * then drop
+			 */
+			if (CQE_TYPE(hw_cqe) == 1) {
+				goto next_cqe;
+			}
+
+			/*
+			 * drop peer2peer RTR reads.
+			 */
+			if (CQE_WRID_STAG(hw_cqe) == 1)
+				goto next_cqe;
+
+			/*
+			 * Eat completions for unsignaled read WRs.
+			 */
+			if (!qhp->wq.sq.oldest_read->signaled) {
+				advance_oldest_read(&qhp->wq);
+				goto next_cqe;
+			}
+
+			/*
+			 * Don't write to the HWCQ, create a new read req CQE
+			 * in local memory and move it into the swcq.
+			 */
+			create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe);
+			hw_cqe = &read_cqe;
+			advance_oldest_read(&qhp->wq);
+		}
+
+		/* if its a SQ completion, then do the magic to move all the
+		 * unsignaled and now in-order completions into the swcq.
+		 */
+		if (SQ_TYPE(hw_cqe)) {
+			swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)];
+			swsqe->cqe = *hw_cqe;
+			swsqe->complete = 1;
+			flush_completed_wrs(&qhp->wq, &chp->cq);
+		} else {
+			swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx];
+			*swcqe = *hw_cqe;
+			swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1));
+			t4_swcq_produce(&chp->cq);
+		}
+next_cqe:
+		t4_hwcq_consume(&chp->cq);
+		ret = t4_next_hw_cqe(&chp->cq, &hw_cqe);
+	}
+}
+
+static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq)
+{
+	if (CQE_OPCODE(cqe) == FW_RI_TERMINATE)
+		return 0;
+
+	if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe))
+		return 0;
+
+	if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe))
+		return 0;
+
+	if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq))
+		return 0;
+	return 1;
+}
+
+void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
+{
+	struct t4_cqe *cqe;
+	u32 ptr;
+
+	*count = 0;
+	CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count);
+	ptr = cq->sw_cidx;
+	while (ptr != cq->sw_pidx) {
+		cqe = &cq->sw_queue[ptr];
+		if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
+		    (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq))
+			(*count)++;
+		if (++ptr == cq->size)
+			ptr = 0;
+	}
+	CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count);
+}
+
+/*
  * poll_cq
  *
  * Caller must:
@@ -450,6 +514,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq,
 	}
 
 	/*
+	 * skip hw cqe's if the wq is flushed.
+	 */
+	if (wq->flushed && !SW_CQE(hw_cqe)) {
+		ret = -EAGAIN;
+		goto skip_cqe;
+	}
+
+	/*
+	 * skip TERMINATE cqes...
+	 */
+	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
+		ret = -EAGAIN;
+		goto skip_cqe;
+	}
+
+	/*
 	 * Special cqe for drain WR completions...
 	 */
 	if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) {
@@ -467,12 +547,22 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq,
 	 */
 	if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) {
 
-		/*
-		 * If this is an unsolicited read response, then the read
+		/* If we have reached here because of async
+		 * event or other error, and have egress error
+		 * then drop
+		 */
+		if (CQE_TYPE(hw_cqe) == 1) {
+			if (CQE_STATUS(hw_cqe))
+				t4_set_wq_in_error(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+
+		/* If this is an unsolicited read response, then the read
		 * was generated by the kernel driver as part of peer-2-peer
		 * connection setup. So ignore the completion.
		 */
-		if (!wq->sq.oldest_read) {
+		if (CQE_WRID_STAG(hw_cqe) == 1) {
			if (CQE_STATUS(hw_cqe))
				t4_set_wq_in_error(wq);
			ret = -EAGAIN;
@@ -480,6 +570,15 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq,
 		}
 
 		/*
+		 * Eat completions for unsignaled read WRs.
+		 */
+		if (!wq->sq.oldest_read->signaled) {
+			advance_oldest_read(wq);
+			ret = -EAGAIN;
+			goto skip_cqe;
+		}
+
+		/*
 		 * Don't write to the HWCQ, so create a new read req CQE
 		 * in local memory.
 		 */
@@ -489,16 +588,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq,
 	}
 
 	if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) {
-		*cqe_flushed = t4_wq_in_error(wq);
+		*cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH);
 		t4_set_wq_in_error(wq);
-		goto proc_cqe;
 	}
 
-	if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) {
-		ret = -EAGAIN;
-		goto skip_cqe;
-	}
-
 	/*
 	 * RECV completion.
 	 */
@@ -510,12 +603,7 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq,
		 * then we complete this with T4_ERR_MSN and mark the wq in
		 * error.
		 */
-
-		if (t4_rq_empty(wq)) {
-			t4_set_wq_in_error(wq);
-			ret = -EAGAIN;
-			goto skip_cqe;
-		}
+		BUG_ON(t4_rq_empty(wq));
 		if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
 			t4_set_wq_in_error(wq);
 			hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN));
@@ -556,9 +644,26 @@ proc_cqe:
 	 * completion.
 	 */
 	if (SQ_TYPE(hw_cqe)) {
-		wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe);
-		CTR2(KTR_IW_CXGBE, "%s completing sq idx %u",
-		     __func__, wq->sq.cidx);
+		int idx = CQE_WRID_SQ_IDX(hw_cqe);
+		BUG_ON(idx >= wq->sq.size);
+
+		/*
+		 * Account for any unsignaled completions completed by
+		 * this signaled completion. In this case, cidx points
+		 * to the first unsignaled one, and idx points to the
+		 * signaled one. So adjust in_use based on this delta.
+		 * if this is not completing any unsigned wrs, then the
+		 * delta will be 0. Handle wrapping also!
+		 */
+		if (idx < wq->sq.cidx)
+			wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx;
+		else
+			wq->sq.in_use -= idx - wq->sq.cidx;
+		BUG_ON(wq->sq.in_use <= 0 || wq->sq.in_use >= wq->sq.size);
+
+		wq->sq.cidx = (uint16_t)idx;
+		CTR2(KTR_IW_CXGBE, "%s completing sq idx %u\n",
+		     __func__, wq->sq.cidx);
 		*cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id;
 		t4_sq_consume(wq);
 	} else {
@@ -567,6 +672,7 @@ proc_cqe:
 		*cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id;
 		BUG_ON(t4_rq_empty(wq));
 		t4_rq_consume(wq);
+		goto skip_cqe;
 	}
 
 flush_wq:

Modified: stable/11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h
==============================================================================
--- stable/11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Thu Feb 8 13:22:40 2018	(r329016)
+++ stable/11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h	Thu Feb 8 14:39:07 2018	(r329017)
@@ -926,12 +926,11 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 add
 u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size);
 void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
 int c4iw_ofld_send(struct c4iw_rdev *rdev, struct mbuf *m);
-void c4iw_flush_hw_cq(struct t4_cq *cq);
+void c4iw_flush_hw_cq(struct c4iw_cq *chp);
 void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
-void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count);
 int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp);
 int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count);
-int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count);
+int c4iw_flush_sq(struct c4iw_qp *qhp);
 int c4iw_ev_handler(struct sge_iq *, const struct rsp_ctrl *);
 u16 c4iw_rqes_posted(struct c4iw_qp *qhp);
 int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe);

Modified: stable/11/sys/dev/cxgbe/iw_cxgbe/qp.c
==============================================================================
--- stable/11/sys/dev/cxgbe/iw_cxgbe/qp.c	Thu Feb 8 13:22:40 2018	(r329016)
+++ stable/11/sys/dev/cxgbe/iw_cxgbe/qp.c	Thu Feb 8 14:39:07 2018	(r329017)
@@ -734,6 +734,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_
 		swsqe->complete = 0;
 		swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) ||
 		    qhp->sq_sig_all;
+		swsqe->flushed = 0;
 		swsqe->wr_id = wr->wr_id;
 
 		init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16);
@@ -1010,10 +1011,18 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4i
 	CTR4(KTR_IW_CXGBE, "%s qhp %p rchp %p schp %p", __func__, qhp, rchp,
 	    schp);
 
-	/* locking hierarchy: cq lock first, then qp lock. */
+	/* locking heirarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&rchp->lock, flag);
 	spin_lock(&qhp->lock);
-	c4iw_flush_hw_cq(&rchp->cq);
+
+	if (qhp->wq.flushed) {
+		spin_unlock(&qhp->lock);
+		spin_unlock_irqrestore(&rchp->lock, flag);
+		return;
+	}
+	qhp->wq.flushed = 1;
+
+	c4iw_flush_hw_cq(rchp);
 	c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
 	flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
 	spin_unlock(&qhp->lock);
@@ -1024,12 +1033,11 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4i
 		spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
 	}
 
-	/* locking hierarchy: cq lock first, then qp lock. */
+	/* locking heirarchy: cq lock first, then qp lock. */
 	spin_lock_irqsave(&schp->lock, flag);
 	spin_lock(&qhp->lock);
-	c4iw_flush_hw_cq(&schp->cq);
-	c4iw_count_scqes(&schp->cq, &qhp->wq, &count);
-	flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
+	c4iw_flush_hw_cq(schp);
+	flushed = c4iw_flush_sq(qhp);
 	spin_unlock(&qhp->lock);
 	spin_unlock_irqrestore(&schp->lock, flag);
 	if (flushed && schp->ibcq.comp_handler) {
@@ -1047,8 +1055,8 @@ static void flush_qp(struct c4iw_qp *qhp)
 	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
 	schp = get_chp(qhp->rhp, qhp->attr.scq);
 
+	t4_set_wq_in_error(&qhp->wq);
 	if (qhp->ibqp.uobject) {
-		t4_set_wq_in_error(&qhp->wq);
 		t4_set_cq_in_error(&rchp->cq);
 		spin_lock_irqsave(&rchp->comp_handler_lock, flag);
 		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
@@ -1339,6 +1347,7 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_q
 		switch (attrs->next_state) {
 		case C4IW_QP_STATE_CLOSING:
 			BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2);
+			t4_set_wq_in_error(&qhp->wq);
 			set_state(qhp, C4IW_QP_STATE_CLOSING);
 			ep = qhp->ep;
 			if (!internal) {
@@ -1346,18 +1355,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_q
 				disconnect = 1;
 				c4iw_get_ep(&qhp->ep->com);
 			}
-			if (qhp->ibqp.uobject)
-				t4_set_wq_in_error(&qhp->wq);
 			ret = rdma_fini(rhp, qhp, ep);
 			if (ret)
 				goto err;
 			break;
 		case C4IW_QP_STATE_TERMINATE:
+			t4_set_wq_in_error(&qhp->wq);
 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
 			qhp->attr.layer_etype = attrs->layer_etype;
 			qhp->attr.ecode = attrs->ecode;
-			if (qhp->ibqp.uobject)
-				t4_set_wq_in_error(&qhp->wq);
 			ep = qhp->ep;
 			if (!internal)
 				terminate = 1;
@@ -1365,9 +1371,8 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_q
 				c4iw_get_ep(&qhp->ep->com);
 			break;
 		case C4IW_QP_STATE_ERROR:
+			t4_set_wq_in_error(&qhp->wq);
 			set_state(qhp, C4IW_QP_STATE_ERROR);
-			if (qhp->ibqp.uobject)
-				t4_set_wq_in_error(&qhp->wq);
 			if (!internal) {
 				abort = 1;
 				disconnect = 1;
@@ -1558,6 +1563,7 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_att
 	qhp->wq.sq.size = sqsize;
 	qhp->wq.sq.memsize = (sqsize + spg_ndesc) * sizeof *qhp->wq.sq.queue +
 	    16 * sizeof(__be64);
+	qhp->wq.sq.flush_cidx = -1;
 	qhp->wq.rq.size = rqsize;
 	qhp->wq.rq.memsize = (rqsize + spg_ndesc) * sizeof *qhp->wq.rq.queue;

Modified: stable/11/sys/dev/cxgbe/iw_cxgbe/t4.h
==============================================================================
--- stable/11/sys/dev/cxgbe/iw_cxgbe/t4.h	Thu Feb 8 13:22:40 2018	(r329016)
+++ stable/11/sys/dev/cxgbe/iw_cxgbe/t4.h	Thu Feb 8 14:39:07 2018	(r329017)
@@ -289,6 +289,7 @@ struct t4_swsqe {
 	int complete;
 	int signaled;
 	u16 idx;
+	int flushed;
 };
 
 struct t4_sq {
@@ -307,6 +308,7 @@ struct t4_sq {
 	u16 pidx;
 	u16 wq_pidx;
 	u16 flags;
+	short flush_cidx;
 };
 
 struct t4_swrqe {
@@ -337,6 +339,7 @@ struct t4_wq {
 	void __iomem *db;
 	void __iomem *gts;
 	struct c4iw_rdev *rdev;
+	int flushed;
 };
 
 static inline int t4_rqes_posted(struct t4_wq *wq)
@@ -414,6 +417,9 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8
 
 static inline void t4_sq_consume(struct t4_wq *wq)
 {
+	BUG_ON(wq->sq.in_use < 1);
+	if (wq->sq.cidx == wq->sq.flush_cidx)
+		wq->sq.flush_cidx = -1;
 	wq->sq.in_use--;
 	if (++wq->sq.cidx == wq->sq.size)
 		wq->sq.cidx = 0;
@@ -492,12 +498,14 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se)
 static inline void t4_swcq_produce(struct t4_cq *cq)
 {
 	cq->sw_in_use++;
+	BUG_ON(cq->sw_in_use >= cq->size);
 	if (++cq->sw_pidx == cq->size)
 		cq->sw_pidx = 0;
 }
 
 static inline void t4_swcq_consume(struct t4_cq *cq)
 {
+	BUG_ON(cq->sw_in_use < 1);
 	cq->sw_in_use--;
 	if (++cq->sw_cidx == cq->size)
 		cq->sw_cidx = 0;
@@ -540,6 +548,7 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, str
 		cq->error = 1;
 		printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid);
 	} else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) {
+		rmb();
 		*cqe = &cq->queue[cq->cidx];
 		ret = 0;
 	} else