From owner-svn-src-projects@freebsd.org Wed Mar 21 18:33:29 2018 Return-Path: Delivered-To: svn-src-projects@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 1B019F5B394 for ; Wed, 21 Mar 2018 18:33:29 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id C20F27F624; Wed, 21 Mar 2018 18:33:28 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id A2ACC2E4F0; Wed, 21 Mar 2018 18:33:28 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w2LIXSW4009384; Wed, 21 Mar 2018 18:33:28 GMT (envelope-from np@FreeBSD.org) Received: (from np@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w2LIXSvr009380; Wed, 21 Mar 2018 18:33:28 GMT (envelope-from np@FreeBSD.org) Message-Id: <201803211833.w2LIXSvr009380@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: np set sender to np@FreeBSD.org using -f From: Navdeep Parhar Date: Wed, 21 Mar 2018 18:33:28 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r331315 - projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Group: projects X-SVN-Commit-Author: np X-SVN-Commit-Paths: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Commit-Revision: 331315 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.25 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Mar 2018 18:33:29 -0000 Author: np Date: Wed Mar 21 18:33:28 2018 New Revision: 331315 URL: https://svnweb.freebsd.org/changeset/base/331315 Log: Back out r329017 and r329391, which were direct commits to stable/11. Sponsored by: Chelsio communications. Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c Wed Mar 21 18:02:56 2018 (r331314) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c Wed Mar 21 18:33:28 2018 (r331315) @@ -245,82 +245,137 @@ static void insert_sq_cqe(struct t4_wq *wq, struct t4_ t4_swcq_produce(cq); } -static void advance_oldest_read(struct t4_wq *wq); - -int c4iw_flush_sq(struct c4iw_qp *qhp) +int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count) { int flushed = 0; - struct t4_wq *wq = &qhp->wq; - struct c4iw_cq *chp = to_c4iw_cq(qhp->ibqp.send_cq); - struct t4_cq *cq = &chp->cq; - int idx; - struct t4_swsqe *swsqe; + struct t4_swsqe *swsqe = &wq->sq.sw_sq[wq->sq.cidx + count]; + int in_use = wq->sq.in_use - count; - if (wq->sq.flush_cidx == -1) - wq->sq.flush_cidx = wq->sq.cidx; - idx = wq->sq.flush_cidx; - BUG_ON(idx >= wq->sq.size); - while (idx != wq->sq.pidx) { - swsqe = &wq->sq.sw_sq[idx]; - BUG_ON(swsqe->flushed); - swsqe->flushed = 1; + BUG_ON(in_use < 0); + while (in_use--) { + swsqe->signaled = 0; insert_sq_cqe(wq, cq, swsqe); - if (wq->sq.oldest_read == swsqe) { - BUG_ON(swsqe->opcode != FW_RI_READ_REQ); - advance_oldest_read(wq); - } + swsqe++; + if (swsqe == (wq->sq.sw_sq + wq->sq.size)) + swsqe = wq->sq.sw_sq; flushed++; - if (++idx == wq->sq.size) - idx = 0; } - wq->sq.flush_cidx += flushed; - if (wq->sq.flush_cidx >= wq->sq.size) - wq->sq.flush_cidx -= wq->sq.size; return flushed; } +/* + * Move all CQEs from the HWCQ into the SWCQ. + */ +void c4iw_flush_hw_cq(struct t4_cq *cq) +{ + struct t4_cqe *cqe = NULL, *swcqe; + int ret; + + CTR3(KTR_IW_CXGBE, "%s cq %p cqid 0x%x", __func__, cq, cq->cqid); + ret = t4_next_hw_cqe(cq, &cqe); + while (!ret) { + CTR3(KTR_IW_CXGBE, "%s flushing hwcq cidx 0x%x swcq pidx 0x%x", + __func__, cq->cidx, cq->sw_pidx); + swcqe = &cq->sw_queue[cq->sw_pidx]; + *swcqe = *cqe; + swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); + t4_swcq_produce(cq); + t4_hwcq_consume(cq); + ret = t4_next_hw_cqe(cq, &cqe); + } +} + +static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) +{ + if (CQE_OPCODE(cqe) == FW_RI_TERMINATE) + return 0; + + if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe)) + return 0; + + if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe)) + return 0; + + if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq)) + return 0; + return 1; +} + +void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count) +{ + struct t4_cqe *cqe; + u32 ptr; + + *count = 0; + ptr = cq->sw_cidx; + while (ptr != cq->sw_pidx) { + cqe = &cq->sw_queue[ptr]; + if ((SQ_TYPE(cqe) || ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && + wq->sq.oldest_read)) && + (CQE_QPID(cqe) == wq->sq.qid)) + (*count)++; + if (++ptr == cq->size) + ptr = 0; + } + CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count); +} + +void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) +{ + struct t4_cqe *cqe; + u32 ptr; + + *count = 0; + CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count); + ptr = cq->sw_cidx; + while (ptr != cq->sw_pidx) { + cqe = &cq->sw_queue[ptr]; + if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) && + (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq)) + (*count)++; + if (++ptr == cq->size) + ptr = 0; + } + CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count); +} + static void flush_completed_wrs(struct t4_wq *wq, struct t4_cq *cq) { struct t4_swsqe *swsqe; - int cidx; + u16 ptr = wq->sq.cidx; + int count = wq->sq.in_use; + int unsignaled = 0; - if (wq->sq.flush_cidx == -1) - wq->sq.flush_cidx = wq->sq.cidx; - cidx = wq->sq.flush_cidx; - BUG_ON(cidx > wq->sq.size); - - while (cidx != wq->sq.pidx) { - swsqe = &wq->sq.sw_sq[cidx]; + swsqe = &wq->sq.sw_sq[ptr]; + while (count--) if (!swsqe->signaled) { - if (++cidx == wq->sq.size) - cidx = 0; + if (++ptr == wq->sq.size) + ptr = 0; + swsqe = &wq->sq.sw_sq[ptr]; + unsignaled++; } else if (swsqe->complete) { - BUG_ON(swsqe->flushed); - /* * Insert this completed cqe into the swcq. */ CTR3(KTR_IW_CXGBE, - "%s moving cqe into swcq sq idx %u cq idx %u\n", - __func__, cidx, cq->sw_pidx); + "%s moving cqe into swcq sq idx %u cq idx %u", + __func__, ptr, cq->sw_pidx); swsqe->cqe.header |= htonl(V_CQE_SWCQE(1)); cq->sw_queue[cq->sw_pidx] = swsqe->cqe; t4_swcq_produce(cq); - swsqe->flushed = 1; - if (++cidx == wq->sq.size) - cidx = 0; - wq->sq.flush_cidx = cidx; + swsqe->signaled = 0; + wq->sq.in_use -= unsignaled; + break; } else break; - } } static void create_read_req_cqe(struct t4_wq *wq, struct t4_cqe *hw_cqe, struct t4_cqe *read_cqe) { read_cqe->u.scqe.cidx = wq->sq.oldest_read->idx; - read_cqe->len = htonl(wq->sq.oldest_read->read_len); + read_cqe->len = cpu_to_be32(wq->sq.oldest_read->read_len); read_cqe->header = htonl(V_CQE_QPID(CQE_QPID(hw_cqe)) | V_CQE_SWCQE(SW_CQE(hw_cqe)) | V_CQE_OPCODE(FW_RI_READ_REQ) | @@ -328,6 +383,9 @@ static void create_read_req_cqe(struct t4_wq *wq, stru read_cqe->bits_type_ts = hw_cqe->bits_type_ts; } +/* + * Return a ptr to the next read wr in the SWSQ or NULL. + */ static void advance_oldest_read(struct t4_wq *wq) { @@ -347,128 +405,6 @@ static void advance_oldest_read(struct t4_wq *wq) } /* - * Move all CQEs from the HWCQ into the SWCQ. - * Deal with out-of-order and/or completions that complete - * prior unsignalled WRs. - */ -void c4iw_flush_hw_cq(struct c4iw_cq *chp) -{ - struct t4_cqe *hw_cqe, *swcqe, read_cqe; - struct c4iw_qp *qhp; - struct t4_swsqe *swsqe; - int ret; - - CTR3(KTR_IW_CXGBE, "%s c4iw_cq %p cqid 0x%x", __func__, chp, - chp->cq.cqid); - ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); - - /* - * This logic is similar to poll_cq(), but not quite the same - * unfortunately. Need to move pertinent HW CQEs to the SW CQ but - * also do any translation magic that poll_cq() normally does. - */ - while (!ret) { - qhp = get_qhp(chp->rhp, CQE_QPID(hw_cqe)); - - /* - * drop CQEs with no associated QP - */ - if (qhp == NULL) - goto next_cqe; - - if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) - goto next_cqe; - - if (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP) { - - /* - * If we have reached here because of async - * event or other error, and have egress error - * then drop - */ - if (CQE_TYPE(hw_cqe) == 1) { - goto next_cqe; - } - - /* - * drop peer2peer RTR reads. - */ - if (CQE_WRID_STAG(hw_cqe) == 1) - goto next_cqe; - - /* - * Eat completions for unsignaled read WRs. - */ - if (!qhp->wq.sq.oldest_read->signaled) { - advance_oldest_read(&qhp->wq); - goto next_cqe; - } - - /* - * Don't write to the HWCQ, create a new read req CQE - * in local memory and move it into the swcq. - */ - create_read_req_cqe(&qhp->wq, hw_cqe, &read_cqe); - hw_cqe = &read_cqe; - advance_oldest_read(&qhp->wq); - } - - /* if its a SQ completion, then do the magic to move all the - * unsignaled and now in-order completions into the swcq. - */ - if (SQ_TYPE(hw_cqe)) { - swsqe = &qhp->wq.sq.sw_sq[CQE_WRID_SQ_IDX(hw_cqe)]; - swsqe->cqe = *hw_cqe; - swsqe->complete = 1; - flush_completed_wrs(&qhp->wq, &chp->cq); - } else { - swcqe = &chp->cq.sw_queue[chp->cq.sw_pidx]; - *swcqe = *hw_cqe; - swcqe->header |= cpu_to_be32(V_CQE_SWCQE(1)); - t4_swcq_produce(&chp->cq); - } -next_cqe: - t4_hwcq_consume(&chp->cq); - ret = t4_next_hw_cqe(&chp->cq, &hw_cqe); - } -} - -static int cqe_completes_wr(struct t4_cqe *cqe, struct t4_wq *wq) -{ - if (CQE_OPCODE(cqe) == FW_RI_TERMINATE) - return 0; - - if ((CQE_OPCODE(cqe) == FW_RI_RDMA_WRITE) && RQ_TYPE(cqe)) - return 0; - - if ((CQE_OPCODE(cqe) == FW_RI_READ_RESP) && SQ_TYPE(cqe)) - return 0; - - if (CQE_SEND_OPCODE(cqe) && RQ_TYPE(cqe) && t4_rq_empty(wq)) - return 0; - return 1; -} - -void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) -{ - struct t4_cqe *cqe; - u32 ptr; - - *count = 0; - CTR2(KTR_IW_CXGBE, "%s count zero %d", __func__, *count); - ptr = cq->sw_cidx; - while (ptr != cq->sw_pidx) { - cqe = &cq->sw_queue[ptr]; - if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) && - (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq)) - (*count)++; - if (++ptr == cq->size) - ptr = 0; - } - CTR3(KTR_IW_CXGBE, "%s cq %p count %d", __func__, cq, *count); -} - -/* * poll_cq * * Caller must: @@ -514,22 +450,6 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, } /* - * skip hw cqe's if the wq is flushed. - */ - if (wq->flushed && !SW_CQE(hw_cqe)) { - ret = -EAGAIN; - goto skip_cqe; - } - - /* - * skip TERMINATE cqes... - */ - if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { - ret = -EAGAIN; - goto skip_cqe; - } - - /* * Special cqe for drain WR completions... */ if (CQE_OPCODE(hw_cqe) == C4IW_DRAIN_OPCODE) { @@ -547,22 +467,12 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, */ if (RQ_TYPE(hw_cqe) && (CQE_OPCODE(hw_cqe) == FW_RI_READ_RESP)) { - /* If we have reached here because of async - * event or other error, and have egress error - * then drop - */ - if (CQE_TYPE(hw_cqe) == 1) { - if (CQE_STATUS(hw_cqe)) - t4_set_wq_in_error(wq); - ret = -EAGAIN; - goto skip_cqe; - } - - /* If this is an unsolicited read response, then the read + /* + * If this is an unsolicited read response, then the read * was generated by the kernel driver as part of peer-2-peer * connection setup. So ignore the completion. */ - if (CQE_WRID_STAG(hw_cqe) == 1) { + if (!wq->sq.oldest_read) { if (CQE_STATUS(hw_cqe)) t4_set_wq_in_error(wq); ret = -EAGAIN; @@ -570,15 +480,6 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, } /* - * Eat completions for unsignaled read WRs. - */ - if (!wq->sq.oldest_read->signaled) { - advance_oldest_read(wq); - ret = -EAGAIN; - goto skip_cqe; - } - - /* * Don't write to the HWCQ, so create a new read req CQE * in local memory. */ @@ -588,10 +489,16 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, } if (CQE_STATUS(hw_cqe) || t4_wq_in_error(wq)) { - *cqe_flushed = (CQE_STATUS(hw_cqe) == T4_ERR_SWFLUSH); + *cqe_flushed = t4_wq_in_error(wq); t4_set_wq_in_error(wq); + goto proc_cqe; } + if (CQE_OPCODE(hw_cqe) == FW_RI_TERMINATE) { + ret = -EAGAIN; + goto skip_cqe; + } + /* * RECV completion. */ @@ -603,7 +510,12 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, * then we complete this with T4_ERR_MSN and mark the wq in * error. */ - BUG_ON(t4_rq_empty(wq)); + + if (t4_rq_empty(wq)) { + t4_set_wq_in_error(wq); + ret = -EAGAIN; + goto skip_cqe; + } if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) { t4_set_wq_in_error(wq); hw_cqe->header |= htonl(V_CQE_STATUS(T4_ERR_MSN)); @@ -644,26 +556,9 @@ proc_cqe: * completion. */ if (SQ_TYPE(hw_cqe)) { - int idx = CQE_WRID_SQ_IDX(hw_cqe); - BUG_ON(idx >= wq->sq.size); - - /* - * Account for any unsignaled completions completed by - * this signaled completion. In this case, cidx points - * to the first unsignaled one, and idx points to the - * signaled one. So adjust in_use based on this delta. - * if this is not completing any unsigned wrs, then the - * delta will be 0. Handle wrapping also! - */ - if (idx < wq->sq.cidx) - wq->sq.in_use -= wq->sq.size + idx - wq->sq.cidx; - else - wq->sq.in_use -= idx - wq->sq.cidx; - BUG_ON(wq->sq.in_use <= 0 || wq->sq.in_use >= wq->sq.size); - - wq->sq.cidx = (uint16_t)idx; - CTR2(KTR_IW_CXGBE, "%s completing sq idx %u\n", - __func__, wq->sq.cidx); + wq->sq.cidx = CQE_WRID_SQ_IDX(hw_cqe); + CTR2(KTR_IW_CXGBE, "%s completing sq idx %u", + __func__, wq->sq.cidx); *cookie = wq->sq.sw_sq[wq->sq.cidx].wr_id; t4_sq_consume(wq); } else { @@ -672,7 +567,6 @@ proc_cqe: *cookie = wq->rq.sw_rq[wq->rq.cidx].wr_id; BUG_ON(t4_rq_empty(wq)); t4_rq_consume(wq); - goto skip_cqe; } flush_wq: Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h Wed Mar 21 18:02:56 2018 (r331314) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h Wed Mar 21 18:33:28 2018 (r331315) @@ -926,11 +926,12 @@ void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 add u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size); void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size); int c4iw_ofld_send(struct c4iw_rdev *rdev, struct mbuf *m); -void c4iw_flush_hw_cq(struct c4iw_cq *chp); +void c4iw_flush_hw_cq(struct t4_cq *cq); void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count); +void c4iw_count_scqes(struct t4_cq *cq, struct t4_wq *wq, int *count); int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp); int c4iw_flush_rq(struct t4_wq *wq, struct t4_cq *cq, int count); -int c4iw_flush_sq(struct c4iw_qp *qhp); +int c4iw_flush_sq(struct t4_wq *wq, struct t4_cq *cq, int count); int c4iw_ev_handler(struct sge_iq *, const struct rsp_ctrl *); u16 c4iw_rqes_posted(struct c4iw_qp *qhp); int c4iw_post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe); Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c Wed Mar 21 18:02:56 2018 (r331314) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c Wed Mar 21 18:33:28 2018 (r331315) @@ -734,7 +734,6 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_ swsqe->complete = 0; swsqe->signaled = (wr->send_flags & IB_SEND_SIGNALED) || qhp->sq_sig_all; - swsqe->flushed = 0; swsqe->wr_id = wr->wr_id; init_wr_hdr(wqe, qhp->wq.sq.pidx, fw_opcode, fw_flags, len16); @@ -1011,18 +1010,10 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4i CTR4(KTR_IW_CXGBE, "%s qhp %p rchp %p schp %p", __func__, qhp, rchp, schp); - /* locking heirarchy: cq lock first, then qp lock. */ + /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&rchp->lock, flag); spin_lock(&qhp->lock); - - if (qhp->wq.flushed) { - spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, flag); - return; - } - qhp->wq.flushed = 1; - - c4iw_flush_hw_cq(rchp); + c4iw_flush_hw_cq(&rchp->cq); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); @@ -1033,12 +1024,12 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4i spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); } - /* locking heirarchy: cq lock first, then qp lock. */ + /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); - if (schp != rchp) - c4iw_flush_hw_cq(schp); - flushed = c4iw_flush_sq(qhp); + c4iw_flush_hw_cq(&schp->cq); + c4iw_count_scqes(&schp->cq, &qhp->wq, &count); + flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); if (flushed && schp->ibcq.comp_handler) { @@ -1056,8 +1047,8 @@ static void flush_qp(struct c4iw_qp *qhp) rchp = get_chp(qhp->rhp, qhp->attr.rcq); schp = get_chp(qhp->rhp, qhp->attr.scq); - t4_set_wq_in_error(&qhp->wq); if (qhp->ibqp.uobject) { + t4_set_wq_in_error(&qhp->wq); t4_set_cq_in_error(&rchp->cq); spin_lock_irqsave(&rchp->comp_handler_lock, flag); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); @@ -1348,7 +1339,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_q switch (attrs->next_state) { case C4IW_QP_STATE_CLOSING: BUG_ON(atomic_read(&qhp->ep->com.kref.refcount) < 2); - t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_CLOSING); ep = qhp->ep; if (!internal) { @@ -1356,15 +1346,18 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_q disconnect = 1; c4iw_get_ep(&qhp->ep->com); } + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); ret = rdma_fini(rhp, qhp, ep); if (ret) goto err; break; case C4IW_QP_STATE_TERMINATE: - t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_TERMINATE); qhp->attr.layer_etype = attrs->layer_etype; qhp->attr.ecode = attrs->ecode; + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); ep = qhp->ep; if (!internal) terminate = 1; @@ -1372,8 +1365,9 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_q c4iw_get_ep(&qhp->ep->com); break; case C4IW_QP_STATE_ERROR: - t4_set_wq_in_error(&qhp->wq); set_state(qhp, C4IW_QP_STATE_ERROR); + if (qhp->ibqp.uobject) + t4_set_wq_in_error(&qhp->wq); if (!internal) { abort = 1; disconnect = 1; @@ -1564,7 +1558,6 @@ c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_att qhp->wq.sq.size = sqsize; qhp->wq.sq.memsize = (sqsize + spg_ndesc) * sizeof *qhp->wq.sq.queue + 16 * sizeof(__be64); - qhp->wq.sq.flush_cidx = -1; qhp->wq.rq.size = rqsize; qhp->wq.rq.memsize = (rqsize + spg_ndesc) * sizeof *qhp->wq.rq.queue; Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h Wed Mar 21 18:02:56 2018 (r331314) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h Wed Mar 21 18:33:28 2018 (r331315) @@ -289,7 +289,6 @@ struct t4_swsqe { int complete; int signaled; u16 idx; - int flushed; }; struct t4_sq { @@ -308,7 +307,6 @@ struct t4_sq { u16 pidx; u16 wq_pidx; u16 flags; - short flush_cidx; }; struct t4_swrqe { @@ -339,7 +337,6 @@ struct t4_wq { void __iomem *db; void __iomem *gts; struct c4iw_rdev *rdev; - int flushed; }; static inline int t4_rqes_posted(struct t4_wq *wq) @@ -417,9 +414,6 @@ static inline void t4_sq_produce(struct t4_wq *wq, u8 static inline void t4_sq_consume(struct t4_wq *wq) { - BUG_ON(wq->sq.in_use < 1); - if (wq->sq.cidx == wq->sq.flush_cidx) - wq->sq.flush_cidx = -1; wq->sq.in_use--; if (++wq->sq.cidx == wq->sq.size) wq->sq.cidx = 0; @@ -498,14 +492,12 @@ static inline int t4_arm_cq(struct t4_cq *cq, int se) static inline void t4_swcq_produce(struct t4_cq *cq) { cq->sw_in_use++; - BUG_ON(cq->sw_in_use >= cq->size); if (++cq->sw_pidx == cq->size) cq->sw_pidx = 0; } static inline void t4_swcq_consume(struct t4_cq *cq) { - BUG_ON(cq->sw_in_use < 1); cq->sw_in_use--; if (++cq->sw_cidx == cq->size) cq->sw_cidx = 0; @@ -548,7 +540,6 @@ static inline int t4_next_hw_cqe(struct t4_cq *cq, str cq->error = 1; printk(KERN_ERR MOD "cq overflow cqid %u\n", cq->cqid); } else if (t4_valid_cqe(cq, &cq->queue[cq->cidx])) { - rmb(); *cqe = &cq->queue[cq->cidx]; ret = 0; } else From owner-svn-src-projects@freebsd.org Wed Mar 21 18:37:49 2018 Return-Path: Delivered-To: svn-src-projects@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 87A9CF5B954 for ; Wed, 21 Mar 2018 18:37:49 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 3E7E97F90A; Wed, 21 Mar 2018 18:37:49 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 398E32E4F2; Wed, 21 Mar 2018 18:37:49 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w2LIbnsx009588; Wed, 21 Mar 2018 18:37:49 GMT (envelope-from np@FreeBSD.org) Received: (from np@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w2LIbnO3009587; Wed, 21 Mar 2018 18:37:49 GMT (envelope-from np@FreeBSD.org) Message-Id: <201803211837.w2LIbnO3009587@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: np set sender to np@FreeBSD.org using -f From: Navdeep Parhar Date: Wed, 21 Mar 2018 18:37:49 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r331316 - projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Group: projects X-SVN-Commit-Author: np X-SVN-Commit-Paths: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Commit-Revision: 331316 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.25 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Mar 2018 18:37:49 -0000 Author: np Date: Wed Mar 21 18:37:48 2018 New Revision: 331316 URL: https://svnweb.freebsd.org/changeset/base/331316 Log: MFC r320418. Note that the socket lock _is_ the same as so_rcv's lock in 11 and this is a no-op in this branch. Sponsored by: Chelsio Communications Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Directory Properties: projects/bsd_rdma_4_9_stable_11/ (props changed) Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:33:28 2018 (r331315) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:37:48 2018 (r331316) @@ -622,11 +622,10 @@ init_iwarp_socket(struct socket *so, void *arg) struct sockopt sopt; int on = 1; - /* Note that SOCK_LOCK(so) is same as SOCKBUF_LOCK(&so->so_rcv) */ - SOCK_LOCK(so); + SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); so->so_state |= SS_NBIO; - SOCK_UNLOCK(so); + SOCKBUF_UNLOCK(&so->so_rcv); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; From owner-svn-src-projects@freebsd.org Wed Mar 21 18:39:30 2018 Return-Path: Delivered-To: svn-src-projects@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id E7DDFF5BB35 for ; Wed, 21 Mar 2018 18:39:29 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 96CA97FA8C; Wed, 21 Mar 2018 18:39:29 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 8DB132E4F3; Wed, 21 Mar 2018 18:39:29 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w2LIdTL9009691; Wed, 21 Mar 2018 18:39:29 GMT (envelope-from np@FreeBSD.org) Received: (from np@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w2LIdTte009690; Wed, 21 Mar 2018 18:39:29 GMT (envelope-from np@FreeBSD.org) Message-Id: <201803211839.w2LIdTte009690@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: np set sender to np@FreeBSD.org using -f From: Navdeep Parhar Date: Wed, 21 Mar 2018 18:39:29 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r331317 - projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Group: projects X-SVN-Commit-Author: np X-SVN-Commit-Paths: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Commit-Revision: 331317 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.25 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Mar 2018 18:39:30 -0000 Author: np Date: Wed Mar 21 18:39:29 2018 New Revision: 331317 URL: https://svnweb.freebsd.org/changeset/base/331317 Log: MFC r323082: cxgbe/iw_cxgbe: Set TCP_NODELAY before initiating connection so that t4_tom picks it up right away. This is less work than waiting for the connection to be established before applying the setting. Sponsored by: Chelsio Communications Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Directory Properties: projects/bsd_rdma_4_9_stable_11/ (props changed) Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:37:48 2018 (r331316) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:39:29 2018 (r331317) @@ -108,6 +108,7 @@ static void process_peer_close(struct c4iw_ep *ep); static void process_conn_error(struct c4iw_ep *ep); static void process_close_complete(struct c4iw_ep *ep); static void ep_timeout(unsigned long arg); +static void setiwsockopt(struct socket *so); static void init_iwarp_socket(struct socket *so, void *arg); static void uninit_iwarp_socket(struct socket *so); static void process_data(struct c4iw_ep *ep); @@ -616,16 +617,12 @@ process_close_complete(struct c4iw_ep *ep) } static void -init_iwarp_socket(struct socket *so, void *arg) +setiwsockopt(struct socket *so) { int rc; struct sockopt sopt; int on = 1; - SOCKBUF_LOCK(&so->so_rcv); - soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); - so->so_state |= SS_NBIO; - SOCKBUF_UNLOCK(&so->so_rcv); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = IPPROTO_TCP; sopt.sopt_name = TCP_NODELAY; @@ -640,6 +637,16 @@ init_iwarp_socket(struct socket *so, void *arg) } static void +init_iwarp_socket(struct socket *so, void *arg) +{ + + SOCKBUF_LOCK(&so->so_rcv); + soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOCKBUF_UNLOCK(&so->so_rcv); +} + +static void uninit_iwarp_socket(struct socket *so) { @@ -734,6 +741,7 @@ process_newconn(struct iw_cm_id *parent_cm_id, struct free(local, M_SONAME); free(remote, M_SONAME); + setiwsockopt(child_so); init_iwarp_socket(child_so, &child_ep->com); c4iw_get_ep(&parent_ep->com); init_timer(&child_ep->timer); @@ -2233,6 +2241,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_ } fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); + setiwsockopt(cm_id->so); state_set(&ep->com, CONNECTING); ep->tos = 0; ep->com.local_addr = cm_id->local_addr; From owner-svn-src-projects@freebsd.org Wed Mar 21 18:57:32 2018 Return-Path: Delivered-To: svn-src-projects@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id DA507F5D268 for ; Wed, 21 Mar 2018 18:57:31 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 8BE3F807BF; Wed, 21 Mar 2018 18:57:31 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 82A0E2E832; Wed, 21 Mar 2018 18:57:31 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id w2LIvVJE019841; Wed, 21 Mar 2018 18:57:31 GMT (envelope-from np@FreeBSD.org) Received: (from np@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id w2LIvVHM019839; Wed, 21 Mar 2018 18:57:31 GMT (envelope-from np@FreeBSD.org) Message-Id: <201803211857.w2LIvVHM019839@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: np set sender to np@FreeBSD.org using -f From: Navdeep Parhar Date: Wed, 21 Mar 2018 18:57:31 +0000 (UTC) To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r331318 - projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Group: projects X-SVN-Commit-Author: np X-SVN-Commit-Paths: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe X-SVN-Commit-Revision: 331318 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-projects@freebsd.org X-Mailman-Version: 2.1.25 Precedence: list List-Id: "SVN commit messages for the src " projects" tree" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Mar 2018 18:57:32 -0000 Author: np Date: Wed Mar 21 18:57:31 2018 New Revision: 331318 URL: https://svnweb.freebsd.org/changeset/base/331318 Log: MFC r326169 (cxgbe portion). Sponsored by: Chelsio Communications Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cq.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/device.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/iw_cxgbe.h projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/mem.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/provider.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/qp.c projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/t4.h projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/user.h Directory Properties: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/ (props changed) Modified: projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c ============================================================================== --- projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:39:29 2018 (r331317) +++ projects/bsd_rdma_4_9_stable_11/sys/dev/cxgbe/iw_cxgbe/cm.c Wed Mar 21 18:57:31 2018 (r331318) @@ -46,8 +46,11 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include +#include +#include #include #include #include @@ -78,6 +81,8 @@ static struct work_struct c4iw_task; static struct workqueue_struct *c4iw_taskq; static LIST_HEAD(err_cqe_list); static spinlock_t err_cqe_lock; +static LIST_HEAD(listen_port_list); +static DEFINE_MUTEX(listen_port_mutex); static void process_req(struct work_struct *ctx); static void start_ep_timer(struct c4iw_ep *ep); @@ -85,12 +90,7 @@ static int stop_ep_timer(struct c4iw_ep *ep); static int set_tcpinfo(struct c4iw_ep *ep); static void process_timeout(struct c4iw_ep *ep); static void process_err_cqes(void); -static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc); -static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); -static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state tostate); static void *alloc_ep(int size, gfp_t flags); -static int find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4); static void close_socket(struct socket *so); static int send_mpa_req(struct c4iw_ep *ep); static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen); @@ -120,6 +120,15 @@ static int process_terminate(struct c4iw_ep *ep); static int terminate(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m); static int add_ep_to_req_list(struct c4iw_ep *ep, int ep_events); +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep); +static int rem_ep_from_listenlist(struct c4iw_listen_ep *lep); +static struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so); +static int get_ifnet_from_raddr(struct sockaddr_storage *raddr, + struct ifnet **ifp); +static void process_newconn(struct c4iw_listen_ep *master_lep, + struct socket *new_so); #define START_EP_TIMER(ep) \ do { \ CTR3(KTR_IW_CXGBE, "start_ep_timer (%s:%d) ep %p", \ @@ -134,6 +143,34 @@ static int add_ep_to_req_list(struct c4iw_ep *ep, int stop_ep_timer(ep); \ }) +#define GET_LOCAL_ADDR(pladdr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_LOCAL_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getsockaddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getsockaddr(so, (struct sockaddr **)&__a); \ + *(pladdr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + +#define GET_REMOTE_ADDR(praddr, so) \ + do { \ + struct sockaddr_storage *__a = NULL; \ + struct inpcb *__inp = sotoinpcb(so); \ + KASSERT(__inp != NULL, \ + ("GET_REMOTE_ADDR(%s):so:%p, inp = NULL", __func__, so)); \ + if (__inp->inp_vflag & INP_IPV4) \ + in_getpeeraddr(so, (struct sockaddr **)&__a); \ + else \ + in6_getpeeraddr(so, (struct sockaddr **)&__a); \ + *(praddr) = *__a; \ + free(__a, M_SONAME); \ + } while (0) + #ifdef KTR static char *states[] = { "idle", @@ -152,7 +189,6 @@ static char *states[] = { }; #endif - static void deref_cm_id(struct c4iw_ep_common *epc) { epc->cm_id->rem_ref(epc->cm_id); @@ -179,13 +215,184 @@ static void ref_qp(struct c4iw_ep *ep) set_bit(QP_REFED, &ep->com.history); c4iw_qp_add_ref(&ep->com.qp->ibqp); } +/* allocated per TCP port while listening */ +struct listen_port_info { + uint16_t port_num; /* TCP port address */ + struct list_head list; /* belongs to listen_port_list */ + struct list_head lep_list; /* per port lep list */ + uint32_t refcnt; /* number of lep's listening */ +}; +/* + * Following two lists are used to manage INADDR_ANY listeners: + * 1)listen_port_list + * 2)lep_list + * + * Below is the INADDR_ANY listener lists overview on a system with a two port + * adapter: + * |------------------| + * |listen_port_list | + * |------------------| + * | + * | |-----------| |-----------| + * | | port_num:X| | port_num:X| + * |--------------|-list------|-------|-list------|-------.... + * | lep_list----| | lep_list----| + * | refcnt | | | refcnt | | + * | | | | | | + * | | | | | | + * |-----------| | |-----------| | + * | | + * | | + * | | + * | | lep1 lep2 + * | | |----------------| |----------------| + * | |----| listen_ep_list |----| listen_ep_list | + * | |----------------| |----------------| + * | + * | + * | lep1 lep2 + * | |----------------| |----------------| + * |---| listen_ep_list |----| listen_ep_list | + * |----------------| |----------------| + * + * Because of two port adapter, the number of lep's are two(lep1 & lep2) for + * each TCP port number. + * + * Here 'lep1' is always marked as Master lep, because solisten() is always + * called through first lep. + * + */ +static struct listen_port_info * +add_ep_to_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) + goto found_port; + + port_info = malloc(sizeof(*port_info), M_CXGBE, M_WAITOK); + port_info->port_num = port; + port_info->refcnt = 0; + + list_add_tail(&port_info->list, &listen_port_list); + INIT_LIST_HEAD(&port_info->lep_list); + +found_port: + port_info->refcnt++; + list_add_tail(&lep->listen_ep_list, &port_info->lep_list); + mutex_unlock(&listen_port_mutex); + return port_info; +} + +static int +rem_ep_from_listenlist(struct c4iw_listen_ep *lep) +{ + uint16_t port; + struct listen_port_info *port_info = NULL; + struct sockaddr_storage *laddr = &lep->com.local_addr; + int refcnt = 0; + + port = (laddr->ss_family == AF_INET) ? + ((struct sockaddr_in *)laddr)->sin_port : + ((struct sockaddr_in6 *)laddr)->sin6_port; + + mutex_lock(&listen_port_mutex); + + /* get the port_info structure based on the lep's port address */ + list_for_each_entry(port_info, &listen_port_list, list) { + if (port_info->port_num == port) { + port_info->refcnt--; + refcnt = port_info->refcnt; + /* remove the current lep from the listen list */ + list_del(&lep->listen_ep_list); + if (port_info->refcnt == 0) { + /* Remove this entry from the list as there + * are no more listeners for this port_num. + */ + list_del(&port_info->list); + kfree(port_info); + } + break; + } + } + mutex_unlock(&listen_port_mutex); + return refcnt; +} + +/* + * Find the lep that belongs to the ifnet on which the SYN frame was received. + */ +struct c4iw_listen_ep * +find_real_listen_ep(struct c4iw_listen_ep *master_lep, struct socket *so) +{ + struct adapter *adap = NULL; + struct c4iw_listen_ep *lep = NULL; + struct sockaddr_storage remote = { 0 }; + struct ifnet *new_conn_ifp = NULL; + struct listen_port_info *port_info = NULL; + int err = 0, i = 0, + found_portinfo = 0, found_lep = 0; + uint16_t port; + + /* STEP 1: get 'ifnet' based on socket's remote address */ + GET_REMOTE_ADDR(&remote, so); + + err = get_ifnet_from_raddr(&remote, &new_conn_ifp); + if (err) { + CTR4(KTR_IW_CXGBE, "%s: Failed to get ifnet, sock %p, " + "master_lep %p err %d", + __func__, so, master_lep, err); + return (NULL); + } + + /* STEP 2: Find 'port_info' with listener local port address. */ + port = (master_lep->com.local_addr.ss_family == AF_INET) ? + ((struct sockaddr_in *)&master_lep->com.local_addr)->sin_port : + ((struct sockaddr_in6 *)&master_lep->com.local_addr)->sin6_port; + + + mutex_lock(&listen_port_mutex); + list_for_each_entry(port_info, &listen_port_list, list) + if (port_info->port_num == port) { + found_portinfo =1; + break; + } + if (!found_portinfo) + goto out; + + /* STEP 3: Traverse through list of lep's that are bound to the current + * TCP port address and find the lep that belongs to the ifnet on which + * the SYN frame was received. + */ + list_for_each_entry(lep, &port_info->lep_list, listen_ep_list) { + adap = lep->com.dev->rdev.adap; + for_each_port(adap, i) { + if (new_conn_ifp == adap->port[i]->vi[0].ifp) { + found_lep =1; + goto out; + } + } + } +out: + mutex_unlock(&listen_port_mutex); + return found_lep ? lep : (NULL); +} + static void process_timeout(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int abort = 1; - mutex_lock(&ep->com.mutex); CTR4(KTR_IW_CXGBE, "%s ep :%p, tid:%u, state %d", __func__, ep, ep->hwtid, ep->com.state); set_bit(TIMEDOUT, &ep->com.history); @@ -221,7 +428,6 @@ static void process_timeout(struct c4iw_ep *ep) , __func__, ep, ep->hwtid, ep->com.state); abort = 0; } - mutex_unlock(&ep->com.mutex); if (abort) c4iw_ep_disconnect(ep, 1, GFP_KERNEL); c4iw_put_ep(&ep->com); @@ -273,14 +479,16 @@ process_req(struct work_struct *ctx) ep_events = epc->ep_events; epc->ep_events = 0; spin_unlock_irqrestore(&req_lock, flag); - CTR4(KTR_IW_CXGBE, "%s: so %p, ep %p, events 0x%x", __func__, - epc->so, epc, ep_events); + mutex_lock(&epc->mutex); + CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, ep_state %s events 0x%x", + __func__, epc->so, epc, states[epc->state], ep_events); if (ep_events & C4IW_EVENT_TERM) process_terminate((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_TIMEOUT) process_timeout((struct c4iw_ep *)epc); if (ep_events & C4IW_EVENT_SOCKET) process_socket_event((struct c4iw_ep *)epc); + mutex_unlock(&epc->mutex); c4iw_put_ep(epc); process_err_cqes(); spin_lock_irqsave(&req_lock, flag); @@ -321,55 +529,67 @@ done: return (rc); } - static int -find_route(__be32 local_ip, __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, struct nhop4_extended *pnh4) +get_ifnet_from_raddr(struct sockaddr_storage *raddr, struct ifnet **ifp) { - struct in_addr addr; - int err; + int err = 0; - CTR5(KTR_IW_CXGBE, "%s:frtB %x, %x, %d, %d", __func__, local_ip, - peer_ip, ntohs(local_port), ntohs(peer_port)); + if (raddr->ss_family == AF_INET) { + struct sockaddr_in *raddr4 = (struct sockaddr_in *)raddr; + struct nhop4_extended nh4 = {0}; - addr.s_addr = peer_ip; - err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, addr, NHR_REF, 0, pnh4); + err = fib4_lookup_nh_ext(RT_DEFAULT_FIB, raddr4->sin_addr, + NHR_REF, 0, &nh4); + *ifp = nh4.nh_ifp; + if (err) + fib4_free_nh_ext(RT_DEFAULT_FIB, &nh4); + } else { + struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)raddr; + struct nhop6_extended nh6 = {0}; + struct in6_addr addr6; + uint32_t scopeid; - CTR2(KTR_IW_CXGBE, "%s:frtE %d", __func__, err); + memset(&addr6, 0, sizeof(addr6)); + in6_splitscope((struct in6_addr *)&raddr6->sin6_addr, + &addr6, &scopeid); + err = fib6_lookup_nh_ext(RT_DEFAULT_FIB, &addr6, scopeid, + NHR_REF, 0, &nh6); + *ifp = nh6.nh_ifp; + if (err) + fib6_free_nh_ext(RT_DEFAULT_FIB, &nh6); + } + + CTR2(KTR_IW_CXGBE, "%s: return: %d", __func__, err); return err; } static void close_socket(struct socket *so) { - uninit_iwarp_socket(so); - sodisconnect(so); + soclose(so); } static void process_peer_close(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int disconnect = 1; int release = 0; CTR4(KTR_IW_CXGBE, "%s:ppcB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); - mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT: - CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc1 %p MPA_REQ_WAIT DEAD", __func__, ep); - __state_set(&ep->com, CLOSING); - break; - + /* Fallthrough */ case MPA_REQ_SENT: - CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT CLOSING", + CTR2(KTR_IW_CXGBE, "%s:ppc2 %p MPA_REQ_SENT DEAD", __func__, ep); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; connect_reply_upcall(ep, -ECONNABORTED); disconnect = 0; @@ -388,21 +608,20 @@ process_peer_close(struct c4iw_ep *ep) */ CTR2(KTR_IW_CXGBE, "%s:ppc3 %p MPA_REQ_RCVD CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); - c4iw_get_ep(&ep->com); + ep->com.state = CLOSING; break; case MPA_REP_SENT: CTR2(KTR_IW_CXGBE, "%s:ppc4 %p MPA_REP_SENT CLOSING", __func__, ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; break; case FPDU_MODE: CTR2(KTR_IW_CXGBE, "%s:ppc5 %p FPDU_MODE CLOSING", __func__, ep); START_EP_TIMER(ep); - __state_set(&ep->com, CLOSING); + ep->com.state = CLOSING; attrs.next_state = C4IW_QP_STATE_CLOSING; c4iw_modify_qp(ep->com.dev, ep->com.qp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); @@ -418,7 +637,7 @@ process_peer_close(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:ppc7 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; disconnect = 0; break; @@ -433,7 +652,7 @@ process_peer_close(struct c4iw_ep *ep) } close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; disconnect = 0; break; @@ -450,7 +669,6 @@ process_peer_close(struct c4iw_ep *ep) break; } - mutex_unlock(&ep->com.mutex); if (disconnect) { @@ -469,11 +687,10 @@ process_peer_close(struct c4iw_ep *ep) static void process_conn_error(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int ret; int state; - mutex_lock(&ep->com.mutex); state = ep->com.state; CTR5(KTR_IW_CXGBE, "%s:pceB ep %p so %p so->so_error %u state %s", __func__, ep, ep->com.so, ep->com.so->so_error, @@ -483,6 +700,7 @@ process_conn_error(struct c4iw_ep *ep) case MPA_REQ_WAIT: STOP_EP_TIMER(ep); + c4iw_put_ep(&ep->parent_ep->com); break; case MPA_REQ_SENT: @@ -496,13 +714,6 @@ process_conn_error(struct c4iw_ep *ep) break; case MPA_REQ_RCVD: - - /* - * We're gonna mark this puppy DEAD, but keep - * the reference on it until the ULP accepts or - * rejects the CR. - */ - c4iw_get_ep(&ep->com); break; case MORIBUND: @@ -531,7 +742,6 @@ process_conn_error(struct c4iw_ep *ep) case DEAD: CTR2(KTR_IW_CXGBE, "%s so_error %d IN DEAD STATE!!!!", __func__, ep->com.so->so_error); - mutex_unlock(&ep->com.mutex); return; default: @@ -541,10 +751,9 @@ process_conn_error(struct c4iw_ep *ep) if (state != ABORTING) { close_socket(ep->com.so); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; c4iw_put_ep(&ep->com); } - mutex_unlock(&ep->com.mutex); CTR2(KTR_IW_CXGBE, "%s:pceE %p", __func__, ep); return; } @@ -552,14 +761,13 @@ process_conn_error(struct c4iw_ep *ep) static void process_close_complete(struct c4iw_ep *ep) { - struct c4iw_qp_attributes attrs; + struct c4iw_qp_attributes attrs = {0}; int release = 0; CTR4(KTR_IW_CXGBE, "%s:pccB ep %p so %p state %s", __func__, ep, ep->com.so, states[ep->com.state]); /* The cm_id may be null if we failed to connect */ - mutex_lock(&ep->com.mutex); set_bit(CLOSE_CON_RPL, &ep->com.history); switch (ep->com.state) { @@ -567,7 +775,7 @@ process_close_complete(struct c4iw_ep *ep) case CLOSING: CTR2(KTR_IW_CXGBE, "%s:pcc1 %p CLOSING MORIBUND", __func__, ep); - __state_set(&ep->com, MORIBUND); + ep->com.state = MORIBUND; break; case MORIBUND: @@ -588,7 +796,7 @@ process_close_complete(struct c4iw_ep *ep) close_socket(ep->com.so); close_complete_upcall(ep, 0); - __state_set(&ep->com, DEAD); + ep->com.state = DEAD; release = 1; break; @@ -605,12 +813,11 @@ process_close_complete(struct c4iw_ep *ep) panic("%s:pcc6 %p unknown ep state", __func__, ep); break; } - mutex_unlock(&ep->com.mutex); if (release) { CTR2(KTR_IW_CXGBE, "%s:pcc8 %p", __func__, ep); - c4iw_put_ep(&ep->com); + release_ep_resources(ep); } CTR2(KTR_IW_CXGBE, "%s:pccE %p", __func__, ep); return; @@ -639,49 +846,56 @@ setiwsockopt(struct socket *so) static void init_iwarp_socket(struct socket *so, void *arg) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); - so->so_state |= SS_NBIO; - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_set(so, SO_RCV, c4iw_so_upcall, arg); + so->so_state |= SS_NBIO; + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void uninit_iwarp_socket(struct socket *so) { - - SOCKBUF_LOCK(&so->so_rcv); - soupcall_clear(so, SO_RCV); - SOCKBUF_UNLOCK(&so->so_rcv); + if (SOLISTENING(so)) { + SOLISTEN_LOCK(so); + solisten_upcall_set(so, NULL, NULL); + SOLISTEN_UNLOCK(so); + } else { + SOCKBUF_LOCK(&so->so_rcv); + soupcall_clear(so, SO_RCV); + SOCKBUF_UNLOCK(&so->so_rcv); + } } static void process_data(struct c4iw_ep *ep) { - struct sockaddr_in *local, *remote; int disconnect = 0; CTR5(KTR_IW_CXGBE, "%s: so %p, ep %p, state %s, sbused %d", __func__, ep->com.so, ep, states[ep->com.state], sbused(&ep->com.so->so_rcv)); - switch (state_read(&ep->com)) { + switch (ep->com.state) { case MPA_REQ_SENT: disconnect = process_mpa_reply(ep); break; case MPA_REQ_WAIT: - in_getsockaddr(ep->com.so, (struct sockaddr **)&local); - in_getpeeraddr(ep->com.so, (struct sockaddr **)&remote); - ep->com.local_addr = *local; - ep->com.remote_addr = *remote; - free(local, M_SONAME); - free(remote, M_SONAME); disconnect = process_mpa_request(ep); + if (disconnect) + /* Refered in process_newconn() */ + c4iw_put_ep(&ep->parent_ep->com); break; default: if (sbused(&ep->com.so->so_rcv)) log(LOG_ERR, "%s: Unexpected streaming data. ep %p, " "state %d, so %p, so_state 0x%x, sbused %u\n", - __func__, ep, state_read(&ep->com), ep->com.so, + __func__, ep, ep->com.state, ep->com.so, ep->com.so->so_state, sbused(&ep->com.so->so_rcv)); break; } @@ -705,58 +919,122 @@ process_connected(struct c4iw_ep *ep) return; err: close_socket(so); - state_set(&ep->com, DEAD); + ep->com.state = DEAD; c4iw_put_ep(&ep->com); return; } -void -process_newconn(struct iw_cm_id *parent_cm_id, struct socket *child_so) +static inline int c4iw_zero_addr(struct sockaddr *addr) { - struct c4iw_ep *child_ep; - struct sockaddr_in *local; - struct sockaddr_in *remote; - struct c4iw_ep *parent_ep = parent_cm_id->provider_data; + struct in6_addr *ip6; + + if (addr->sa_family == AF_INET) + return IN_ZERONET( + ntohl(((struct sockaddr_in *)addr)->sin_addr.s_addr)); + else { + ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr; + return (ip6->s6_addr32[0] | ip6->s6_addr32[1] | + ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0; + } +} + +static inline int c4iw_loopback_addr(struct sockaddr *addr) +{ + if (addr->sa_family == AF_INET) + return IN_LOOPBACK( + ntohl(((struct sockaddr_in *) addr)->sin_addr.s_addr)); + else + return IN6_IS_ADDR_LOOPBACK( + &((struct sockaddr_in6 *) addr)->sin6_addr); +} + +static inline int c4iw_any_addr(struct sockaddr *addr) +{ + return c4iw_zero_addr(addr) || c4iw_loopback_addr(addr); +} + +static void +process_newconn(struct c4iw_listen_ep *master_lep, struct socket *new_so) +{ + struct c4iw_listen_ep *real_lep = NULL; + struct c4iw_ep *new_ep = NULL; + struct sockaddr_in *remote = NULL; int ret = 0; - MPASS(child_so != NULL); + MPASS(new_so != NULL); - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (c4iw_any_addr((struct sockaddr *)&master_lep->com.local_addr)) { + /* Here we need to find the 'real_lep' that belongs to the + * incomming socket's network interface, such that the newly + * created 'ep' can be attached to the real 'lep'. + */ + real_lep = find_real_listen_ep(master_lep, new_so); + if (real_lep == NULL) { + CTR2(KTR_IW_CXGBE, "%s: Could not find the real listen " + "ep for sock: %p", __func__, new_so); + log(LOG_ERR,"%s: Could not find the real listen ep for " + "sock: %p\n", __func__, new_so); + /* FIXME: properly free the 'new_so' in failure case. + * Use of soabort() and soclose() are not legal + * here(before soaccept()). + */ + return; + } + } else /* for Non-Wildcard address, master_lep is always the real_lep */ + real_lep = master_lep; - CTR5(KTR_IW_CXGBE, - "%s: parent so %p, parent ep %p, child so %p, child ep %p", - __func__, parent_ep->com.so, parent_ep, child_so, child_ep); + new_ep = alloc_ep(sizeof(*new_ep), GFP_KERNEL); - in_getsockaddr(child_so, (struct sockaddr **)&local); - in_getpeeraddr(child_so, (struct sockaddr **)&remote); + CTR6(KTR_IW_CXGBE, "%s: master_lep %p, real_lep: %p, new ep %p, " + "listening so %p, new so %p", __func__, master_lep, real_lep, + new_ep, master_lep->com.so, new_so); - child_ep->com.local_addr = *local; - child_ep->com.remote_addr = *remote; - child_ep->com.dev = parent_ep->com.dev; - child_ep->com.so = child_so; - child_ep->com.cm_id = NULL; - child_ep->com.thread = parent_ep->com.thread; - child_ep->parent_ep = parent_ep; + new_ep->com.dev = real_lep->com.dev; + new_ep->com.so = new_so; + new_ep->com.cm_id = NULL; + new_ep->com.thread = real_lep->com.thread; + new_ep->parent_ep = real_lep; - free(local, M_SONAME); + GET_LOCAL_ADDR(&new_ep->com.local_addr, new_so); + GET_REMOTE_ADDR(&new_ep->com.remote_addr, new_so); + c4iw_get_ep(&real_lep->com); + init_timer(&new_ep->timer); + new_ep->com.state = MPA_REQ_WAIT; + START_EP_TIMER(new_ep); + + setiwsockopt(new_so); + ret = soaccept(new_so, (struct sockaddr **)&remote); + if (ret != 0) { + CTR4(KTR_IW_CXGBE, + "%s:listen sock:%p, new sock:%p, ret:%d\n", + __func__, master_lep->com.so, new_so, ret); + if (remote != NULL) + free(remote, M_SONAME); + uninit_iwarp_socket(new_so); + soclose(new_so); + c4iw_put_ep(&new_ep->com); + c4iw_put_ep(&real_lep->com); + return; + } free(remote, M_SONAME); - setiwsockopt(child_so); - init_iwarp_socket(child_so, &child_ep->com); - c4iw_get_ep(&parent_ep->com); - init_timer(&child_ep->timer); - state_set(&child_ep->com, MPA_REQ_WAIT); - START_EP_TIMER(child_ep); + /* MPA request might have been queued up on the socket already, so we + * initialize the socket/upcall_handler under lock to prevent processing + * MPA request on another thread(via process_req()) simultaniously. + */ + c4iw_get_ep(&new_ep->com); /* Dereferenced at the end below, this is to + avoid freeing of ep before ep unlock. */ + mutex_lock(&new_ep->com.mutex); + init_iwarp_socket(new_so, &new_ep->com); - /* maybe the request has already been queued up on the socket... */ - ret = process_mpa_request(child_ep); - if (ret == 2) + ret = process_mpa_request(new_ep); + if (ret) { /* ABORT */ - c4iw_ep_disconnect(child_ep, 1, GFP_KERNEL); - else if (ret == 1) - /* CLOSE */ - c4iw_ep_disconnect(child_ep, 0, GFP_KERNEL); - + c4iw_ep_disconnect(new_ep, 1, GFP_KERNEL); + c4iw_put_ep(&real_lep->com); + } + mutex_unlock(&new_ep->com.mutex); + c4iw_put_ep(&new_ep->com); return; } @@ -790,6 +1068,12 @@ c4iw_so_upcall(struct socket *so, void *arg, int waitf ep->com.entry.tqe_prev); MPASS(ep->com.so == so); + /* + * Wake up any threads waiting in rdma_init()/rdma_fini(), + * with locks held. + */ + if (so->so_error) + c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET); add_ep_to_req_list(ep, C4IW_EVENT_SOCKET); return (SU_OK); @@ -820,9 +1104,15 @@ terminate(struct sge_iq *iq, const struct rss_header * static void process_socket_event(struct c4iw_ep *ep) { - int state = state_read(&ep->com); + int state = ep->com.state; struct socket *so = ep->com.so; + if (ep->com.state == DEAD) { + CTR3(KTR_IW_CXGBE, "%s: Pending socket event discarded " + "ep %p ep_state %s", __func__, ep, states[state]); + return; + } + CTR6(KTR_IW_CXGBE, "process_socket_event: so %p, so_state 0x%x, " "so_err %d, sb_state 0x%x, ep %p, ep_state %s", so, so->so_state, so->so_error, so->so_rcv.sb_state, ep, states[state]); @@ -833,10 +1123,29 @@ process_socket_event(struct c4iw_ep *ep) } if (state == LISTEN) { - /* socket listening events are handled at IWCM */ - CTR3(KTR_IW_CXGBE, "%s Invalid ep state:%u, ep:%p", __func__, - ep->com.state, ep); - BUG(); + struct c4iw_listen_ep *lep = (struct c4iw_listen_ep *)ep; + struct socket *listen_so = so, *new_so = NULL; + int error = 0; + + SOLISTEN_LOCK(listen_so); + do { + error = solisten_dequeue(listen_so, &new_so, + SOCK_NONBLOCK); + if (error) { + CTR4(KTR_IW_CXGBE, "%s: lep %p listen_so %p " + "error %d", __func__, lep, listen_so, + error); + return; + } + process_newconn(lep, new_so); + + /* solisten_dequeue() unlocks while return, so aquire + * lock again for sol_qlen and also for next iteration. + */ + SOLISTEN_LOCK(listen_so); + } while (listen_so->sol_qlen); + SOLISTEN_UNLOCK(listen_so); + return; } @@ -955,34 +1264,6 @@ stop_ep_timer(struct c4iw_ep *ep) return 1; } -static enum -c4iw_ep_state state_read(struct c4iw_ep_common *epc) -{ - enum c4iw_ep_state state; - - mutex_lock(&epc->mutex); - state = epc->state; - mutex_unlock(&epc->mutex); - - return (state); -} - -static void -__state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) -{ - - epc->state = new; -} - -static void -state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new) -{ - - mutex_lock(&epc->mutex); - __state_set(epc, new); - mutex_unlock(&epc->mutex); -} - static void * alloc_ep(int size, gfp_t gfp) { @@ -1059,8 +1340,8 @@ send_mpa_req(struct c4iw_ep *ep) } if (mpa_rev_to_use == 2) { - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons((u16)ep->ird); mpa_v2_params.ord = htons((u16)ep->ord); @@ -1112,7 +1393,7 @@ send_mpa_req(struct c4iw_ep *ep) } START_EP_TIMER(ep); - state_set(&ep->com, MPA_REQ_SENT); + ep->com.state = MPA_REQ_SENT; ep->mpa_attr.initiator = 1; CTR3(KTR_IW_CXGBE, "%s:smrE %p, error: %d", __func__, ep, err); return 0; @@ -1155,8 +1436,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { mpa->flags |= MPA_ENHANCED_RDMA_CONN; - mpa->private_data_size += - htons(sizeof(struct mpa_v2_conn_params)); + mpa->private_data_size = htons(ntohs(mpa->private_data_size) + + sizeof(struct mpa_v2_conn_params)); mpa_v2_params.ird = htons(((u16)ep->ird) | (peer2peer ? MPA_V2_PEER2PEER_MODEL : 0)); @@ -1171,7 +1452,7 @@ static int send_mpa_reject(struct c4iw_ep *ep, const v if (ep->plen) memcpy(mpa->private_data + - sizeof(struct mpa_v2_conn_params), pdata, plen); + sizeof(struct mpa_v2_conn_params), pdata, plen); CTR5(KTR_IW_CXGBE, "%s:smrej3 %p %d %d %d", __func__, ep, mpa_v2_params.ird, mpa_v2_params.ord, ep->plen); } else @@ -1275,7 +1556,7 @@ static int send_mpa_reply(struct c4iw_ep *ep, const vo free(mpa, M_CXGBE); - state_set(&ep->com, MPA_REP_SENT); + ep->com.state = MPA_REP_SENT; ep->snd_seq += mpalen; err = -sosend(ep->com.so, NULL, NULL, m, NULL, MSG_DONTWAIT, ep->com.thread); @@ -1332,17 +1613,17 @@ send_abort(struct c4iw_ep *ep) } uninit_iwarp_socket(so); - sodisconnect(so); + soclose(so); set_bit(ABORT_CONN, &ep->com.history); /* * TBD: iw_cxgbe driver should receive ABORT reply for every ABORT * request it has sent. But the current TOE driver is not propagating * this ABORT reply event (via do_abort_rpl) to iw_cxgbe. So as a work- - * around de-refer 'ep' (which was refered before sending ABORT request) - * here instead of doing it in abort_rpl() handler of iw_cxgbe driver. + * around de-refererece 'ep' here instead of doing it in abort_rpl() + * handler(not yet implemented) of iw_cxgbe driver. */ - c4iw_put_ep(&ep->com); + release_ep_resources(ep); return (0); } @@ -1401,6 +1682,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i CTR2(KTR_IW_CXGBE, "%s:cru1 %p", __func__, ep); /* this means MPA_v2 is used */ + event.ord = ep->ird; + event.ird = ep->ord; event.private_data_len = ep->plen - sizeof(struct mpa_v2_conn_params); event.private_data = ep->mpa_pkt + @@ -1410,6 +1693,8 @@ static void connect_reply_upcall(struct c4iw_ep *ep, i CTR2(KTR_IW_CXGBE, "%s:cru2 %p", __func__, ep); /* this means MPA_v1 is used */ + event.ord = c4iw_max_read_depth; + event.ird = c4iw_max_read_depth; event.private_data_len = ep->plen; event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); @@ -1451,7 +1736,6 @@ static int connect_request_upcall(struct c4iw_ep *ep) event.local_addr = ep->com.local_addr; event.remote_addr = ep->com.remote_addr; event.provider_data = ep; - event.so = ep->com.so; if (!ep->tried_with_mpa_v1) { /* this means MPA_v2 is used */ @@ -1473,11 +1757,18 @@ static int connect_request_upcall(struct c4iw_ep *ep) c4iw_get_ep(&ep->com); ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id, &event); - if(ret) + if(ret) { + CTR3(KTR_IW_CXGBE, "%s: ep %p, Failure while notifying event to" + " IWCM, err:%d", __func__, ep, ret); c4iw_put_ep(&ep->com); + } else + /* Dereference parent_ep only in success case. + * In case of failure, parent_ep is dereferenced by the caller + * of process_mpa_request(). + */ + c4iw_put_ep(&ep->parent_ep->com); set_bit(CONNREQ_UPCALL, &ep->com.history); - c4iw_put_ep(&ep->parent_ep->com); return ret; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***