From owner-svn-src-head@freebsd.org Thu Aug 25 21:55:18 2016 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id C14BCBC6D6C; Thu, 25 Aug 2016 21:55:18 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 9F10017DE; Thu, 25 Aug 2016 21:55:18 +0000 (UTC) (envelope-from np@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u7PLtHd4075252; Thu, 25 Aug 2016 21:55:17 GMT (envelope-from np@FreeBSD.org) Received: (from np@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u7PLtHQN075249; Thu, 25 Aug 2016 21:55:17 GMT (envelope-from np@FreeBSD.org) Message-Id: <201608252155.u7PLtHQN075249@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: np set sender to np@FreeBSD.org using -f From: Navdeep Parhar Date: Thu, 25 Aug 2016 21:55:17 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r304822 - head/sys/dev/cxgbe/cxgbei X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 25 Aug 2016 21:55:18 -0000 Author: np Date: Thu Aug 25 21:55:17 2016 New Revision: 304822 URL: https://svnweb.freebsd.org/changeset/base/304822 Log: cxgbe/cxgbei: Read the chip's configuration to determine the actual hardware send and receive PDU limits. Report these limits to ICL and take them into account when setting the socket's send and receive buffer sizes. The driver used a single hardcoded limit everywhere prior to this change. Sponsored by: Chelsio Communications Modified: head/sys/dev/cxgbe/cxgbei/cxgbei.c head/sys/dev/cxgbe/cxgbei/cxgbei.h head/sys/dev/cxgbe/cxgbei/icl_cxgbei.c Modified: head/sys/dev/cxgbe/cxgbei/cxgbei.c ============================================================================== --- head/sys/dev/cxgbe/cxgbei/cxgbei.c Thu Aug 25 21:33:39 2016 (r304821) +++ head/sys/dev/cxgbe/cxgbei/cxgbei.c Thu Aug 25 21:55:17 2016 (r304822) @@ -472,17 +472,47 @@ t4_sk_ddp_tag_release(struct icl_cxgbei_ return (0); } +static void +read_pdu_limits(struct adapter *sc, uint32_t *max_tx_pdu_len, + uint32_t *max_rx_pdu_len) +{ + uint32_t tx_len, rx_len, r, v; + + rx_len = t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE); + tx_len = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); + + r = t4_read_reg(sc, A_TP_PARA_REG2); + rx_len = min(rx_len, G_MAXRXDATA(r)); + tx_len = min(tx_len, G_MAXRXDATA(r)); + + r = t4_read_reg(sc, A_TP_PARA_REG7); + v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r)); + rx_len = min(rx_len, v); + tx_len = min(tx_len, v); + + /* Remove after FW_FLOWC_MNEM_TXDATAPLEN_MAX fix in firmware. */ + tx_len = min(tx_len, 3 * 4096); + + *max_tx_pdu_len = rounddown2(tx_len, 512); + *max_rx_pdu_len = rounddown2(rx_len, 512); +} + +/* + * Initialize the software state of the iSCSI ULP driver. + * + * ENXIO means firmware didn't set up something that it was supposed to. + */ static int -cxgbei_ddp_init(struct adapter *sc, struct cxgbei_data *ci) +cxgbei_init(struct adapter *sc, struct cxgbei_data *ci) { - int nppods, bits, max_sz, rc; + int nppods, bits, rc; static const u_int pgsz_order[] = {0, 1, 2, 3}; MPASS(sc->vres.iscsi.size > 0); ci->llimit = sc->vres.iscsi.start; ci->ulimit = sc->vres.iscsi.start + sc->vres.iscsi.size - 1; - max_sz = G_MAXRXDATA(t4_read_reg(sc, A_TP_PARA_REG2)); + read_pdu_limits(sc, &ci->max_tx_pdu_len, &ci->max_rx_pdu_len); nppods = sc->vres.iscsi.size >> IPPOD_SIZE_SHIFT; if (nppods <= 1024) @@ -513,7 +543,6 @@ cxgbei_ddp_init(struct adapter *sc, stru } mtx_init(&ci->map_lock, "ddp lock", NULL, MTX_DEF | MTX_DUPOK); - ci->max_txsz = ci->max_rxsz = min(max_sz, ULP2_MAX_PKT_SIZE); ci->nppods = nppods; ci->idx_last = nppods; ci->idx_bits = bits; @@ -811,7 +840,7 @@ cxgbei_activate(struct adapter *sc) if (ci == NULL) return (ENOMEM); - rc = cxgbei_ddp_init(sc, ci); + rc = cxgbei_init(sc, ci); if (rc != 0) { free(ci, M_CXGBE); return (rc); Modified: head/sys/dev/cxgbe/cxgbei/cxgbei.h ============================================================================== --- head/sys/dev/cxgbe/cxgbei/cxgbei.h Thu Aug 25 21:33:39 2016 (r304821) +++ head/sys/dev/cxgbe/cxgbei/cxgbei.h Thu Aug 25 21:55:17 2016 (r304822) @@ -135,8 +135,6 @@ struct cxgbei_ulp2_tag_format { }; struct cxgbei_data { - u_int max_txsz; - u_int max_rxsz; u_int llimit; u_int ulimit; u_int nppods; @@ -144,6 +142,8 @@ struct cxgbei_data { u_char idx_bits; uint32_t idx_mask; uint32_t rsvd_tag_mask; + u_int max_tx_pdu_len; + u_int max_rx_pdu_len; struct mtx map_lock; bus_dma_tag_t ulp_ddp_tag; Modified: head/sys/dev/cxgbe/cxgbei/icl_cxgbei.c ============================================================================== --- head/sys/dev/cxgbe/cxgbei/icl_cxgbei.c Thu Aug 25 21:33:39 2016 (r304821) +++ head/sys/dev/cxgbe/cxgbei/icl_cxgbei.c Thu Aug 25 21:55:17 2016 (r304822) @@ -137,16 +137,6 @@ static kobj_method_t icl_cxgbei_methods[ DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn)); -#if 0 -/* - * Subtract another 256 for AHS from MAX_DSL if AHS could be used. - */ -#define CXGBEI_MAX_PDU 16224 -#define CXGBEI_MAX_DSL (CXGBEI_MAX_PDU - sizeof(struct iscsi_bhs) - 8) -#endif -#define CXGBEI_MAX_DSL 8192 -#define CXGBEI_MAX_PDU (CXGBEI_MAX_DSL + sizeof(struct iscsi_bhs) + 8) - void icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip) { @@ -339,7 +329,7 @@ icl_cxgbei_conn_pdu_append_data(struct i if (__predict_true(m_append(m, len, addr) != 0)) { ip->ip_data_len += len; - MPASS(ip->ip_data_len <= CXGBEI_MAX_DSL); + MPASS(ip->ip_data_len <= ic->ic_max_data_segment_length); return (0); } else { if (flags & M_WAITOK) { @@ -386,7 +376,6 @@ icl_cxgbei_conn_pdu_queue(struct icl_con m = finalize_pdu(icc, icp); M_ASSERTPKTHDR(m); MPASS((m->m_pkthdr.len & 3) == 0); - MPASS(m->m_pkthdr.len + 8 <= CXGBEI_MAX_PDU); /* * Do not get inp from toep->inp as the toepcb might have detached @@ -427,7 +416,8 @@ icl_cxgbei_new_conn(const char *name, st #ifdef DIAGNOSTIC refcount_init(&ic->ic_outstanding_pdus, 0); #endif - ic->ic_max_data_segment_length = CXGBEI_MAX_DSL; + /* This is a stop-gap value that will be corrected during handoff. */ + ic->ic_max_data_segment_length = 16384; ic->ic_name = name; ic->ic_offload = "cxgbei"; ic->ic_unmapped = false; @@ -454,29 +444,16 @@ icl_cxgbei_conn_free(struct icl_conn *ic } static int -icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so) +icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so, int sspace, + int rspace) { - size_t minspace; struct sockopt opt; - int error, one = 1; + int error, one = 1, ss, rs; - /* - * For sendspace, this is required because the current code cannot - * send a PDU in pieces; thus, the minimum buffer size is equal - * to the maximum PDU size. "+4" is to account for possible padding. - * - * What we should actually do here is to use autoscaling, but set - * some minimal buffer size to "minspace". I don't know a way to do - * that, though. - */ - minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length + - ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4; - if (sendspace < minspace) - sendspace = minspace; - if (recvspace < minspace) - recvspace = minspace; + ss = max(sendspace, sspace); + rs = max(recvspace, rspace); - error = soreserve(so, sendspace, recvspace); + error = soreserve(so, ss, rs); if (error != 0) { icl_cxgbei_conn_close(ic); return (error); @@ -611,6 +588,7 @@ int icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd) { struct icl_cxgbei_conn *icc = ic_to_icc(ic); + struct cxgbei_data *ci; struct find_ofld_adapter_rr fa; struct file *fp; struct socket *so; @@ -661,10 +639,7 @@ icl_cxgbei_conn_handoff(struct icl_conn if (fa.sc == NULL) return (EINVAL); icc->sc = fa.sc; - - error = icl_cxgbei_setsockopt(ic, so); - if (error) - return (error); + ci = icc->sc->iscsi_ulp_softc; inp = sotoinpcb(so); INP_WLOCK(inp); @@ -682,22 +657,43 @@ icl_cxgbei_conn_handoff(struct icl_conn MPASS(toep->vi->pi->adapter == icc->sc); icc->toep = toep; icc->cwt = cxgbei_select_worker_thread(icc); + + /* + * We maintain the _send_ DSL in this field just to have a + * convenient way to assert that the kernel never sends + * oversized PDUs. This field is otherwise unused in the driver + * or the kernel. + */ + ic->ic_max_data_segment_length = ci->max_tx_pdu_len - + ISCSI_BHS_SIZE; + icc->ulp_submode = 0; - if (ic->ic_header_crc32c) + if (ic->ic_header_crc32c) { icc->ulp_submode |= ULP_CRC_HEADER; - if (ic->ic_data_crc32c) + ic->ic_max_data_segment_length -= + ISCSI_HEADER_DIGEST_SIZE; + } + if (ic->ic_data_crc32c) { icc->ulp_submode |= ULP_CRC_DATA; + ic->ic_max_data_segment_length -= + ISCSI_DATA_DIGEST_SIZE; + } so->so_options |= SO_NO_DDP; toep->ulp_mode = ULP_MODE_ISCSI; toep->ulpcb = icc; - send_iscsi_flowc_wr(icc->sc, toep, CXGBEI_MAX_PDU); + send_iscsi_flowc_wr(icc->sc, toep, ci->max_tx_pdu_len); set_ulp_mode_iscsi(icc->sc, toep, ic->ic_header_crc32c, ic->ic_data_crc32c); error = 0; } INP_WUNLOCK(inp); + if (error == 0) { + error = icl_cxgbei_setsockopt(ic, so, ci->max_tx_pdu_len, + ci->max_rx_pdu_len); + } + return (error); } @@ -831,14 +827,52 @@ icl_cxgbei_conn_transfer_done(struct icl uma_zfree(icl_transfer_zone, prv); } +static void +cxgbei_limits(struct adapter *sc, void *arg) +{ + struct icl_drv_limits *idl = arg; + struct cxgbei_data *ci; + int max_dsl; + + if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4lims") != 0) + return; + + if (uld_active(sc, ULD_ISCSI)) { + ci = sc->iscsi_ulp_softc; + MPASS(ci != NULL); + + /* + * AHS is not supported by the kernel so we'll not account for + * it either in our PDU len -> data segment len conversions. + */ + + max_dsl = ci->max_rx_pdu_len - ISCSI_BHS_SIZE - + ISCSI_HEADER_DIGEST_SIZE - ISCSI_DATA_DIGEST_SIZE; + if (idl->idl_max_recv_data_segment_length > max_dsl) + idl->idl_max_recv_data_segment_length = max_dsl; + + max_dsl = ci->max_tx_pdu_len - ISCSI_BHS_SIZE - + ISCSI_HEADER_DIGEST_SIZE - ISCSI_DATA_DIGEST_SIZE; + if (idl->idl_max_send_data_segment_length > max_dsl) + idl->idl_max_send_data_segment_length = max_dsl; + } + + end_synchronized_op(sc, LOCK_HELD); +} + static int icl_cxgbei_limits(struct icl_drv_limits *idl) { - idl->idl_max_recv_data_segment_length = CXGBEI_MAX_DSL; - idl->idl_max_send_data_segment_length = CXGBEI_MAX_DSL; + /* Maximum allowed by the RFC. cxgbei_limits will clip them. */ + idl->idl_max_recv_data_segment_length = (1 << 24) - 1; + idl->idl_max_send_data_segment_length = (1 << 24) - 1; + + /* These are somewhat arbitrary. */ idl->idl_max_burst_length = 2 * 1024 * 1024; - idl->idl_first_burst_length = CXGBEI_MAX_DSL; + idl->idl_first_burst_length = 8192; + + t4_iterate(cxgbei_limits, idl); return (0); }