Date: Fri, 19 Jan 2024 17:17:35 GMT From: Alexander Motin <mav@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: fda7cc79aaf6 - stable/14 - mpi3mr: Fix EINPROGRESS errors hanging the card Message-ID: <202401191717.40JHHZPH097062@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/14 has been updated by mav: URL: https://cgit.FreeBSD.org/src/commit/?id=fda7cc79aaf65a5a5d62627602d6456c70309ce1 commit fda7cc79aaf65a5a5d62627602d6456c70309ce1 Author: Warner Losh <imp@FreeBSD.org> AuthorDate: 2023-11-29 01:49:39 +0000 Commit: Alexander Motin <mav@FreeBSD.org> CommitDate: 2024-01-19 17:16:50 +0000 mpi3mr: Fix EINPROGRESS errors hanging the card Move enqueueing of commands to bus_dmamap_load_ccb callback Fix fundamental difference between FreeBSD and Linux. On Linux, your dma load callback always happens before it returns, so drivers are written to load the map, then submit to hardware. On FreeBSD, the callback may be deferred, in which case the load returns EINPROGRESS. This means the callback is responsible for queueing the request to the hardware, which is done after the SGL list is created. Make a number of interrelated changes: At the end of mpi3mr_prepare_sgls, add a call to mpi3mr_enqueue_request. Split the hardware submission out from the end of mpi3mr_action_scsiio and move it into a new routine mpi3mr_enqueue_request. Move all error completion from the end of mpi3mr_action_scsiio to where the error is detected. We cannot pass errors back from the mpi3mr_enqueue_request to do this on a 'failed' mpi3mr in a centralized place (since it has to be fire and forget). Add comments about zero length SGLs never making it into mpi3mr_prepare_sgls. Keep the code there for the moment, but we only set cm->data to non-NULL when scsiio_req->DataLength is not zero. So the datalength can't be zero and we can't send the zero SGLs. Add comments about other "impossible" tests in mpi3mr_prepare_sgls that really should be simple asserts of some flavor. Eliminate cm->error_code, since we can't pass data back from the mpi3mr_prepare_sgls callback anymore. In mpi3mr_map_request, call mpi3mr_enqueue_request for the no data case. 
This seems to work even though we've not done the special zero length handling that was in mpi3mr_prepare_sgls, giving further evidence to it not actually being needed. This is needed for SCSI CDBs that have no data to pass to the drive like TEST UNIT READY. With this change, and the prior ones, we're now able to run with mpi3mr on 128GB systems and very heavy disk load (so many buffers land > 4GB: the driver instructs busdma to never use memory above 4GB, which may be too conservative, but an issue for another time). Sponsored by: Netflix Reviewed by: sumit.saxena_broadcom.com, mav, jhb Differential Revision: https://reviews.freebsd.org/D42543 (cherry picked from commit 3208a189c1e2c4ef35daa432fe45629a043d7047) --- sys/dev/mpi3mr/mpi3mr.h | 1 - sys/dev/mpi3mr/mpi3mr_cam.c | 130 +++++++++++++++++++++++++------------------- 2 files changed, 73 insertions(+), 58 deletions(-) diff --git a/sys/dev/mpi3mr/mpi3mr.h b/sys/dev/mpi3mr/mpi3mr.h index 2226c747d3cf..f1a2cbc0fd4c 100644 --- a/sys/dev/mpi3mr/mpi3mr.h +++ b/sys/dev/mpi3mr/mpi3mr.h @@ -467,7 +467,6 @@ struct mpi3mr_cmd { U16 hosttag; U8 req_qidx; Mpi3SCSIIORequest_t io_request; - int error_code; }; struct mpi3mr_chain { diff --git a/sys/dev/mpi3mr/mpi3mr_cam.c b/sys/dev/mpi3mr/mpi3mr_cam.c index b6e47eac058a..15ef2732ec56 100644 --- a/sys/dev/mpi3mr/mpi3mr_cam.c +++ b/sys/dev/mpi3mr/mpi3mr_cam.c @@ -86,7 +86,9 @@ #define smp_processor_id() PCPU_GET(cpuid) -static int +static void +mpi3mr_enqueue_request(struct mpi3mr_softc *sc, struct mpi3mr_cmd *cm); +static void mpi3mr_map_request(struct mpi3mr_softc *sc, struct mpi3mr_cmd *cm); void mpi3mr_release_simq_reinit(struct mpi3mr_cam_softc *cam_sc); @@ -118,18 +120,23 @@ static void mpi3mr_prepare_sgls(void *arg, U8 last_chain_sgl_flags; struct mpi3mr_chain *chain_req; Mpi3SCSIIORequest_t *scsiio_req; + union ccb *ccb; cm = (struct mpi3mr_cmd *)arg; sc = cm->sc; scsiio_req = (Mpi3SCSIIORequest_t *) &cm->io_request; + ccb = cm->ccb; if (error) { - cm->error_code = 
error; device_printf(sc->mpi3mr_dev, "%s: error=%d\n",__func__, error); if (error == EFBIG) { - cm->ccb->ccb_h.status = CAM_REQ_TOO_BIG; - return; + mpi3mr_set_ccbstatus(ccb, CAM_REQ_TOO_BIG); + } else { + mpi3mr_set_ccbstatus(ccb, CAM_REQ_CMP_ERR); } + mpi3mr_release_command(cm); + xpt_done(ccb); + return; } if (cm->data_dir == MPI3MR_READ) @@ -138,10 +145,9 @@ static void mpi3mr_prepare_sgls(void *arg, if (cm->data_dir == MPI3MR_WRITE) bus_dmamap_sync(sc->buffer_dmat, cm->dmamap, BUS_DMASYNC_PREWRITE); - if (nsegs > MPI3MR_SG_DEPTH) { - device_printf(sc->mpi3mr_dev, "SGE count is too large or 0.\n"); - return; - } + + KASSERT(nsegs <= MPI3MR_SG_DEPTH && nsegs > 0, + ("%s: bad SGE count: %d\n", device_get_nameunit(sc->mpi3mr_dev), nsegs)); simple_sgl_flags = MPI3_SGE_FLAGS_ELEMENT_TYPE_SIMPLE | MPI3_SGE_FLAGS_DLAS_SYSTEM; @@ -152,24 +158,15 @@ static void mpi3mr_prepare_sgls(void *arg, sg_local = (U8 *)&scsiio_req->SGL; - if (!scsiio_req->DataLength) { + if (scsiio_req->DataLength == 0) { + /* XXX we don't ever get here when DataLength == 0, right? cm->data is NULL */ + /* This whole if can likely be removed -- we handle it in mpi3mr_request_map */ mpi3mr_build_zero_len_sge(sg_local); - return; + goto enqueue; } sges_left = nsegs; - if (sges_left < 0) { - printf("scsi_dma_map failed: request for %d bytes!\n", - scsiio_req->DataLength); - return; - } - if (sges_left > MPI3MR_SG_DEPTH) { - printf("scsi_dma_map returned unsupported sge count %d!\n", - sges_left); - return; - } - sges_in_segment = (sc->facts.op_req_sz - offsetof(Mpi3SCSIIORequest_t, SGL))/sizeof(Mpi3SGESimple_t); @@ -218,33 +215,51 @@ fill_in_last_segment: i++; } +enqueue: + /* + * Now that we've created the sgls, we send the request to the device. + * Unlike in Linux, dmaload isn't guaranteed to load every time, but + * this function is always called when the resources are available, so + * we can send the request to hardware here always. 
mpi3mr_map_request + * knows about this quirk and will only take evasive action when an + * error other than EINPROGRESS is returned from dmaload. + */ + mpi3mr_enqueue_request(sc, cm); + return; } -int +static void mpi3mr_map_request(struct mpi3mr_softc *sc, struct mpi3mr_cmd *cm) { u_int32_t retcode = 0; + union ccb *ccb; + ccb = cm->ccb; if (cm->data != NULL) { mtx_lock(&sc->io_lock); /* Map data buffer into bus space */ retcode = bus_dmamap_load_ccb(sc->buffer_dmat, cm->dmamap, - cm->ccb, mpi3mr_prepare_sgls, cm, 0); + ccb, mpi3mr_prepare_sgls, cm, 0); mtx_unlock(&sc->io_lock); - if (retcode) - device_printf(sc->mpi3mr_dev, "bus_dmamap_load(): retcode = %d\n", retcode); - if (retcode == EINPROGRESS) { - device_printf(sc->mpi3mr_dev, "request load in progress\n"); - xpt_freeze_simq(sc->cam_sc->sim, 1); + if (retcode != 0 && retcode != EINPROGRESS) { + device_printf(sc->mpi3mr_dev, + "bus_dmamap_load(): retcode = %d\n", retcode); + /* + * Any other error means prepare_sgls wasn't called, and + * will never be called, so we have to mop up. This error + * should never happen, though. + */ + mpi3mr_set_ccbstatus(ccb, CAM_REQ_CMP_ERR); + mpi3mr_release_command(cm); + xpt_done(ccb); } + } else { + /* + * No data, we enqueue it directly here. 
+ */ + mpi3mr_enqueue_request(sc, cm); } - if (cm->error_code) - return cm->error_code; - if (retcode) - mpi3mr_set_ccbstatus(cm->ccb, CAM_REQ_INVALID); - - return (retcode); } void @@ -912,12 +927,6 @@ mpi3mr_action_scsiio(struct mpi3mr_cam_softc *cam_sc, union ccb *ccb) struct mpi3mr_cmd *cm; uint8_t scsi_opcode, queue_idx; uint32_t mpi_control; - struct mpi3mr_op_req_queue *opreqq = NULL; - U32 data_len_blks = 0; - U32 tracked_io_sz = 0; - U32 ioc_pend_data_len = 0, tg_pend_data_len = 0; - struct mpi3mr_throttle_group_info *tg = NULL; - static int ratelimit; sc = cam_sc->sc; mtx_assert(&sc->mpi3mr_mtx, MA_OWNED); @@ -1104,15 +1113,15 @@ mpi3mr_action_scsiio(struct mpi3mr_cam_softc *cam_sc, union ccb *ccb) case CAM_DATA_SG_PADDR: device_printf(sc->mpi3mr_dev, "%s: physical addresses not supported\n", __func__); - mpi3mr_release_command(cm); mpi3mr_set_ccbstatus(ccb, CAM_REQ_INVALID); + mpi3mr_release_command(cm); xpt_done(ccb); return; case CAM_DATA_SG: device_printf(sc->mpi3mr_dev, "%s: scatter gather is not supported\n", __func__); - mpi3mr_release_command(cm); mpi3mr_set_ccbstatus(ccb, CAM_REQ_INVALID); + mpi3mr_release_command(cm); xpt_done(ccb); return; case CAM_DATA_VADDR: @@ -1129,27 +1138,35 @@ mpi3mr_action_scsiio(struct mpi3mr_cam_softc *cam_sc, union ccb *ccb) cm->data = csio->data_ptr; break; default: - mpi3mr_release_command(cm); mpi3mr_set_ccbstatus(ccb, CAM_REQ_INVALID); - xpt_done(ccb); - return; - } - - /* Prepare SGEs */ - if (mpi3mr_map_request(sc, cm)) { mpi3mr_release_command(cm); xpt_done(ccb); - printf("func: %s line: %d Build SGLs failed\n", __func__, __LINE__); return; } - - opreqq = &sc->op_req_q[queue_idx]; + + /* Prepare SGEs and queue to hardware */ + mpi3mr_map_request(sc, cm); +} + +static void +mpi3mr_enqueue_request(struct mpi3mr_softc *sc, struct mpi3mr_cmd *cm) +{ + static int ratelimit; + struct mpi3mr_op_req_queue *opreqq = &sc->op_req_q[cm->req_qidx]; + struct mpi3mr_throttle_group_info *tg = NULL; + uint32_t data_len_blks = 
0; + uint32_t tracked_io_sz = 0; + uint32_t ioc_pend_data_len = 0, tg_pend_data_len = 0; + struct mpi3mr_target *targ = cm->targ; + union ccb *ccb = cm->ccb; + Mpi3SCSIIORequest_t *req = (Mpi3SCSIIORequest_t *)&cm->io_request; if (sc->iot_enable) { - data_len_blks = csio->dxfer_len >> 9; - + data_len_blks = ccb->csio.dxfer_len >> 9; + if ((data_len_blks >= sc->io_throttle_data_length) && targ->io_throttle_enabled) { + tracked_io_sz = data_len_blks; tg = targ->throttle_group; if (tg) { @@ -1207,19 +1224,18 @@ mpi3mr_action_scsiio(struct mpi3mr_cam_softc *cam_sc, union ccb *ccb) if (targ->io_divert) { req->MsgFlags |= MPI3_SCSIIO_MSGFLAGS_DIVERT_TO_FIRMWARE; - mpi_control |= MPI3_SCSIIO_FLAGS_DIVERT_REASON_IO_THROTTLING; + req->Flags = htole32(le32toh(req->Flags) | MPI3_SCSIIO_FLAGS_DIVERT_REASON_IO_THROTTLING); } } - req->Flags = htole32(mpi_control); if (mpi3mr_submit_io(sc, opreqq, (U8 *)&cm->io_request)) { - mpi3mr_release_command(cm); if (tracked_io_sz) { mpi3mr_atomic_sub(&sc->pend_large_data_sz, tracked_io_sz); if (tg) mpi3mr_atomic_sub(&tg->pend_large_data_sz, tracked_io_sz); } mpi3mr_set_ccbstatus(ccb, CAM_RESRC_UNAVAIL); + mpi3mr_release_command(cm); xpt_done(ccb); } else { callout_reset_sbt(&cm->callout, mstosbt(ccb->ccb_h.timeout), 0,
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202401191717.40JHHZPH097062>