Date: Sun, 18 Feb 2018 16:41:34 +0000 (UTC) From: Scott Long <scottl@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r329522 - in head/sys/dev: mpr mps Message-ID: <201802181641.w1IGfYa4052116@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: scottl Date: Sun Feb 18 16:41:34 2018 New Revision: 329522 URL: https://svnweb.freebsd.org/changeset/base/329522 Log: Improve command lifecycle debugging and detection of problems. Sponsored by: Netflix Modified: head/sys/dev/mpr/mpr.c head/sys/dev/mpr/mpr_sas.c head/sys/dev/mpr/mprvar.h head/sys/dev/mps/mps.c head/sys/dev/mps/mps_sas.c head/sys/dev/mps/mpsvar.h Modified: head/sys/dev/mpr/mpr.c ============================================================================== --- head/sys/dev/mpr/mpr.c Sun Feb 18 16:03:50 2018 (r329521) +++ head/sys/dev/mpr/mpr.c Sun Feb 18 16:41:34 2018 (r329522) @@ -1135,6 +1135,9 @@ mpr_enqueue_request(struct mpr_softc *sc, struct mpr_c if (++sc->io_cmds_active > sc->io_cmds_highwater) sc->io_cmds_highwater++; + KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("command not busy\n")); + cm->cm_state = MPR_CM_STATE_INQUEUE; + if (sc->atomic_desc_capable) { rd.u.low = cm->cm_desc.Words.Low; mpr_regwrite(sc, MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET, @@ -1554,6 +1557,7 @@ mpr_alloc_requests(struct mpr_softc *sc) cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN; cm->cm_desc.Default.SMID = i; cm->cm_sc = sc; + cm->cm_state = MPR_CM_STATE_BUSY; TAILQ_INIT(&cm->cm_chain_list); TAILQ_INIT(&cm->cm_prp_page_list); callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0); @@ -2444,6 +2448,9 @@ mpr_intr_locked(void *data) case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS: case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; + KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE, + ("command not inqueue\n")); + cm->cm_state = MPR_CM_STATE_BUSY; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: @@ -2513,6 +2520,9 @@ mpr_intr_locked(void *data) } else { cm = &sc->commands[ le16toh(desc->AddressReply.SMID)]; + KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE, + ("command not inqueue\n")); + cm->cm_state = MPR_CM_STATE_BUSY; cm->cm_reply = reply; cm->cm_reply_data = le32toh(desc->AddressReply. @@ -2543,8 +2553,7 @@ mpr_intr_locked(void *data) } if (pq != sc->replypostindex) { - mpr_dprint(sc, MPR_TRACE, - "%s sc %p writing postindex %d\n", + mpr_dprint(sc, MPR_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); Modified: head/sys/dev/mpr/mpr_sas.c ============================================================================== --- head/sys/dev/mpr/mpr_sas.c Sun Feb 18 16:03:50 2018 (r329521) +++ head/sys/dev/mpr/mpr_sas.c Sun Feb 18 16:41:34 2018 (r329522) @@ -1161,6 +1161,10 @@ mprsas_complete_all_commands(struct mpr_softc *sc) /* complete all commands with a NULL reply */ for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; + if (cm->cm_state == MPR_CM_STATE_FREE) + continue; + + cm->cm_state = MPR_CM_STATE_BUSY; cm->cm_reply = NULL; completed = 0; @@ -1173,9 +1177,7 @@ mprsas_complete_all_commands(struct mpr_softc *sc) cm, cm->cm_state, cm->cm_ccb); cm->cm_complete(sc, cm); completed = 1; - } - - if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) { + } else if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) { mprsas_log_command(cm, MPR_RECOVERY, "waking up cm %p state %x ccb %p for diag reset\n", cm, cm->cm_state, cm->cm_ccb); @@ -1183,9 +1185,6 @@ mprsas_complete_all_commands(struct mpr_softc *sc) completed = 1; } - if (cm->cm_sc->io_cmds_active != 0) - cm->cm_sc->io_cmds_active--; - if ((completed == 0) && (cm->cm_state != MPR_CM_STATE_FREE)) { /* this should never happen, but if it does, log */ mprsas_log_command(cm, MPR_RECOVERY, @@ -1194,6 +1193,8 @@ mprsas_complete_all_commands(struct mpr_softc *sc) cm->cm_ccb); } } + + sc->io_cmds_active = 0; } void @@ -1248,6 +1249,11 @@ mprsas_tm_timeout(void *data) mprsas_log_command(tm, MPR_INFO|MPR_RECOVERY, "task mgmt %p timed " "out\n", tm); + + KASSERT(tm->cm_state == MPR_CM_STATE_INQUEUE, + ("command not inqueue\n")); + + tm->cm_state = MPR_CM_STATE_BUSY; mpr_reinit(sc); } @@ -1657,7 +1663,7 @@ mprsas_scsiio_timeout(void *data) * and been re-used, though this is unlikely. */ mpr_intr_locked(sc); - if (cm->cm_state == MPR_CM_STATE_FREE) { + if (cm->cm_state != MPR_CM_STATE_INQUEUE) { mprsas_log_command(cm, MPR_XINFO, "SCSI command %p almost timed out\n", cm); return; @@ -2492,6 +2498,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mp if (cm->cm_state == MPR_CM_STATE_TIMEDOUT) { TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery); + cm->cm_state = MPR_CM_STATE_BUSY; if (cm->cm_reply != NULL) mprsas_log_command(cm, MPR_RECOVERY, "completed timedout cm %p ccb %p during recovery " Modified: head/sys/dev/mpr/mprvar.h ============================================================================== --- head/sys/dev/mpr/mprvar.h Sun Feb 18 16:03:50 2018 (r329521) +++ head/sys/dev/mpr/mprvar.h Sun Feb 18 16:41:34 2018 (r329522) @@ -243,6 +243,7 @@ struct mpr_command { #define MPR_CM_STATE_FREE 0 #define MPR_CM_STATE_BUSY 1 #define MPR_CM_STATE_TIMEDOUT 2 +#define MPR_CM_STATE_INQUEUE 3 bus_dmamap_t cm_dmamap; struct scsi_sense_data *cm_sense; uint64_t *nvme_error_response; @@ -569,6 +570,8 @@ mpr_free_command(struct mpr_softc *sc, struct mpr_comm struct mpr_chain *chain, *chain_temp; struct mpr_prp_page *prp_page, *prp_page_temp; + KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n")); + if (cm->cm_reply != NULL) mpr_free_reply(sc, cm->cm_reply_data); cm->cm_reply = NULL; @@ -607,9 +610,10 @@ mpr_alloc_command(struct mpr_softc *sc) if (cm == NULL) return (NULL); + KASSERT(cm->cm_state == MPR_CM_STATE_FREE, + ("mpr: Allocating busy command\n")); + TAILQ_REMOVE(&sc->req_list, cm, cm_link); - KASSERT(cm->cm_state == MPR_CM_STATE_FREE, ("mpr: Allocating busy " - "command\n")); cm->cm_state = MPR_CM_STATE_BUSY; return (cm); } @@ -619,6 +623,8 @@ mpr_free_high_priority_command(struct mpr_softc *sc, s { struct mpr_chain *chain, *chain_temp; + KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n")); + if (cm->cm_reply != NULL) mpr_free_reply(sc, cm->cm_reply_data); cm->cm_reply = NULL; @@ -645,9 +651,10 @@ mpr_alloc_high_priority_command(struct mpr_softc *sc) if (cm == NULL) return (NULL); + KASSERT(cm->cm_state == MPR_CM_STATE_FREE, + ("mpr: Allocating busy command\n")); + TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link); - KASSERT(cm->cm_state == MPR_CM_STATE_FREE, ("mpr: Allocating busy " - "command\n")); cm->cm_state = MPR_CM_STATE_BUSY; return (cm); } Modified: head/sys/dev/mps/mps.c ============================================================================== --- head/sys/dev/mps/mps.c Sun Feb 18 16:03:50 2018 (r329521) +++ head/sys/dev/mps/mps.c Sun Feb 18 16:41:34 2018 (r329522) @@ -1102,6 +1102,10 @@ mps_enqueue_request(struct mps_softc *sc, struct mps_c rd.u.low = cm->cm_desc.Words.Low; rd.u.high = cm->cm_desc.Words.High; rd.word = htole64(rd.word); + + KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("command not busy\n")); + cm->cm_state = MPS_CM_STATE_INQUEUE; + /* TODO-We may need to make below regwrite atomic */ mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET, rd.u.low); @@ -1502,6 +1506,7 @@ mps_alloc_requests(struct mps_softc *sc) cm->cm_sense_busaddr = sc->sense_busaddr + i * MPS_SENSE_LEN; cm->cm_desc.Default.SMID = i; cm->cm_sc = sc; + cm->cm_state = MPS_CM_STATE_BUSY; TAILQ_INIT(&cm->cm_chain_list); callout_init_mtx(&cm->cm_callout, &sc->mps_mtx, 0); @@ -2305,6 +2310,9 @@ mps_intr_locked(void *data) switch (flags) { case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS: cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)]; + KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE, + ("command not inqueue\n")); + cm->cm_state = MPS_CM_STATE_BUSY; cm->cm_reply = NULL; break; case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY: @@ -2340,7 +2348,7 @@ mps_intr_locked(void *data) sc->reply_frames, sc->fqdepth, sc->replyframesz); printf("%s: baddr %#x,\n", __func__, baddr); - /* LSI-TODO. See Linux Code. Need Graceful exit*/ + /* LSI-TODO. See Linux Code for Graceful exit */ panic("Reply address out of range"); } if (le16toh(desc->AddressReply.SMID) == 0) { @@ -2372,10 +2380,14 @@ mps_intr_locked(void *data) (MPI2_EVENT_NOTIFICATION_REPLY *) reply); } else { - cm = &sc->commands[le16toh(desc->AddressReply.SMID)]; + cm = &sc->commands[ + le16toh(desc->AddressReply.SMID)]; + KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE, + ("command not inqueue\n")); + cm->cm_state = MPS_CM_STATE_BUSY; cm->cm_reply = reply; - cm->cm_reply_data = - le32toh(desc->AddressReply.ReplyFrameAddress); + cm->cm_reply_data = le32toh( + desc->AddressReply.ReplyFrameAddress); } break; } @@ -2403,10 +2415,10 @@ mps_intr_locked(void *data) } if (pq != sc->replypostindex) { - mps_dprint(sc, MPS_TRACE, - "%s sc %p writing postindex %d\n", + mps_dprint(sc, MPS_TRACE, "%s sc %p writing postindex %d\n", __func__, sc, sc->replypostindex); - mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex); + mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, + sc->replypostindex); } return; Modified: head/sys/dev/mps/mps_sas.c ============================================================================== --- head/sys/dev/mps/mps_sas.c Sun Feb 18 16:03:50 2018 (r329521) +++ head/sys/dev/mps/mps_sas.c Sun Feb 18 16:41:34 2018 (r329522) @@ -1101,6 +1101,10 @@ mpssas_complete_all_commands(struct mps_softc *sc) /* complete all commands with a NULL reply */ for (i = 1; i < sc->num_reqs; i++) { cm = &sc->commands[i]; + if (cm->cm_state == MPS_CM_STATE_FREE) + continue; + + cm->cm_state = MPS_CM_STATE_BUSY; cm->cm_reply = NULL; completed = 0; @@ -1109,14 +1113,12 @@ mpssas_complete_all_commands(struct mps_softc *sc) if (cm->cm_complete != NULL) { mpssas_log_command(cm, MPS_RECOVERY, - "completing cm %p state %x ccb %p for diag reset\n", + "completing cm %p state %x ccb %p for diag reset\n", cm, cm->cm_state, cm->cm_ccb); cm->cm_complete(sc, cm); completed = 1; - } - - if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) { + } else if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) { mpssas_log_command(cm, MPS_RECOVERY, "waking up cm %p state %x ccb %p for diag reset\n", cm, cm->cm_state, cm->cm_ccb); @@ -1124,9 +1126,6 @@ mpssas_complete_all_commands(struct mps_softc *sc) completed = 1; } - if (cm->cm_sc->io_cmds_active != 0) - cm->cm_sc->io_cmds_active--; - if ((completed == 0) && (cm->cm_state != MPS_CM_STATE_FREE)) { /* this should never happen, but if it does, log */ mpssas_log_command(cm, MPS_RECOVERY, @@ -1135,6 +1134,8 @@ mpssas_complete_all_commands(struct mps_softc *sc) cm->cm_ccb); } } + + sc->io_cmds_active = 0; } void @@ -1191,6 +1192,11 @@ mpssas_tm_timeout(void *data) mpssas_log_command(tm, MPS_INFO|MPS_RECOVERY, "task mgmt %p timed out\n", tm); + + KASSERT(tm->cm_state == MPS_CM_STATE_INQUEUE, + ("command not inqueue\n")); + + tm->cm_state = MPS_CM_STATE_BUSY; mps_reinit(sc); } @@ -1591,7 +1597,7 @@ mpssas_scsiio_timeout(void *data) * and been re-used, though this is unlikely. */ mps_intr_locked(sc); - if (cm->cm_state == MPS_CM_STATE_FREE) { + if (cm->cm_state != MPS_CM_STATE_INQUEUE) { mpssas_log_command(cm, MPS_XINFO, "SCSI command %p almost timed out\n", cm); return; @@ -2031,12 +2037,13 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mp if (cm->cm_state == MPS_CM_STATE_TIMEDOUT) { TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery); + cm->cm_state = MPS_CM_STATE_BUSY; if (cm->cm_reply != NULL) mpssas_log_command(cm, MPS_RECOVERY, "completed timedout cm %p ccb %p during recovery " "ioc %x scsi %x state %x xfer %u\n", - cm, cm->cm_ccb, - le16toh(rep->IOCStatus), rep->SCSIStatus, rep->SCSIState, + cm, cm->cm_ccb, le16toh(rep->IOCStatus), + rep->SCSIStatus, rep->SCSIState, le32toh(rep->TransferCount)); else mpssas_log_command(cm, MPS_RECOVERY, @@ -2047,8 +2054,8 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mp mpssas_log_command(cm, MPS_RECOVERY, "completed cm %p ccb %p during recovery " "ioc %x scsi %x state %x xfer %u\n", - cm, cm->cm_ccb, - le16toh(rep->IOCStatus), rep->SCSIStatus, rep->SCSIState, + cm, cm->cm_ccb, le16toh(rep->IOCStatus), + rep->SCSIStatus, rep->SCSIState, le32toh(rep->TransferCount)); else mpssas_log_command(cm, MPS_RECOVERY, Modified: head/sys/dev/mps/mpsvar.h ============================================================================== --- head/sys/dev/mps/mpsvar.h Sun Feb 18 16:03:50 2018 (r329521) +++ head/sys/dev/mps/mpsvar.h Sun Feb 18 16:41:34 2018 (r329522) @@ -243,6 +243,7 @@ struct mps_command { #define MPS_CM_STATE_FREE 0 #define MPS_CM_STATE_BUSY 1 #define MPS_CM_STATE_TIMEDOUT 2 +#define MPS_CM_STATE_INQUEUE 3 bus_dmamap_t cm_dmamap; struct scsi_sense_data *cm_sense; TAILQ_HEAD(, mps_chain) cm_chain_list; @@ -541,6 +542,8 @@ mps_free_command(struct mps_softc *sc, struct mps_comm { struct mps_chain *chain, *chain_temp; + KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n")); + if (cm->cm_reply != NULL) mps_free_reply(sc, cm->cm_reply_data); cm->cm_reply = NULL; @@ -574,8 +577,10 @@ mps_alloc_command(struct mps_softc *sc) if (cm == NULL) return (NULL); + KASSERT(cm->cm_state == MPS_CM_STATE_FREE, + ("mps: Allocating busy command\n")); + TAILQ_REMOVE(&sc->req_list, cm, cm_link); - KASSERT(cm->cm_state == MPS_CM_STATE_FREE, ("mps: Allocating busy command\n")); cm->cm_state = MPS_CM_STATE_BUSY; return (cm); } @@ -585,6 +590,8 @@ mps_free_high_priority_command(struct mps_softc *sc, s { struct mps_chain *chain, *chain_temp; + KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n")); + if (cm->cm_reply != NULL) mps_free_reply(sc, cm->cm_reply_data); cm->cm_reply = NULL; @@ -611,8 +618,10 @@ mps_alloc_high_priority_command(struct mps_softc *sc) if (cm == NULL) return (NULL); + KASSERT(cm->cm_state == MPS_CM_STATE_FREE, + ("mps: Allocating busy command\n")); + TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link); - KASSERT(cm->cm_state == MPS_CM_STATE_FREE, ("mps: Allocating busy command\n")); cm->cm_state = MPS_CM_STATE_BUSY; return (cm); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201802181641.w1IGfYa4052116>