Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 18 Feb 2018 16:41:34 +0000 (UTC)
From:      Scott Long <scottl@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r329522 - in head/sys/dev: mpr mps
Message-ID:  <201802181641.w1IGfYa4052116@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: scottl
Date: Sun Feb 18 16:41:34 2018
New Revision: 329522
URL: https://svnweb.freebsd.org/changeset/base/329522

Log:
  Improve command lifecycle debugging and detection of problems.
  
  Sponsored by:	Netflix

Modified:
  head/sys/dev/mpr/mpr.c
  head/sys/dev/mpr/mpr_sas.c
  head/sys/dev/mpr/mprvar.h
  head/sys/dev/mps/mps.c
  head/sys/dev/mps/mps_sas.c
  head/sys/dev/mps/mpsvar.h

Modified: head/sys/dev/mpr/mpr.c
==============================================================================
--- head/sys/dev/mpr/mpr.c	Sun Feb 18 16:03:50 2018	(r329521)
+++ head/sys/dev/mpr/mpr.c	Sun Feb 18 16:41:34 2018	(r329522)
@@ -1135,6 +1135,9 @@ mpr_enqueue_request(struct mpr_softc *sc, struct mpr_c
 	if (++sc->io_cmds_active > sc->io_cmds_highwater)
 		sc->io_cmds_highwater++;
 
+	KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("command not busy\n"));
+	cm->cm_state = MPR_CM_STATE_INQUEUE;
+
 	if (sc->atomic_desc_capable) {
 		rd.u.low = cm->cm_desc.Words.Low;
 		mpr_regwrite(sc, MPI26_ATOMIC_REQUEST_DESCRIPTOR_POST_OFFSET,
@@ -1554,6 +1557,7 @@ mpr_alloc_requests(struct mpr_softc *sc)
 		cm->cm_sense_busaddr = sc->sense_busaddr + i * MPR_SENSE_LEN;
 		cm->cm_desc.Default.SMID = i;
 		cm->cm_sc = sc;
+		cm->cm_state = MPR_CM_STATE_BUSY;
 		TAILQ_INIT(&cm->cm_chain_list);
 		TAILQ_INIT(&cm->cm_prp_page_list);
 		callout_init_mtx(&cm->cm_callout, &sc->mpr_mtx, 0);
@@ -2444,6 +2448,9 @@ mpr_intr_locked(void *data)
 		case MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS:
 		case MPI26_RPY_DESCRIPT_FLAGS_PCIE_ENCAPSULATED_SUCCESS:
 			cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)];
+			KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE,
+			    ("command not inqueue\n"));
+			cm->cm_state = MPR_CM_STATE_BUSY;
 			cm->cm_reply = NULL;
 			break;
 		case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY:
@@ -2513,6 +2520,9 @@ mpr_intr_locked(void *data)
 			} else {
 				cm = &sc->commands[
 				    le16toh(desc->AddressReply.SMID)];
+				KASSERT(cm->cm_state == MPR_CM_STATE_INQUEUE,
+				    ("command not inqueue\n"));
+				cm->cm_state = MPR_CM_STATE_BUSY;
 				cm->cm_reply = reply;
 				cm->cm_reply_data =
 				    le32toh(desc->AddressReply.
@@ -2543,8 +2553,7 @@ mpr_intr_locked(void *data)
 	}
 
 	if (pq != sc->replypostindex) {
-		mpr_dprint(sc, MPR_TRACE,
-		    "%s sc %p writing postindex %d\n",
+		mpr_dprint(sc, MPR_TRACE, "%s sc %p writing postindex %d\n",
 		    __func__, sc, sc->replypostindex);
 		mpr_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET,
 		    sc->replypostindex);

Modified: head/sys/dev/mpr/mpr_sas.c
==============================================================================
--- head/sys/dev/mpr/mpr_sas.c	Sun Feb 18 16:03:50 2018	(r329521)
+++ head/sys/dev/mpr/mpr_sas.c	Sun Feb 18 16:41:34 2018	(r329522)
@@ -1161,6 +1161,10 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
 	/* complete all commands with a NULL reply */
 	for (i = 1; i < sc->num_reqs; i++) {
 		cm = &sc->commands[i];
+		if (cm->cm_state == MPR_CM_STATE_FREE)
+			continue;
+
+		cm->cm_state = MPR_CM_STATE_BUSY;
 		cm->cm_reply = NULL;
 		completed = 0;
 
@@ -1173,9 +1177,7 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
 			    cm, cm->cm_state, cm->cm_ccb);
 			cm->cm_complete(sc, cm);
 			completed = 1;
-		}
-
-		if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) {
+		} else if (cm->cm_flags & MPR_CM_FLAGS_WAKEUP) {
 			mprsas_log_command(cm, MPR_RECOVERY,
 			    "waking up cm %p state %x ccb %p for diag reset\n", 
 			    cm, cm->cm_state, cm->cm_ccb);
@@ -1183,9 +1185,6 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
 			completed = 1;
 		}
 
-		if (cm->cm_sc->io_cmds_active != 0)
-			cm->cm_sc->io_cmds_active--;
-		
 		if ((completed == 0) && (cm->cm_state != MPR_CM_STATE_FREE)) {
 			/* this should never happen, but if it does, log */
 			mprsas_log_command(cm, MPR_RECOVERY,
@@ -1194,6 +1193,8 @@ mprsas_complete_all_commands(struct mpr_softc *sc)
 			    cm->cm_ccb);
 		}
 	}
+
+	sc->io_cmds_active = 0;
 }
 
 void
@@ -1248,6 +1249,11 @@ mprsas_tm_timeout(void *data)
 
 	mprsas_log_command(tm, MPR_INFO|MPR_RECOVERY, "task mgmt %p timed "
 	    "out\n", tm);
+
+	KASSERT(tm->cm_state == MPR_CM_STATE_INQUEUE,
+	    ("command not inqueue\n"));
+
+	tm->cm_state = MPR_CM_STATE_BUSY;
 	mpr_reinit(sc);
 }
 
@@ -1657,7 +1663,7 @@ mprsas_scsiio_timeout(void *data)
 	 * and been re-used, though this is unlikely.
 	 */
 	mpr_intr_locked(sc);
-	if (cm->cm_state == MPR_CM_STATE_FREE) {
+	if (cm->cm_state != MPR_CM_STATE_INQUEUE) {
 		mprsas_log_command(cm, MPR_XINFO,
 		    "SCSI command %p almost timed out\n", cm);
 		return;
@@ -2492,6 +2498,7 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mp
 
 	if (cm->cm_state == MPR_CM_STATE_TIMEDOUT) {
 		TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery);
+		cm->cm_state = MPR_CM_STATE_BUSY;
 		if (cm->cm_reply != NULL)
 			mprsas_log_command(cm, MPR_RECOVERY,
 			    "completed timedout cm %p ccb %p during recovery "

Modified: head/sys/dev/mpr/mprvar.h
==============================================================================
--- head/sys/dev/mpr/mprvar.h	Sun Feb 18 16:03:50 2018	(r329521)
+++ head/sys/dev/mpr/mprvar.h	Sun Feb 18 16:41:34 2018	(r329522)
@@ -243,6 +243,7 @@ struct mpr_command {
 #define MPR_CM_STATE_FREE		0
 #define MPR_CM_STATE_BUSY		1
 #define MPR_CM_STATE_TIMEDOUT		2
+#define MPR_CM_STATE_INQUEUE		3
 	bus_dmamap_t			cm_dmamap;
 	struct scsi_sense_data		*cm_sense;
 	uint64_t			*nvme_error_response;
@@ -569,6 +570,8 @@ mpr_free_command(struct mpr_softc *sc, struct mpr_comm
 	struct mpr_chain *chain, *chain_temp;
 	struct mpr_prp_page *prp_page, *prp_page_temp;
 
+	KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n"));
+
 	if (cm->cm_reply != NULL)
 		mpr_free_reply(sc, cm->cm_reply_data);
 	cm->cm_reply = NULL;
@@ -607,9 +610,10 @@ mpr_alloc_command(struct mpr_softc *sc)
 	if (cm == NULL)
 		return (NULL);
 
+	KASSERT(cm->cm_state == MPR_CM_STATE_FREE,
+	    ("mpr: Allocating busy command\n"));
+
 	TAILQ_REMOVE(&sc->req_list, cm, cm_link);
-	KASSERT(cm->cm_state == MPR_CM_STATE_FREE, ("mpr: Allocating busy "
-	    "command\n"));
 	cm->cm_state = MPR_CM_STATE_BUSY;
 	return (cm);
 }
@@ -619,6 +623,8 @@ mpr_free_high_priority_command(struct mpr_softc *sc, s
 {
 	struct mpr_chain *chain, *chain_temp;
 
+	KASSERT(cm->cm_state == MPR_CM_STATE_BUSY, ("state not busy\n"));
+
 	if (cm->cm_reply != NULL)
 		mpr_free_reply(sc, cm->cm_reply_data);
 	cm->cm_reply = NULL;
@@ -645,9 +651,10 @@ mpr_alloc_high_priority_command(struct mpr_softc *sc)
 	if (cm == NULL)
 		return (NULL);
 
+	KASSERT(cm->cm_state == MPR_CM_STATE_FREE,
+	    ("mpr: Allocating busy command\n"));
+
 	TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link);
-	KASSERT(cm->cm_state == MPR_CM_STATE_FREE, ("mpr: Allocating busy "
-	    "command\n"));
 	cm->cm_state = MPR_CM_STATE_BUSY;
 	return (cm);
 }

Modified: head/sys/dev/mps/mps.c
==============================================================================
--- head/sys/dev/mps/mps.c	Sun Feb 18 16:03:50 2018	(r329521)
+++ head/sys/dev/mps/mps.c	Sun Feb 18 16:41:34 2018	(r329522)
@@ -1102,6 +1102,10 @@ mps_enqueue_request(struct mps_softc *sc, struct mps_c
 	rd.u.low = cm->cm_desc.Words.Low;
 	rd.u.high = cm->cm_desc.Words.High;
 	rd.word = htole64(rd.word);
+
+	KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("command not busy\n"));
+	cm->cm_state = MPS_CM_STATE_INQUEUE;
+
 	/* TODO-We may need to make below regwrite atomic */
 	mps_regwrite(sc, MPI2_REQUEST_DESCRIPTOR_POST_LOW_OFFSET,
 	    rd.u.low);
@@ -1502,6 +1506,7 @@ mps_alloc_requests(struct mps_softc *sc)
 		cm->cm_sense_busaddr = sc->sense_busaddr + i * MPS_SENSE_LEN;
 		cm->cm_desc.Default.SMID = i;
 		cm->cm_sc = sc;
+		cm->cm_state = MPS_CM_STATE_BUSY;
 		TAILQ_INIT(&cm->cm_chain_list);
 		callout_init_mtx(&cm->cm_callout, &sc->mps_mtx, 0);
 
@@ -2305,6 +2310,9 @@ mps_intr_locked(void *data)
 		switch (flags) {
 		case MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS:
 			cm = &sc->commands[le16toh(desc->SCSIIOSuccess.SMID)];
+			KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE,
+			    ("command not inqueue\n"));
+			cm->cm_state = MPS_CM_STATE_BUSY;
 			cm->cm_reply = NULL;
 			break;
 		case MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY:
@@ -2340,7 +2348,7 @@ mps_intr_locked(void *data)
 				       sc->reply_frames, sc->fqdepth,
 				       sc->replyframesz);
 				printf("%s: baddr %#x,\n", __func__, baddr);
-				/* LSI-TODO. See Linux Code. Need Graceful exit*/
+				/* LSI-TODO. See Linux Code for Graceful exit */
 				panic("Reply address out of range");
 			}
 			if (le16toh(desc->AddressReply.SMID) == 0) {
@@ -2372,10 +2380,14 @@ mps_intr_locked(void *data)
 					    (MPI2_EVENT_NOTIFICATION_REPLY *)
 					    reply);
 			} else {
-				cm = &sc->commands[le16toh(desc->AddressReply.SMID)];
+				cm = &sc->commands[
+				    le16toh(desc->AddressReply.SMID)];
+				KASSERT(cm->cm_state == MPS_CM_STATE_INQUEUE,
+				    ("command not inqueue\n"));
+				cm->cm_state = MPS_CM_STATE_BUSY;
 				cm->cm_reply = reply;
-				cm->cm_reply_data =
-				    le32toh(desc->AddressReply.ReplyFrameAddress);
+				cm->cm_reply_data = le32toh(
+				    desc->AddressReply.ReplyFrameAddress);
 			}
 			break;
 		}
@@ -2403,10 +2415,10 @@ mps_intr_locked(void *data)
 	}
 
 	if (pq != sc->replypostindex) {
-		mps_dprint(sc, MPS_TRACE,
-		    "%s sc %p writing postindex %d\n",
+		mps_dprint(sc, MPS_TRACE, "%s sc %p writing postindex %d\n",
 		    __func__, sc, sc->replypostindex);
-		mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET, sc->replypostindex);
+		mps_regwrite(sc, MPI2_REPLY_POST_HOST_INDEX_OFFSET,
+		    sc->replypostindex);
 	}
 
 	return;

Modified: head/sys/dev/mps/mps_sas.c
==============================================================================
--- head/sys/dev/mps/mps_sas.c	Sun Feb 18 16:03:50 2018	(r329521)
+++ head/sys/dev/mps/mps_sas.c	Sun Feb 18 16:41:34 2018	(r329522)
@@ -1101,6 +1101,10 @@ mpssas_complete_all_commands(struct mps_softc *sc)
 	/* complete all commands with a NULL reply */
 	for (i = 1; i < sc->num_reqs; i++) {
 		cm = &sc->commands[i];
+		if (cm->cm_state == MPS_CM_STATE_FREE)
+			continue;
+
+		cm->cm_state = MPS_CM_STATE_BUSY;
 		cm->cm_reply = NULL;
 		completed = 0;
 
@@ -1109,14 +1113,12 @@ mpssas_complete_all_commands(struct mps_softc *sc)
 
 		if (cm->cm_complete != NULL) {
 			mpssas_log_command(cm, MPS_RECOVERY,
-			    "completing cm %p state %x ccb %p for diag reset\n", 
+			    "completing cm %p state %x ccb %p for diag reset\n",
 			    cm, cm->cm_state, cm->cm_ccb);
 
 			cm->cm_complete(sc, cm);
 			completed = 1;
-		}
-
-		if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) {
+		} else if (cm->cm_flags & MPS_CM_FLAGS_WAKEUP) {
 			mpssas_log_command(cm, MPS_RECOVERY,
 			    "waking up cm %p state %x ccb %p for diag reset\n", 
 			    cm, cm->cm_state, cm->cm_ccb);
@@ -1124,9 +1126,6 @@ mpssas_complete_all_commands(struct mps_softc *sc)
 			completed = 1;
 		}
 
-		if (cm->cm_sc->io_cmds_active != 0)
-			cm->cm_sc->io_cmds_active--;
-		
 		if ((completed == 0) && (cm->cm_state != MPS_CM_STATE_FREE)) {
 			/* this should never happen, but if it does, log */
 			mpssas_log_command(cm, MPS_RECOVERY,
@@ -1135,6 +1134,8 @@ mpssas_complete_all_commands(struct mps_softc *sc)
 			    cm->cm_ccb);
 		}
 	}
+
+	sc->io_cmds_active = 0;
 }
 
 void
@@ -1191,6 +1192,11 @@ mpssas_tm_timeout(void *data)
 
 	mpssas_log_command(tm, MPS_INFO|MPS_RECOVERY,
 	    "task mgmt %p timed out\n", tm);
+
+	KASSERT(tm->cm_state == MPS_CM_STATE_INQUEUE,
+	    ("command not inqueue\n"));
+
+	tm->cm_state = MPS_CM_STATE_BUSY;
 	mps_reinit(sc);
 }
 
@@ -1591,7 +1597,7 @@ mpssas_scsiio_timeout(void *data)
 	 * and been re-used, though this is unlikely.
 	 */
 	mps_intr_locked(sc);
-	if (cm->cm_state == MPS_CM_STATE_FREE) {
+	if (cm->cm_state != MPS_CM_STATE_INQUEUE) {
 		mpssas_log_command(cm, MPS_XINFO,
 		    "SCSI command %p almost timed out\n", cm);
 		return;
@@ -2031,12 +2037,13 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mp
 
 	if (cm->cm_state == MPS_CM_STATE_TIMEDOUT) {
 		TAILQ_REMOVE(&cm->cm_targ->timedout_commands, cm, cm_recovery);
+		cm->cm_state = MPS_CM_STATE_BUSY;
 		if (cm->cm_reply != NULL)
 			mpssas_log_command(cm, MPS_RECOVERY,
 			    "completed timedout cm %p ccb %p during recovery "
 			    "ioc %x scsi %x state %x xfer %u\n",
-			    cm, cm->cm_ccb,
-			    le16toh(rep->IOCStatus), rep->SCSIStatus, rep->SCSIState,
+			    cm, cm->cm_ccb, le16toh(rep->IOCStatus),
+			    rep->SCSIStatus, rep->SCSIState,
 			    le32toh(rep->TransferCount));
 		else
 			mpssas_log_command(cm, MPS_RECOVERY,
@@ -2047,8 +2054,8 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mp
 			mpssas_log_command(cm, MPS_RECOVERY,
 			    "completed cm %p ccb %p during recovery "
 			    "ioc %x scsi %x state %x xfer %u\n",
-			    cm, cm->cm_ccb,
-			    le16toh(rep->IOCStatus), rep->SCSIStatus, rep->SCSIState,
+			    cm, cm->cm_ccb, le16toh(rep->IOCStatus),
+			    rep->SCSIStatus, rep->SCSIState,
 			    le32toh(rep->TransferCount));
 		else
 			mpssas_log_command(cm, MPS_RECOVERY,

Modified: head/sys/dev/mps/mpsvar.h
==============================================================================
--- head/sys/dev/mps/mpsvar.h	Sun Feb 18 16:03:50 2018	(r329521)
+++ head/sys/dev/mps/mpsvar.h	Sun Feb 18 16:41:34 2018	(r329522)
@@ -243,6 +243,7 @@ struct mps_command {
 #define MPS_CM_STATE_FREE		0
 #define MPS_CM_STATE_BUSY		1
 #define MPS_CM_STATE_TIMEDOUT		2
+#define MPS_CM_STATE_INQUEUE		3
 	bus_dmamap_t			cm_dmamap;
 	struct scsi_sense_data		*cm_sense;
 	TAILQ_HEAD(, mps_chain)		cm_chain_list;
@@ -541,6 +542,8 @@ mps_free_command(struct mps_softc *sc, struct mps_comm
 {
 	struct mps_chain *chain, *chain_temp;
 
+	KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n"));
+
 	if (cm->cm_reply != NULL)
 		mps_free_reply(sc, cm->cm_reply_data);
 	cm->cm_reply = NULL;
@@ -574,8 +577,10 @@ mps_alloc_command(struct mps_softc *sc)
 	if (cm == NULL)
 		return (NULL);
 
+	KASSERT(cm->cm_state == MPS_CM_STATE_FREE,
+	    ("mps: Allocating busy command\n"));
+
 	TAILQ_REMOVE(&sc->req_list, cm, cm_link);
-	KASSERT(cm->cm_state == MPS_CM_STATE_FREE, ("mps: Allocating busy command\n"));
 	cm->cm_state = MPS_CM_STATE_BUSY;
 	return (cm);
 }
@@ -585,6 +590,8 @@ mps_free_high_priority_command(struct mps_softc *sc, s
 {
 	struct mps_chain *chain, *chain_temp;
 
+	KASSERT(cm->cm_state == MPS_CM_STATE_BUSY, ("state not busy\n"));
+
 	if (cm->cm_reply != NULL)
 		mps_free_reply(sc, cm->cm_reply_data);
 	cm->cm_reply = NULL;
@@ -611,8 +618,10 @@ mps_alloc_high_priority_command(struct mps_softc *sc)
 	if (cm == NULL)
 		return (NULL);
 
+	KASSERT(cm->cm_state == MPS_CM_STATE_FREE,
+	    ("mps: Allocating busy command\n"));
+
 	TAILQ_REMOVE(&sc->high_priority_req_list, cm, cm_link);
-	KASSERT(cm->cm_state == MPS_CM_STATE_FREE, ("mps: Allocating busy command\n"));
 	cm->cm_state = MPS_CM_STATE_BUSY;
 	return (cm);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201802181641.w1IGfYa4052116>