FreeBSD Mail Archives

Date:      Wed, 6 Sep 2017 09:19:54 +0000 (UTC)
From:      Scott Long <scottl@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r323215 - in head/sys/dev: mpr mps
Message-ID:  <201709060919.v869Jsk9060601@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help


Author: scottl
Date: Wed Sep  6 09:19:54 2017
New Revision: 323215
URL: https://svnweb.freebsd.org/changeset/base/323215

Log:
  Checkpoint the next phase in debug message cleanup, this time focusing on
  error recovery messages.
  
  Sponsored by:	Netflix

Modified:
  head/sys/dev/mpr/mpr_sas.c
  head/sys/dev/mps/mps_sas.c

Modified: head/sys/dev/mpr/mpr_sas.c
==============================================================================
--- head/sys/dev/mpr/mpr_sas.c	Wed Sep  6 07:19:22 2017	(r323214)
+++ head/sys/dev/mpr/mpr_sas.c	Wed Sep  6 09:19:54 2017	(r323215)
@@ -1286,33 +1286,39 @@ mprsas_logical_unit_reset_complete(struct mpr_softc *s
 	 * task management commands don't have S/G lists.
 	 */
 	if ((tm->cm_flags & MPR_CM_FLAGS_ERROR_MASK) != 0) {
-		mpr_dprint(sc, MPR_ERROR, "%s: cm_flags = %#x for LUN reset! "
+		mpr_dprint(sc, MPR_RECOVERY|MPR_ERROR,
+		    "%s: cm_flags = %#x for LUN reset! "
 		    "This should not happen!\n", __func__, tm->cm_flags);
 		mprsas_free_tm(sc, tm);
 		return;
 	}
 
 	if (reply == NULL) {
-		mprsas_log_command(tm, MPR_RECOVERY, "NULL reset reply for tm "
-		    "%p\n", tm);
+		mpr_dprint(sc, MPR_RECOVERY, "NULL reset reply for tm %p\n",
+		    tm);
 		if ((sc->mpr_flags & MPR_FLAGS_DIAGRESET) != 0) {
 			/* this completion was due to a reset, just cleanup */
+			mpr_dprint(sc, MPR_RECOVERY, "Hardware undergoing "
+			    "reset, ignoring NULL LUN reset reply\n");
 			targ->tm = NULL;
 			mprsas_free_tm(sc, tm);
 		}
 		else {
 			/* we should have gotten a reply. */
+			mpr_dprint(sc, MPR_INFO|MPR_RECOVERY, "NULL reply on "
+			    "LUN reset attempt, resetting controller\n");
 			mpr_reinit(sc);
 		}
 		return;
 	}
 
-	mprsas_log_command(tm, MPR_RECOVERY,
+	mpr_dprint(sc, MPR_RECOVERY,
 	    "logical unit reset status 0x%x code 0x%x count %u\n",
 	    le16toh(reply->IOCStatus), le32toh(reply->ResponseCode),
 	    le32toh(reply->TerminationCount));
-		
-	/* See if there are any outstanding commands for this LUN.
+
+	/*
+	 * See if there are any outstanding commands for this LUN.
 	 * This could be made more efficient by using a per-LU data
 	 * structure of some sort.
 	 */
@@ -1322,34 +1328,36 @@ mprsas_logical_unit_reset_complete(struct mpr_softc *s
 	}
 
 	if (cm_count == 0) {
-		mprsas_log_command(tm, MPR_RECOVERY|MPR_INFO,
-		    "logical unit %u finished recovery after reset\n",
-		    tm->cm_lun, tm);
+		mpr_dprint(sc, MPR_RECOVERY|MPR_INFO,
+		    "Finished recovery after LUN reset for target %u\n",
+		    targ->tid);
 
-		mprsas_announce_reset(sc, AC_SENT_BDR, tm->cm_targ->tid, 
+		mprsas_announce_reset(sc, AC_SENT_BDR, targ->tid, 
 		    tm->cm_lun);
 
-		/* we've finished recovery for this logical unit.  check and
+		/*
+		 * We've finished recovery for this logical unit.  check and
 		 * see if some other logical unit has a timedout command
 		 * that needs to be processed.
 		 */
 		cm = TAILQ_FIRST(&targ->timedout_commands);
 		if (cm) {
+			mpr_dprint(sc, MPR_INFO|MPR_RECOVERY,
+			   "More commands to abort for target %u\n", targ->tid);
 			mprsas_send_abort(sc, tm, cm);
-		}
-		else {
+		} else {
 			targ->tm = NULL;
 			mprsas_free_tm(sc, tm);
 		}
-	}
-	else {
+	} else {
 		/* if we still have commands for this LUN, the reset
 		 * effectively failed, regardless of the status reported.
 		 * Escalate to a target reset.
 		 */
-		mprsas_log_command(tm, MPR_RECOVERY,
-		    "logical unit reset complete for tm %p, but still have %u "
-		    "command(s)\n", tm, cm_count);
+		mpr_dprint(sc, MPR_INFO|MPR_RECOVERY,
+		    "logical unit reset complete for target %u, but still "
+		    "have %u command(s), sending target reset\n", targ->tid,
+		    cm_count);
 		mprsas_send_reset(sc, tm,
 		    MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET);
 	}
@@ -1381,46 +1389,53 @@ mprsas_target_reset_complete(struct mpr_softc *sc, str
 	}
 
 	if (reply == NULL) {
-		mprsas_log_command(tm, MPR_RECOVERY, "NULL reset reply for tm "
-		    "%p\n", tm);
+		mpr_dprint(sc, MPR_RECOVERY,
+		    "NULL target reset reply for tm %p TaskMID %u\n",
+		    tm, le16toh(req->TaskMID));
 		if ((sc->mpr_flags & MPR_FLAGS_DIAGRESET) != 0) {
 			/* this completion was due to a reset, just cleanup */
+			mpr_dprint(sc, MPR_RECOVERY, "Hardware undergoing "
+			    "reset, ignoring NULL target reset reply\n");
 			targ->tm = NULL;
 			mprsas_free_tm(sc, tm);
 		}
 		else {
 			/* we should have gotten a reply. */
+			mpr_dprint(sc, MPR_INFO|MPR_RECOVERY, "NULL reply on "
+			    "target reset attempt, resetting controller\n");
 			mpr_reinit(sc);
 		}
 		return;
 	}
 
-	mprsas_log_command(tm, MPR_RECOVERY,
+	mpr_dprint(sc, MPR_RECOVERY,
 	    "target reset status 0x%x code 0x%x count %u\n",
 	    le16toh(reply->IOCStatus), le32toh(reply->ResponseCode),
 	    le32toh(reply->TerminationCount));
 
 	if (targ->outstanding == 0) {
-		/* we've finished recovery for this target and all
+		/*
+		 * We've finished recovery for this target and all
 		 * of its logical units.
 		 */
-		mprsas_log_command(tm, MPR_RECOVERY|MPR_INFO,
-		    "recovery finished after target reset\n");
+		mpr_dprint(sc, MPR_RECOVERY|MPR_INFO,
+		    "Finished reset recovery for target %u\n", targ->tid);
 
 		mprsas_announce_reset(sc, AC_SENT_BDR, tm->cm_targ->tid,
 		    CAM_LUN_WILDCARD);
 
 		targ->tm = NULL;
 		mprsas_free_tm(sc, tm);
-	}
-	else {
-		/* after a target reset, if this target still has
+	} else {
+		/*
+		 * After a target reset, if this target still has
 		 * outstanding commands, the reset effectively failed,
 		 * regardless of the status reported.  escalate.
 		 */
-		mprsas_log_command(tm, MPR_RECOVERY,
-		    "target reset complete for tm %p, but still have %u "
-		    "command(s)\n", tm, targ->outstanding);
+		mpr_dprint(sc, MPR_INFO|MPR_RECOVERY,
+		    "Target reset complete for target %u, but still have %u "
+		    "command(s), resetting controller\n", targ->tid,
+		    targ->outstanding);
 		mpr_reinit(sc);
 	}
 }
@@ -1450,20 +1465,20 @@ mprsas_send_reset(struct mpr_softc *sc, struct mpr_com
 		/* XXX Need to handle invalid LUNs */
 		MPR_SET_LUN(req->LUN, tm->cm_lun);
 		tm->cm_targ->logical_unit_resets++;
-		mprsas_log_command(tm, MPR_RECOVERY|MPR_INFO,
-		    "sending logical unit reset\n");
+		mpr_dprint(sc, MPR_RECOVERY|MPR_INFO,
+		    "Sending logical unit reset to target %u lun %d\n",
+		    target->tid, tm->cm_lun);
 		tm->cm_complete = mprsas_logical_unit_reset_complete;
 		mprsas_prepare_for_tm(sc, tm, target, tm->cm_lun);
-	}
-	else if (type == MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET) {
+	} else if (type == MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET) {
 		/*
 		 * Target reset method =
 		 *     SAS Hard Link Reset / SATA Link Reset
 		 */
 		req->MsgFlags = MPI2_SCSITASKMGMT_MSGFLAGS_LINK_RESET;
 		tm->cm_targ->target_resets++;
-		mprsas_log_command(tm, MPR_RECOVERY|MPR_INFO,
-		    "sending target reset\n");
+		mpr_dprint(sc, MPR_RECOVERY|MPR_INFO,
+		    "Sending target reset to target %u\n", target->tid);
 		tm->cm_complete = mprsas_target_reset_complete;
 		mprsas_prepare_for_tm(sc, tm, target, CAM_LUN_WILDCARD);
 	}
@@ -1472,12 +1487,11 @@ mprsas_send_reset(struct mpr_softc *sc, struct mpr_com
 		return -1;
 	}
 
-	mpr_dprint(sc, MPR_INFO, "to target %u handle 0x%04x\n", target->tid,
-	    target->handle);
 	if (target->encl_level_valid) {
-		mpr_dprint(sc, MPR_INFO, "At enclosure level %d, slot %d, "
-		    "connector name (%4s)\n", target->encl_level,
-		    target->encl_slot, target->connector_name);
+		mpr_dprint(sc, MPR_RECOVERY|MPR_INFO,
+		    "At enclosure level %d, slot %d, connector name (%4s)\n",
+		    target->encl_level, target->encl_slot,
+		    target->connector_name);
 	}
 
 	tm->cm_data = NULL;
@@ -1490,7 +1504,7 @@ mprsas_send_reset(struct mpr_softc *sc, struct mpr_com
 
 	err = mpr_map_command(sc, tm);
 	if (err)
-		mprsas_log_command(tm, MPR_RECOVERY,
+		mpr_dprint(sc, MPR_ERROR|MPR_RECOVERY,
 		    "error %d sending reset type %u\n", err, type);
 
 	return err;
@@ -1517,7 +1531,7 @@ mprsas_abort_complete(struct mpr_softc *sc, struct mpr
 	 * task management commands don't have S/G lists.
 	 */
 	if ((tm->cm_flags & MPR_CM_FLAGS_ERROR_MASK) != 0) {
-		mprsas_log_command(tm, MPR_RECOVERY,
+		mpr_dprint(sc, MPR_RECOVERY|MPR_ERROR,
 		    "cm_flags = %#x for abort %p TaskMID %u!\n", 
 		    tm->cm_flags, tm, le16toh(req->TaskMID));
 		mprsas_free_tm(sc, tm);
@@ -1525,22 +1539,25 @@ mprsas_abort_complete(struct mpr_softc *sc, struct mpr
 	}
 
 	if (reply == NULL) {
-		mprsas_log_command(tm, MPR_RECOVERY,
+		mpr_dprint(sc, MPR_RECOVERY,
 		    "NULL abort reply for tm %p TaskMID %u\n", 
 		    tm, le16toh(req->TaskMID));
 		if ((sc->mpr_flags & MPR_FLAGS_DIAGRESET) != 0) {
 			/* this completion was due to a reset, just cleanup */
+			mpr_dprint(sc, MPR_RECOVERY, "Hardware undergoing "
+			    "reset, ignoring NULL abort reply\n");
 			targ->tm = NULL;
 			mprsas_free_tm(sc, tm);
-		}
-		else {
+		} else {
 			/* we should have gotten a reply. */
+			mpr_dprint(sc, MPR_INFO|MPR_RECOVERY, "NULL reply on "
+			    "abort attempt, resetting controller\n");
 			mpr_reinit(sc);
 		}
 		return;
 	}
 
-	mprsas_log_command(tm, MPR_RECOVERY,
+	mpr_dprint(sc, MPR_RECOVERY,
 	    "abort TaskMID %u status 0x%x code 0x%x count %u\n",
 	    le16toh(req->TaskMID),
 	    le16toh(reply->IOCStatus), le32toh(reply->ResponseCode),
@@ -1548,31 +1565,27 @@ mprsas_abort_complete(struct mpr_softc *sc, struct mpr
 
 	cm = TAILQ_FIRST(&tm->cm_targ->timedout_commands);
 	if (cm == NULL) {
-		/* if there are no more timedout commands, we're done with
+		/*
+		 * if there are no more timedout commands, we're done with
 		 * error recovery for this target.
 		 */
-		mprsas_log_command(tm, MPR_RECOVERY,
-		    "finished recovery after aborting TaskMID %u\n",
-		    le16toh(req->TaskMID));
-
+		mpr_dprint(sc, MPR_INFO|MPR_RECOVERY,
+		    "Finished abort recovery for target %u\n", targ->tid);
 		targ->tm = NULL;
 		mprsas_free_tm(sc, tm);
-	}
-	else if (le16toh(req->TaskMID) != cm->cm_desc.Default.SMID) {
+	} else if (le16toh(req->TaskMID) != cm->cm_desc.Default.SMID) {
 		/* abort success, but we have more timedout commands to abort */
-		mprsas_log_command(tm, MPR_RECOVERY,
-		    "continuing recovery after aborting TaskMID %u\n",
-		    le16toh(req->TaskMID));
-		
+		mpr_dprint(sc, MPR_INFO|MPR_RECOVERY,
+		    "Continuing abort recovery for target %u\n", targ->tid);
 		mprsas_send_abort(sc, tm, cm);
-	}
-	else {
-		/* we didn't get a command completion, so the abort
+	} else {
+		/*
+		 * we didn't get a command completion, so the abort
 		 * failed as far as we're concerned.  escalate.
 		 */
-		mprsas_log_command(tm, MPR_RECOVERY,
-		    "abort failed for TaskMID %u tm %p\n",
-		    le16toh(req->TaskMID), tm);
+		mpr_dprint(sc, MPR_INFO|MPR_RECOVERY,
+		    "Abort failed for target %u, sending logical unit reset\n",
+		    targ->tid);
 
 		mprsas_send_reset(sc, tm, 
 		    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET);
@@ -1591,7 +1604,8 @@ mprsas_send_abort(struct mpr_softc *sc, struct mpr_com
 
 	targ = cm->cm_targ;
 	if (targ->handle == 0) {
-		mpr_dprint(sc, MPR_ERROR,"%s null devhandle for target_id %d\n",
+		mpr_dprint(sc, MPR_ERROR|MPR_RECOVERY,
+		   "%s null devhandle for target_id %d\n",
 		    __func__, cm->cm_ccb->ccb_h.target_id);
 		return -1;
 	}
@@ -1622,13 +1636,11 @@ mprsas_send_abort(struct mpr_softc *sc, struct mpr_com
 
 	targ->aborts++;
 
-	mpr_dprint(sc, MPR_INFO, "Sending reset from %s for target ID %d\n",
-	    __func__, targ->tid);
 	mprsas_prepare_for_tm(sc, tm, targ, tm->cm_lun);
 
 	err = mpr_map_command(sc, tm);
 	if (err)
-		mpr_dprint(sc, MPR_RECOVERY,
+		mpr_dprint(sc, MPR_ERROR|MPR_RECOVERY,
 		    "error %d sending abort for cm %p SMID %u\n",
 		    err, cm, req->TaskMID);
 	return err;
@@ -1637,17 +1649,21 @@ mprsas_send_abort(struct mpr_softc *sc, struct mpr_com
 static void
 mprsas_scsiio_timeout(void *data)
 {
+	sbintime_t elapsed, now;
+	union ccb *ccb;
 	struct mpr_softc *sc;
 	struct mpr_command *cm;
 	struct mprsas_target *targ;
 
 	cm = (struct mpr_command *)data;
 	sc = cm->cm_sc;
+	ccb = cm->cm_ccb;
+	now = sbinuptime();
 
 	MPR_FUNCTRACE(sc);
 	mtx_assert(&sc->mpr_mtx, MA_OWNED);
 
-	mpr_dprint(sc, MPR_XINFO, "Timeout checking cm %p\n", cm);
+	mpr_dprint(sc, MPR_XINFO|MPR_RECOVERY, "Timeout checking cm %p\n", cm);
 
 	/*
 	 * Run the interrupt handler to make sure it's not pending.  This
@@ -1669,13 +1685,15 @@ mprsas_scsiio_timeout(void *data)
 	targ = cm->cm_targ;
 	targ->timeouts++;
 
-	mprsas_log_command(cm, MPR_ERROR, "command timeout %d cm %p target "
-	    "%u, handle(0x%04x)\n", cm->cm_ccb->ccb_h.timeout, cm, targ->tid,
-	    targ->handle);
+	elapsed = now - ccb->ccb_h.qos.sim_data;
+	mprsas_log_command(cm, MPR_INFO|MPR_RECOVERY,
+	    "Command timeout on target %u(0x%04x), %d set, %d.%d elapsed\n",
+	    targ->tid, targ->handle, ccb->ccb_h.timeout,
+	    sbintime_getsec(elapsed), elapsed & 0xffffffff);
 	if (targ->encl_level_valid) {
-		mpr_dprint(sc, MPR_ERROR, "At enclosure level %d, slot %d, "
-		    "connector name (%4s)\n", targ->encl_level, targ->encl_slot,
-		    targ->connector_name);
+		mpr_dprint(sc, MPR_INFO|MPR_RECOVERY,
+		    "At enclosure level %d, slot %d, connector name (%4s)\n",
+		    targ->encl_level, targ->encl_slot, targ->connector_name);
 	}
 
 	/* XXX first, check the firmware state, to see if it's still
@@ -1693,10 +1711,13 @@ mprsas_scsiio_timeout(void *data)
 		    "processing by tm %p\n", cm, targ->tm);
 	}
 	else if ((targ->tm = mprsas_alloc_tm(sc)) != NULL) {
-		mpr_dprint(sc, MPR_RECOVERY, "timedout cm %p allocated tm %p\n",
-		    cm, targ->tm);
 
 		/* start recovery by aborting the first timedout command */
+		mpr_dprint(sc, MPR_RECOVERY|MPR_INFO,
+		    "Sending abort to target %u for SMID %d\n", targ->tid,
+		    cm->cm_desc.Default.SMID);
+		mpr_dprint(sc, MPR_RECOVERY, "timedout cm %p allocated tm %p\n",
+		    cm, targ->tm);
 		mprsas_send_abort(sc, targ->tm, cm);
 	}
 	else {
@@ -1709,8 +1730,8 @@ mprsas_scsiio_timeout(void *data)
 		 * more credits than disks in an enclosure, and limit
 		 * ourselves to one TM per target for recovery.
 		 */
-		mpr_dprint(sc, MPR_RECOVERY, "timedout cm %p failed to "
-		    "allocate a tm\n", cm);
+		mpr_dprint(sc, MPR_ERROR|MPR_RECOVERY,
+		    "timedout cm %p failed to allocate a tm\n", cm);
 	}
 }
 
@@ -1843,6 +1864,7 @@ mprsas_build_nvme_unmap(struct mpr_softc *sc, struct m
 	cm->cm_desc.Default.RequestFlags =
 	    MPI26_REQ_DESCRIPT_FLAGS_PCIE_ENCAPSULATED;
 
+	csio->ccb_h.qos.sim_data = sbinuptime();
 #if __FreeBSD_version >= 1000029
 	callout_reset_sbt(&cm->cm_callout, SBT_1MS * ccb->ccb_h.timeout, 0,
 	    mprsas_scsiio_timeout, cm, 0);
@@ -2161,6 +2183,7 @@ mprsas_action_scsiio(struct mprsas_softc *sassc, union
 			cm->cm_desc.SCSIIO.DevHandle = htole16(targ->handle);
 	}
 
+	csio->ccb_h.qos.sim_data = sbinuptime();
 #if __FreeBSD_version >= 1000029
 	callout_reset_sbt(&cm->cm_callout, SBT_1MS * ccb->ccb_h.timeout, 0,
 	    mprsas_scsiio_timeout, cm, 0);
@@ -2936,9 +2959,14 @@ mprsas_scsiio_complete(struct mpr_softc *sc, struct mp
 		 * avoiding getting into an infinite retry loop.
 		 */
 		mprsas_set_ccbstatus(ccb, CAM_REQ_CMP_ERR);
-		mprsas_log_command(cm, MPR_INFO,
-		    "terminated ioc %x loginfo %x scsi %x state %x xfer %u\n",
-		    le16toh(rep->IOCStatus), le32toh(rep->IOCLogInfo),
+		mpr_dprint(sc, MPR_INFO,
+		    "Controller reported %s status for target %u SMID %u, "
+		    "loginfo %x\n", ((rep->IOCStatus & MPI2_IOCSTATUS_MASK) ==
+		    MPI2_IOCSTATUS_SCSI_IOC_TERMINATED) ? "IOC_TERMINATED" :
+		    "EXT_TERMINATED", target_id, cm->cm_desc.Default.SMID,
+		    le32toh(rep->IOCLogInfo));
+		mpr_dprint(sc, MPR_XINFO,
+		    "SCSIStatus %x SCSIState %x xfercount %u\n",
 		    rep->SCSIStatus, rep->SCSIState,
 		    le32toh(rep->TransferCount));
 		break;

Modified: head/sys/dev/mps/mps_sas.c
==============================================================================
--- head/sys/dev/mps/mps_sas.c	Wed Sep  6 07:19:22 2017	(r323214)
+++ head/sys/dev/mps/mps_sas.c	Wed Sep  6 09:19:54 2017	(r323215)
@@ -1227,33 +1227,39 @@ mpssas_logical_unit_reset_complete(struct mps_softc *s
 	 * XXXSL So should it be an assertion?
 	 */
 	if ((tm->cm_flags & MPS_CM_FLAGS_ERROR_MASK) != 0) {
-		mps_dprint(sc, MPS_ERROR, "%s: cm_flags = %#x for LUN reset! "
-			   "This should not happen!\n", __func__, tm->cm_flags);
+		mps_dprint(sc, MPS_RECOVERY|MPS_ERROR,
+		    "%s: cm_flags = %#x for LUN reset! "
+		   "This should not happen!\n", __func__, tm->cm_flags);
 		mpssas_free_tm(sc, tm);
 		return;
 	}
 
 	if (reply == NULL) {
-		mpssas_log_command(tm, MPS_RECOVERY,
-		    "NULL reset reply for tm %p\n", tm);
+		mps_dprint(sc, MPS_RECOVERY, "NULL reset reply for tm %p\n",
+		    tm);
 		if ((sc->mps_flags & MPS_FLAGS_DIAGRESET) != 0) {
 			/* this completion was due to a reset, just cleanup */
+			mps_dprint(sc, MPS_RECOVERY, "Hardware undergoing "
+			    "reset, ignoring NULL LUN reset reply\n");
 			targ->tm = NULL;
 			mpssas_free_tm(sc, tm);
 		}
 		else {
 			/* we should have gotten a reply. */
+			mps_dprint(sc, MPS_INFO|MPS_RECOVERY, "NULL reply on "
+			    "LUN reset attempt, resetting controller\n");
 			mps_reinit(sc);
 		}
 		return;
 	}
 
-	mpssas_log_command(tm, MPS_RECOVERY,
+	mps_dprint(sc, MPS_RECOVERY,
 	    "logical unit reset status 0x%x code 0x%x count %u\n",
 	    le16toh(reply->IOCStatus), le32toh(reply->ResponseCode),
 	    le32toh(reply->TerminationCount));
 		
-	/* See if there are any outstanding commands for this LUN.
+	/*
+	 * See if there are any outstanding commands for this LUN.
 	 * This could be made more efficient by using a per-LU data
 	 * structure of some sort.
 	 */
@@ -1263,34 +1269,37 @@ mpssas_logical_unit_reset_complete(struct mps_softc *s
 	}
 
 	if (cm_count == 0) {
-		mpssas_log_command(tm, MPS_RECOVERY|MPS_INFO,
-		    "logical unit %u finished recovery after reset\n",
-		    tm->cm_lun, tm);
+		mps_dprint(sc, MPS_RECOVERY|MPS_INFO,
+		    "Finished recovery after LUN reset for target %u\n",
+		    targ->tid);
 
-		mpssas_announce_reset(sc, AC_SENT_BDR, tm->cm_targ->tid, 
-		    tm->cm_lun);
+		mpssas_announce_reset(sc, AC_SENT_BDR, targ->tid, tm->cm_lun);
 
-		/* we've finished recovery for this logical unit.  check and
+		/*
+		 * We've finished recovery for this logical unit.  check and
 		 * see if some other logical unit has a timedout command
 		 * that needs to be processed.
 		 */
 		cm = TAILQ_FIRST(&targ->timedout_commands);
 		if (cm) {
+			mps_dprint(sc, MPS_INFO|MPS_RECOVERY,
+			    "More commands to abort for target %u\n",
+			    targ->tid);
 			mpssas_send_abort(sc, tm, cm);
-		}
-		else {
+		} else {
 			targ->tm = NULL;
 			mpssas_free_tm(sc, tm);
 		}
-	}
-	else {
-		/* if we still have commands for this LUN, the reset
+	} else {
+		/*
+		 * If we still have commands for this LUN, the reset
 		 * effectively failed, regardless of the status reported.
 		 * Escalate to a target reset.
 		 */
-		mpssas_log_command(tm, MPS_RECOVERY,
-		    "logical unit reset complete for tm %p, but still have %u command(s)\n",
-		    tm, cm_count);
+		mps_dprint(sc, MPS_INFO|MPS_RECOVERY,
+		    "logical unit reset complete for target %u, but still "
+		    "have %u command(s), sending target reset\n", targ->tid,
+		    cm_count);
 		mpssas_send_reset(sc, tm,
 		    MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET);
 	}
@@ -1322,21 +1331,25 @@ mpssas_target_reset_complete(struct mps_softc *sc, str
 	}
 
 	if (reply == NULL) {
-		mpssas_log_command(tm, MPS_RECOVERY,
-		    "NULL reset reply for tm %p\n", tm);
+		mps_dprint(sc, MPS_RECOVERY,
+		    "NULL target reset reply for tm %pi TaskMID %u\n",
+		    tm, le16toh(req->TaskMID));
 		if ((sc->mps_flags & MPS_FLAGS_DIAGRESET) != 0) {
 			/* this completion was due to a reset, just cleanup */
+			mps_dprint(sc, MPS_RECOVERY, "Hardware undergoing "
+			    "reset, ignoring NULL target reset reply\n");
 			targ->tm = NULL;
 			mpssas_free_tm(sc, tm);
-		}
-		else {
+		} else {
 			/* we should have gotten a reply. */
+			mps_dprint(sc, MPS_INFO|MPS_RECOVERY, "NULL reply on "
+			    "target reset attempt, resetting controller\n");
 			mps_reinit(sc);
 		}
 		return;
 	}
 
-	mpssas_log_command(tm, MPS_RECOVERY,
+	mps_dprint(sc, MPS_RECOVERY,
 	    "target reset status 0x%x code 0x%x count %u\n",
 	    le16toh(reply->IOCStatus), le32toh(reply->ResponseCode),
 	    le32toh(reply->TerminationCount));
@@ -1345,23 +1358,24 @@ mpssas_target_reset_complete(struct mps_softc *sc, str
 		/* we've finished recovery for this target and all
 		 * of its logical units.
 		 */
-		mpssas_log_command(tm, MPS_RECOVERY|MPS_INFO,
-		    "recovery finished after target reset\n");
+		mps_dprint(sc, MPS_RECOVERY|MPS_INFO,
+		    "Finished reset recovery for target %u\n", targ->tid);
 
 		mpssas_announce_reset(sc, AC_SENT_BDR, tm->cm_targ->tid,
 		    CAM_LUN_WILDCARD);
 
 		targ->tm = NULL;
 		mpssas_free_tm(sc, tm);
-	}
-	else {
-		/* after a target reset, if this target still has
+	} else {
+		/*
+		 * After a target reset, if this target still has
 		 * outstanding commands, the reset effectively failed,
 		 * regardless of the status reported.  escalate.
 		 */
-		mpssas_log_command(tm, MPS_RECOVERY,
-		    "target reset complete for tm %p, but still have %u command(s)\n", 
-		    tm, targ->outstanding);
+		mps_dprint(sc, MPS_INFO|MPS_RECOVERY,
+		    "Target reset complete for target %u, but still have %u "
+		    "command(s), resetting controller\n", targ->tid,
+		    targ->outstanding);
 		mps_reinit(sc);
 	}
 }
@@ -1391,24 +1405,23 @@ mpssas_send_reset(struct mps_softc *sc, struct mps_com
 		/* XXX Need to handle invalid LUNs */
 		MPS_SET_LUN(req->LUN, tm->cm_lun);
 		tm->cm_targ->logical_unit_resets++;
-		mpssas_log_command(tm, MPS_RECOVERY|MPS_INFO,
-		    "sending logical unit reset\n");
+		mps_dprint(sc, MPS_RECOVERY|MPS_INFO,
+		    "Sending logical unit reset to target %u lun %d\n",
+		    target->tid, tm->cm_lun);
 		tm->cm_complete = mpssas_logical_unit_reset_complete;
 		mpssas_prepare_for_tm(sc, tm, target, tm->cm_lun);
-	}
-	else if (type == MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET) {
+	} else if (type == MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET) {
 		/*
 		 * Target reset method =
 		 * 	SAS Hard Link Reset / SATA Link Reset
 		 */
 		req->MsgFlags = MPI2_SCSITASKMGMT_MSGFLAGS_LINK_RESET;
 		tm->cm_targ->target_resets++;
-		mpssas_log_command(tm, MPS_RECOVERY|MPS_INFO,
-		    "sending target reset\n");
+		mps_dprint(sc, MPS_RECOVERY|MPS_INFO,
+		    "Sending target reset to target %u\n", target->tid);
 		tm->cm_complete = mpssas_target_reset_complete;
 		mpssas_prepare_for_tm(sc, tm, target, CAM_LUN_WILDCARD);
-	}
-	else {
+	} else {
 		mps_dprint(sc, MPS_ERROR, "unexpected reset type 0x%x\n", type);
 		return -1;
 	}
@@ -1422,7 +1435,7 @@ mpssas_send_reset(struct mps_softc *sc, struct mps_com
 
 	err = mps_map_command(sc, tm);
 	if (err)
-		mpssas_log_command(tm, MPS_RECOVERY,
+		mps_dprint(sc, MPS_ERROR|MPS_RECOVERY,
 		    "error %d sending reset type %u\n",
 		    err, type);
 
@@ -1450,7 +1463,7 @@ mpssas_abort_complete(struct mps_softc *sc, struct mps
 	 * task management commands don't have S/G lists.
 	 */
 	if ((tm->cm_flags & MPS_CM_FLAGS_ERROR_MASK) != 0) {
-		mpssas_log_command(tm, MPS_RECOVERY,
+		mps_dprint(sc, MPS_RECOVERY,
 		    "cm_flags = %#x for abort %p TaskMID %u!\n", 
 		    tm->cm_flags, tm, le16toh(req->TaskMID));
 		mpssas_free_tm(sc, tm);
@@ -1458,22 +1471,25 @@ mpssas_abort_complete(struct mps_softc *sc, struct mps
 	}
 
 	if (reply == NULL) {
-		mpssas_log_command(tm, MPS_RECOVERY,
+		mps_dprint(sc, MPS_RECOVERY,
 		    "NULL abort reply for tm %p TaskMID %u\n", 
 		    tm, le16toh(req->TaskMID));
 		if ((sc->mps_flags & MPS_FLAGS_DIAGRESET) != 0) {
 			/* this completion was due to a reset, just cleanup */
+			mps_dprint(sc, MPS_RECOVERY, "Hardware undergoing "
+			    "reset, ignoring NULL abort reply\n");
 			targ->tm = NULL;
 			mpssas_free_tm(sc, tm);
-		}
-		else {
+		} else {
 			/* we should have gotten a reply. */
+			mps_dprint(sc, MPS_INFO|MPS_RECOVERY, "NULL reply on "
+			    "abort attempt, resetting controller\n");
 			mps_reinit(sc);
 		}
 		return;
 	}
 
-	mpssas_log_command(tm, MPS_RECOVERY,
+	mps_dprint(sc, MPS_RECOVERY,
 	    "abort TaskMID %u status 0x%x code 0x%x count %u\n",
 	    le16toh(req->TaskMID),
 	    le16toh(reply->IOCStatus), le32toh(reply->ResponseCode),
@@ -1481,31 +1497,28 @@ mpssas_abort_complete(struct mps_softc *sc, struct mps
 
 	cm = TAILQ_FIRST(&tm->cm_targ->timedout_commands);
 	if (cm == NULL) {
-		/* if there are no more timedout commands, we're done with
+		/*
+		 * If there are no more timedout commands, we're done with
 		 * error recovery for this target.
 		 */
-		mpssas_log_command(tm, MPS_RECOVERY,
-		    "finished recovery after aborting TaskMID %u\n",
-		    le16toh(req->TaskMID));
+		mps_dprint(sc, MPS_INFO|MPS_RECOVERY,
+		    "Finished abort recovery for target %u\n", targ->tid);
 
 		targ->tm = NULL;
 		mpssas_free_tm(sc, tm);
-	}
-	else if (le16toh(req->TaskMID) != cm->cm_desc.Default.SMID) {
+	} else if (le16toh(req->TaskMID) != cm->cm_desc.Default.SMID) {
 		/* abort success, but we have more timedout commands to abort */
-		mpssas_log_command(tm, MPS_RECOVERY,
-		    "continuing recovery after aborting TaskMID %u\n",
-		    le16toh(req->TaskMID));
+		mps_dprint(sc, MPS_INFO|MPS_RECOVERY,
+		    "Continuing abort recovery for target %u\n", targ->tid);
 		
 		mpssas_send_abort(sc, tm, cm);
-	}
-	else {
+	} else {
 		/* we didn't get a command completion, so the abort
 		 * failed as far as we're concerned.  escalate.
 		 */
-		mpssas_log_command(tm, MPS_RECOVERY,
-		    "abort failed for TaskMID %u tm %p\n",
-		    le16toh(req->TaskMID), tm);
+		mps_dprint(sc, MPS_RECOVERY,
+		    "Abort failed for target %u, sending logical unit reset\n",
+		    targ->tid);
 
 		mpssas_send_reset(sc, tm, 
 		    MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET);
@@ -1523,7 +1536,8 @@ mpssas_send_abort(struct mps_softc *sc, struct mps_com
 
 	targ = cm->cm_targ;
 	if (targ->handle == 0) {
-		mps_dprint(sc, MPS_ERROR,"%s null devhandle for target_id %d\n",
+		mps_dprint(sc, MPS_ERROR|MPS_RECOVERY,
+		    "%s null devhandle for target_id %d\n",
 		    __func__, cm->cm_ccb->ccb_h.target_id);
 		return -1;
 	}
@@ -1553,13 +1567,11 @@ mpssas_send_abort(struct mps_softc *sc, struct mps_com
 
 	targ->aborts++;
 
-	mps_dprint(sc, MPS_INFO, "Sending reset from %s for target ID %d\n",
-	    __func__, targ->tid);
 	mpssas_prepare_for_tm(sc, tm, targ, tm->cm_lun);
 
 	err = mps_map_command(sc, tm);
 	if (err)
-		mps_dprint(sc, MPS_RECOVERY,
+		mps_dprint(sc, MPS_ERROR|MPS_RECOVERY,
 		    "error %d sending abort for cm %p SMID %u\n",
 		    err, cm, req->TaskMID);
 	return err;
@@ -1568,17 +1580,21 @@ mpssas_send_abort(struct mps_softc *sc, struct mps_com
 static void
 mpssas_scsiio_timeout(void *data)
 {
+	sbintime_t elapsed, now;
+	union ccb *ccb;
 	struct mps_softc *sc;
 	struct mps_command *cm;
 	struct mpssas_target *targ;
 
 	cm = (struct mps_command *)data;
 	sc = cm->cm_sc;
+	ccb = cm->cm_ccb;
+	now = sbinuptime();
 
 	MPS_FUNCTRACE(sc);
 	mtx_assert(&sc->mps_mtx, MA_OWNED);
 
-	mps_dprint(sc, MPS_XINFO, "Timeout checking cm %p\n", sc);
+	mps_dprint(sc, MPS_XINFO|MPS_RECOVERY, "Timeout checking cm %p\n", sc);
 
 	/*
 	 * Run the interrupt handler to make sure it's not pending.  This
@@ -1600,9 +1616,11 @@ mpssas_scsiio_timeout(void *data)
 	targ = cm->cm_targ;
 	targ->timeouts++;
 
-	mpssas_log_command(cm, MPS_ERROR, "command timeout %d cm %p target "
-	    "%u, handle(0x%04x)\n", cm->cm_ccb->ccb_h.timeout, cm,  targ->tid,
-	    targ->handle);
+	elapsed = now - ccb->ccb_h.qos.sim_data;
+	mpssas_log_command(cm, MPS_INFO|MPS_RECOVERY,
+	    "Command timeout on target %u(0x%04x) %d set, %d.%d elapsed\n",
+	    targ->tid, targ->handle, ccb->ccb_h.timeout,
+	    sbintime_getsec(elapsed), elapsed & 0xffffffff);
 
 	/* XXX first, check the firmware state, to see if it's still
 	 * operational.  if not, do a diag reset.
@@ -1618,15 +1636,16 @@ mpssas_scsiio_timeout(void *data)
 		mps_dprint(sc, MPS_RECOVERY,
 		    "queued timedout cm %p for processing by tm %p\n",
 		    cm, targ->tm);
-	}
-	else if ((targ->tm = mpssas_alloc_tm(sc)) != NULL) {
+	} else if ((targ->tm = mpssas_alloc_tm(sc)) != NULL) {
+		mps_dprint(sc, MPS_RECOVERY|MPS_INFO,
+		    "Sending abort to target %u for SMID %d\n", targ->tid,
+		    cm->cm_desc.Default.SMID);
 		mps_dprint(sc, MPS_RECOVERY, "timedout cm %p allocated tm %p\n",
 		    cm, targ->tm);
 
 		/* start recovery by aborting the first timedout command */
 		mpssas_send_abort(sc, targ->tm, cm);
-	}
-	else {
+	} else {
 		/* XXX queue this target up for recovery once a TM becomes
 		 * available.  The firmware only has a limited number of
 		 * HighPriority credits for the high priority requests used
@@ -1636,7 +1655,7 @@ mpssas_scsiio_timeout(void *data)
 		 * more credits than disks in an enclosure, and limit
 		 * ourselves to one TM per target for recovery.
 		 */
-		mps_dprint(sc, MPS_RECOVERY,
+		mps_dprint(sc, MPS_ERROR|MPS_RECOVERY,
 		    "timedout cm %p failed to allocate a tm\n", cm);
 	}
 
@@ -1900,6 +1919,7 @@ mpssas_action_scsiio(struct mpssas_softc *sassc, union
 	if (csio->bio != NULL)
 		biotrack(csio->bio, __func__);
 #endif
+	csio->ccb_h.qos.sim_data = sbinuptime();
 	callout_reset_sbt(&cm->cm_callout, SBT_1MS * ccb->ccb_h.timeout, 0,
 	    mpssas_scsiio_timeout, cm, 0);
 
@@ -2469,9 +2489,14 @@ mpssas_scsiio_complete(struct mps_softc *sc, struct mp
 		 * avoiding getting into an infinite retry loop.
 		 */
 		mpssas_set_ccbstatus(ccb, CAM_REQ_CMP_ERR);
-		mpssas_log_command(cm, MPS_INFO,
-		    "terminated ioc %x loginfo %x scsi %x state %x xfer %u\n",
-		    le16toh(rep->IOCStatus), le32toh(rep->IOCLogInfo),
+		mps_dprint(sc, MPS_INFO,
+		    "Controller reported %s status for target %u SMID %u, "
+		    "loginfo %x\n", ((rep->IOCStatus & MPI2_IOCSTATUS_MASK) ==
+		    MPI2_IOCSTATUS_SCSI_IOC_TERMINATED) ? "IOC_TERMINATED" :
+		    "EXT_TERMINATED", target_id, cm->cm_desc.Default.SMID,
+		    le32toh(rep->IOCLogInfo));
+		mps_dprint(sc, MPS_XINFO,
+		    "SCSIStatus %x SCSIState %x xfercount %u\n",
 		    rep->SCSIStatus, rep->SCSIState,
 		    le32toh(rep->TransferCount));
 		break;

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201709060919.v869Jsk9060601>

Header And Logo

Peripheral Links

Site Navigation

Header And Logo

Peripheral Links

Search

Site Navigation