Skip site navigation (1) Skip section navigation (2)
Date:      Wed, 24 Jun 2020 13:49:30 +0000 (UTC)
From:      Alexander Motin <mav@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r362579 - stable/12/sys/dev/nvme
Message-ID:  <202006241349.05ODnUO7039751@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mav
Date: Wed Jun 24 13:49:30 2020
New Revision: 362579
URL: https://svnweb.freebsd.org/changeset/base/362579

Log:
  MFC r362337: Make polled request timeout less invasive.
  
  Instead of panicking after one second of polling, let the normal timeout
  handler activate, reset the controller and abort the outstanding
  requests.  If all of that does not happen within 10 seconds, then something
  in the driver is likely badly stuck and a panic is the only way out.
  
  In particular, this fixes device hot unplug during execution of those
  polled commands, allowing a clean device detach instead of a panic.

Modified:
  stable/12/sys/dev/nvme/nvme_ctrlr.c
  stable/12/sys/dev/nvme/nvme_private.h
  stable/12/sys/dev/nvme/nvme_qpair.c
Directory Properties:
  stable/12/   (props changed)

Modified: stable/12/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- stable/12/sys/dev/nvme/nvme_ctrlr.c	Wed Jun 24 13:48:16 2020	(r362578)
+++ stable/12/sys/dev/nvme/nvme_ctrlr.c	Wed Jun 24 13:49:30 2020	(r362579)
@@ -489,7 +489,7 @@ nvme_ctrlr_create_qpairs(struct nvme_controller *ctrlr
 		}
 
 		status.done = 0;
-		nvme_ctrlr_cmd_create_io_sq(qpair->ctrlr, qpair,
+		nvme_ctrlr_cmd_create_io_sq(ctrlr, qpair,
 		    nvme_completion_poll_cb, &status);
 		nvme_completion_poll(&status);
 		if (nvme_completion_is_error(&status.cpl)) {

Modified: stable/12/sys/dev/nvme/nvme_private.h
==============================================================================
--- stable/12/sys/dev/nvme/nvme_private.h	Wed Jun 24 13:48:16 2020	(r362578)
+++ stable/12/sys/dev/nvme/nvme_private.h	Wed Jun 24 13:49:30 2020	(r362579)
@@ -463,20 +463,22 @@ int	nvme_detach(device_t dev);
  * Wait for a command to complete using the nvme_completion_poll_cb.
  * Used in limited contexts where the caller knows it's OK to block
  * briefly while the command runs. The ISR will run the callback which
- * will set status->done to true.usually within microseconds. A 1s
- * pause means something is seriously AFU and we should panic to
- * provide the proper context to diagnose.
+ * will set status->done to true, usually within microseconds. If not,
+ * then after one second timeout handler should reset the controller
+ * and abort all outstanding requests including this polled one. If
+ * still not after ten seconds, then something is wrong with the driver,
+ * and panic is the only way to recover.
  */
 static __inline
 void
 nvme_completion_poll(struct nvme_completion_poll_status *status)
 {
-	int sanity = hz * 1;
+	int sanity = hz * 10;
 
 	while (!atomic_load_acq_int(&status->done) && --sanity > 0)
 		pause("nvme", 1);
 	if (sanity <= 0)
-		panic("NVME polled command failed to complete within 1s.");
+		panic("NVME polled command failed to complete within 10s.");
 }
 
 static __inline void

Modified: stable/12/sys/dev/nvme/nvme_qpair.c
==============================================================================
--- stable/12/sys/dev/nvme/nvme_qpair.c	Wed Jun 24 13:48:16 2020	(r362578)
+++ stable/12/sys/dev/nvme/nvme_qpair.c	Wed Jun 24 13:49:30 2020	(r362579)
@@ -956,6 +956,7 @@ nvme_qpair_submit_tracker(struct nvme_qpair *qpair, st
 {
 	struct nvme_request	*req;
 	struct nvme_controller	*ctrlr;
+	int timeout;
 
 	mtx_assert(&qpair->lock, MA_OWNED);
 
@@ -964,9 +965,14 @@ nvme_qpair_submit_tracker(struct nvme_qpair *qpair, st
 	qpair->act_tr[tr->cid] = tr;
 	ctrlr = qpair->ctrlr;
 
-	if (req->timeout)
-		callout_reset_on(&tr->timer, ctrlr->timeout_period * hz,
-		    nvme_timeout, tr, qpair->cpu);
+	if (req->timeout) {
+		if (req->cb_fn == nvme_completion_poll_cb)
+			timeout = hz;
+		else
+			timeout = ctrlr->timeout_period * hz;
+		callout_reset_on(&tr->timer, timeout, nvme_timeout, tr,
+		    qpair->cpu);
+	}
 
 	/* Copy the command from the tracker to the submission queue. */
 	memcpy(&qpair->cmd[qpair->sq_tail], &req->cmd, sizeof(req->cmd));



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202006241349.05ODnUO7039751>