Skip site navigation (1) | Skip section navigation (2)
Date:      Thu, 24 May 2018 16:31:18 +0000 (UTC)
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r334166 - head/sys/cam/nvme
Message-ID:  <201805241631.w4OGVIN8066467@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: imp
Date: Thu May 24 16:31:18 2018
New Revision: 334166
URL: https://svnweb.freebsd.org/changeset/base/334166

Log:
  We can't release the refcount outside of the periph lock.
  
  We were dropping the periph lock and then dropping the refcount.
  However, that violates the locking protocol and is racy: the refcount
  may only be changed while the periph lock is held. This seems to be
  the cause of the occasional panics from a seemingly bogus assert.
  
  Sponsored by: Netflix
  Differential Revision: https://reviews.freebsd.org/D15517

Modified:
  head/sys/cam/nvme/nvme_da.c

Modified: head/sys/cam/nvme/nvme_da.c
==============================================================================
--- head/sys/cam/nvme/nvme_da.c	Thu May 24 16:25:18 2018	(r334165)
+++ head/sys/cam/nvme/nvme_da.c	Thu May 24 16:31:18 2018	(r334166)
@@ -336,6 +336,8 @@ ndaclose(struct disk *dp)
 
 	while (softc->refcount != 0)
 		cam_periph_sleep(periph, &softc->refcount, PRIBIO, "ndaclose", 1);
+	KASSERT(softc->outstanding_cmds == 0,
+	    ("nda %d outstanding commands", softc->outstanding_cmds));
 	cam_periph_unlock(periph);
 	cam_periph_release(periph);
 	return (0);	
@@ -986,10 +988,11 @@ ndastart(struct cam_periph *periph, union ccb *start_c
 out:
 		start_ccb->ccb_h.flags |= CAM_UNLOCKED;
 		softc->outstanding_cmds++;
-		softc->refcount++;
+		softc->refcount++;			/* For submission only */
 		cam_periph_unlock(periph);
 		xpt_action(start_ccb);
 		cam_periph_lock(periph);
+		softc->refcount--;			/* Submission done */
 
 		/* May have more work to do, so ensure we stay scheduled */
 		ndaschedule(periph);
@@ -1085,6 +1088,7 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb
 			bp1 = TAILQ_FIRST(&queue);
 			cam_iosched_bio_complete(softc->cam_iosched, bp1, done_ccb);
 			xpt_release_ccb(done_ccb);
+			softc->outstanding_cmds--;
 			ndaschedule(periph);
 			cam_periph_unlock(periph);
 			while ((bp2 = TAILQ_FIRST(&queue)) != NULL) {
@@ -1100,11 +1104,6 @@ ndadone(struct cam_periph *periph, union ccb *done_ccb
 				biodone(bp2);
 			}
 		}
-		/*
-		 * Release the periph refcount taken in mdastart() for each CCB.
-		 */
-		KASSERT(softc->refcount >= 1, ("ndadone softc %p refcount %d", softc, softc->refcount));
-		softc->refcount--;
 		return;
 	}
 	case NDA_CCB_DUMP:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201805241631.w4OGVIN8066467>