Skip site navigation (1) | Skip section navigation (2)
Date:      Wed, 14 Mar 2018 17:53:37 +0000 (UTC)
From:      Warner Losh <imp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r330935 - in head/sys: cam cam/ata cam/nvme cam/scsi conf
Message-ID:  <201803141753.w2EHrbrE081231@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: imp
Date: Wed Mar 14 17:53:37 2018
New Revision: 330935
URL: https://svnweb.freebsd.org/changeset/base/330935

Log:
  Create a sysctl kern.cam.{,a,n}da.X.invalidate
  
  kern.cam.{,a,n}da.X.invalidate=1 forces *daX to detach by calling
  cam_periph_invalidate on the underlying periph. This is for testing
  purposes only. It is included only with options CAM_TEST_FAILURE, which
  replaces the former [AN]DA_TEST_FAILURE; also fix nda to compile with it set.
  We're using it at work to harden geom and the buffer cache to be
  resilient in the face of drive failure. Today, it far too often
  results in a panic. While much work was done on SIM initiated removal
  for the USB thumb drive removal work, little has been done for periph
  initiated removal. This simulates what *daerror() does for some errors
  nicely: we get the same panics with it that we do with failing drives.
  
  Sponsored by: Netflix
  Differential Revision: https://reviews.freebsd.org/D14581

Modified:
  head/sys/cam/ata/ata_da.c
  head/sys/cam/cam_periph.c
  head/sys/cam/cam_periph.h
  head/sys/cam/nvme/nvme_da.c
  head/sys/cam/scsi/scsi_da.c
  head/sys/conf/NOTES
  head/sys/conf/options

Modified: head/sys/cam/ata/ata_da.c
==============================================================================
--- head/sys/cam/ata/ata_da.c	Wed Mar 14 16:45:04 2018	(r330934)
+++ head/sys/cam/ata/ata_da.c	Wed Mar 14 17:53:37 2018	(r330935)
@@ -238,7 +238,7 @@ struct ada_softc {
 	int	 write_cache;
 	int	 unmappedio;
 	int	 rotating;
-#ifdef ADA_TEST_FAILURE
+#ifdef CAM_TEST_FAILURE
 	int      force_read_error;
 	int      force_write_error;
 	int      periodic_read_error;
@@ -1475,7 +1475,7 @@ adasysctlinit(void *context, int pending)
 		"max_seq_zones", CTLFLAG_RD, &softc->max_seq_zones,
 		"Maximum Number of Open Sequential Write Required Zones");
 
-#ifdef ADA_TEST_FAILURE
+#ifdef CAM_TEST_FAILURE
 	/*
 	 * Add a 'door bell' sysctl which allows one to set it from userland
 	 * and cause something bad to happen.  For the moment, we only allow
@@ -1493,6 +1493,10 @@ adasysctlinit(void *context, int pending)
 		OID_AUTO, "periodic_read_error", CTLFLAG_RW | CTLFLAG_MPSAFE,
 		&softc->periodic_read_error, 0,
 		"Force a read error every N reads (don't set too low).");
+	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
+		OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
+		periph, 0, cam_periph_invalidate_sysctl, "I",
+		"Write 1 to invalidate the drive immediately");
 #endif
 
 #ifdef CAM_IO_STATS
@@ -2293,7 +2297,7 @@ adastart(struct cam_periph *periph, union ccb *start_c
 				data_ptr = bp;
 			}
 
-#ifdef ADA_TEST_FAILURE
+#ifdef CAM_TEST_FAILURE
 			int fail = 0;
 
 			/*

Modified: head/sys/cam/cam_periph.c
==============================================================================
--- head/sys/cam/cam_periph.c	Wed Mar 14 16:45:04 2018	(r330934)
+++ head/sys/cam/cam_periph.c	Wed Mar 14 17:53:37 2018	(r330935)
@@ -2066,3 +2066,25 @@ cam_periph_devctl_notify(union ccb *ccb)
 	free(sbmsg, M_CAMPERIPH);
 }
 
+/*
+ * Sysctl to force an invalidation of the drive right now. Can be
+ * called with CTLFLAG_MPSAFE since we take periph lock.
+ */
+int
+cam_periph_invalidate_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct cam_periph *periph;
+	int error, value;
+
+	periph = arg1;
+	value = 0;
+	error = sysctl_handle_int(oidp, &value, 0, req);
+	if (error != 0 || req->newptr == NULL || value != 1)
+		return (error);
+
+	cam_periph_lock(periph);
+	cam_periph_invalidate(periph);
+	cam_periph_unlock(periph);
+
+	return (0);
+}

Modified: head/sys/cam/cam_periph.h
==============================================================================
--- head/sys/cam/cam_periph.h	Wed Mar 14 16:45:04 2018	(r330934)
+++ head/sys/cam/cam_periph.h	Wed Mar 14 17:53:37 2018	(r330935)
@@ -37,6 +37,7 @@
 #include <cam/cam_sim.h>
 
 #ifdef _KERNEL
+#include <sys/sysctl.h>
 #include <sys/taskqueue.h>
 
 #include <cam/cam_xpt.h>
@@ -198,6 +199,7 @@ void		cam_periph_freeze_after_event(struct cam_periph 
 					      u_int duration_ms);
 int		cam_periph_error(union ccb *ccb, cam_flags camflags,
 				 u_int32_t sense_flags);
+int		cam_periph_invalidate_sysctl(SYSCTL_HANDLER_ARGS);
 
 static __inline struct mtx *
 cam_periph_mtx(struct cam_periph *periph)

Modified: head/sys/cam/nvme/nvme_da.c
==============================================================================
--- head/sys/cam/nvme/nvme_da.c	Wed Mar 14 16:45:04 2018	(r330934)
+++ head/sys/cam/nvme/nvme_da.c	Wed Mar 14 17:53:37 2018	(r330935)
@@ -112,6 +112,12 @@ struct nda_softc {
 	struct task		sysctl_task;
 	struct sysctl_ctx_list	sysctl_ctx;
 	struct sysctl_oid	*sysctl_tree;
+#ifdef CAM_TEST_FAILURE
+	int			force_read_error;
+	int			force_write_error;
+	int			periodic_read_error;
+	int			periodic_read_count;
+#endif
 #ifdef CAM_IO_STATS
 	struct sysctl_ctx_list	sysctl_stats_ctx;
 	struct sysctl_oid	*sysctl_stats_tree;
@@ -666,6 +672,13 @@ ndasysctlinit(void *context, int pending)
 		"Device pack invalidations.");
 #endif
 
+#ifdef CAM_TEST_FAILURE
+	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
+		OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
+		periph, 0, cam_periph_invalidate_sysctl, "I",
+		"Write 1 to invalidate the drive immediately");
+#endif
+
 	cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx,
 	    softc->sysctl_tree);
 
@@ -876,7 +889,7 @@ ndastart(struct cam_periph *periph, union ccb *start_c
 			/* FALLTHROUGH */
 		case BIO_READ:
 		{
-#ifdef NDA_TEST_FAILURE
+#ifdef CAM_TEST_FAILURE
 			int fail = 0;
 
 			/*

Modified: head/sys/cam/scsi/scsi_da.c
==============================================================================
--- head/sys/cam/scsi/scsi_da.c	Wed Mar 14 16:45:04 2018	(r330934)
+++ head/sys/cam/scsi/scsi_da.c	Wed Mar 14 17:53:37 2018	(r330935)
@@ -2203,6 +2203,13 @@ dasysctlinit(void *context, int pending)
 		       0,
 		       "Rotating media");
 
+#ifdef CAM_TEST_FAILURE
+	SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree),
+		OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE,
+		periph, 0, cam_periph_invalidate_sysctl, "I",
+		"Write 1 to invalidate the drive immediately");
+#endif
+
 	/*
 	 * Add some addressing info.
 	 */

Modified: head/sys/conf/NOTES
==============================================================================
--- head/sys/conf/NOTES	Wed Mar 14 16:45:04 2018	(r330934)
+++ head/sys/conf/NOTES	Wed Mar 14 17:53:37 2018	(r330935)
@@ -1356,6 +1356,7 @@ options 	SCSI_NO_SENSE_STRINGS
 options 	SCSI_NO_OP_STRINGS
 options 	SCSI_DELAY=5000	# Be pessimistic about Joe SCSI device
 options 	CAM_IOSCHED_DYNAMIC
+options		CAM_TEST_FAILURE
 
 # Options for the CAM CDROM driver:
 # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN

Modified: head/sys/conf/options
==============================================================================
--- head/sys/conf/options	Wed Mar 14 16:45:04 2018	(r330934)
+++ head/sys/conf/options	Wed Mar 14 17:53:37 2018	(r330935)
@@ -335,12 +335,12 @@ CAM_DEBUG_LUN		opt_cam.h
 CAM_DEBUG_FLAGS		opt_cam.h
 CAM_BOOT_DELAY		opt_cam.h
 CAM_IOSCHED_DYNAMIC	opt_cam.h
+CAM_TEST_FAILURE	opt_cam.h
 SCSI_DELAY		opt_scsi.h
 SCSI_NO_SENSE_STRINGS	opt_scsi.h
 SCSI_NO_OP_STRINGS	opt_scsi.h
 
 # Options used only in cam/ata/ata_da.c
-ADA_TEST_FAILURE	opt_ada.h
 ATA_STATIC_ID		opt_ada.h
 
 # Options used only in cam/scsi/scsi_cd.c



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201803141753.w2EHrbrE081231>