Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 8 Oct 2013 16:00:12 +0000 (UTC)
From:      Jim Harris <jimharris@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r256154 - head/sys/dev/nvme
Message-ID:  <201310081600.r98G0CRl003594@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: jimharris
Date: Tue Oct  8 16:00:12 2013
New Revision: 256154
URL: http://svnweb.freebsd.org/changeset/base/256154

Log:
  Log and then disable asynchronous notification of persistent events after
  they occur.
  
  This prevents repeated notifications of the same event.
  
  Status of these events may be viewed at any time by viewing the
  SMART/Health Info Page using nvmecontrol, whether or not asynchronous
  event notifications for those events are enabled.  This log page can
  be viewed using:
  
      nvmecontrol logpage -p 2 <ctrlr id>
  
  Future enhancements may re-enable these notifications on a periodic basis
  so that if the notified condition persists, it will continue to be logged.
  
  Sponsored by:	Intel
  Reviewed by:	carl
  Approved by:	re (hrs)
  MFC after:	1 week

Modified:
  head/sys/dev/nvme/nvme_ctrlr.c
  head/sys/dev/nvme/nvme_private.h

Modified: head/sys/dev/nvme/nvme_ctrlr.c
==============================================================================
--- head/sys/dev/nvme/nvme_ctrlr.c	Tue Oct  8 15:49:14 2013	(r256153)
+++ head/sys/dev/nvme/nvme_ctrlr.c	Tue Oct  8 16:00:12 2013	(r256154)
@@ -617,9 +617,35 @@ nvme_ctrlr_get_log_page_size(struct nvme
 }
 
 static void
+nvme_ctrlr_log_critical_warnings(struct nvme_controller *ctrlr,
+    union nvme_critical_warning_state state)
+{
+
+	if (state.bits.available_spare == 1)
+		nvme_printf(ctrlr, "available spare space below threshold\n");
+
+	if (state.bits.temperature == 1)
+		nvme_printf(ctrlr, "temperature above threshold\n");
+
+	if (state.bits.device_reliability == 1)
+		nvme_printf(ctrlr, "device reliability degraded\n");
+
+	if (state.bits.read_only == 1)
+		nvme_printf(ctrlr, "media placed in read only mode\n");
+
+	if (state.bits.volatile_memory_backup == 1)
+		nvme_printf(ctrlr, "volatile memory backup device failed\n");
+
+	if (state.bits.reserved != 0)
+		nvme_printf(ctrlr,
+		    "unknown critical warning(s): state = 0x%02x\n", state.raw);
+}
+
+static void
 nvme_ctrlr_async_event_log_page_cb(void *arg, const struct nvme_completion *cpl)
 {
-	struct nvme_async_event_request	*aer = arg;
+	struct nvme_async_event_request		*aer = arg;
+	struct nvme_health_information_page	*health_info;
 
 	/*
 	 * If the log page fetch for some reason completed with an error,
@@ -629,13 +655,33 @@ nvme_ctrlr_async_event_log_page_cb(void 
 	if (nvme_completion_is_error(cpl))
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, NULL, 0);
-	else
+	else {
+		if (aer->log_page_id == NVME_LOG_HEALTH_INFORMATION) {
+			health_info = (struct nvme_health_information_page *)
+			    aer->log_page_buffer;
+			nvme_ctrlr_log_critical_warnings(aer->ctrlr,
+			    health_info->critical_warning);
+			/*
+			 * Critical warnings reported through the
+			 *  SMART/health log page are persistent, so
+			 *  clear the associated bits in the async event
+			 *  config so that we do not receive repeated
+			 *  notifications for the same event.
+			 */
+			aer->ctrlr->async_event_config.raw &=
+			    ~health_info->critical_warning.raw;
+			nvme_ctrlr_cmd_set_async_event_config(aer->ctrlr,
+			    aer->ctrlr->async_event_config, NULL, NULL);
+		}
+
+
 		/*
 		 * Pass the cpl data from the original async event completion,
 		 *  not the log page fetch.
 		 */
 		nvme_notify_async_consumers(aer->ctrlr, &aer->cpl,
 		    aer->log_page_id, aer->log_page_buffer, aer->log_page_size);
+	}
 
 	/*
 	 * Repost another asynchronous event request to replace the one
@@ -709,12 +755,11 @@ static void
 nvme_ctrlr_configure_aer(struct nvme_controller *ctrlr)
 {
 	struct nvme_completion_poll_status	status;
-	union nvme_critical_warning_state	state;
 	struct nvme_async_event_request		*aer;
 	uint32_t				i;
 
-	state.raw = 0xFF;
-	state.bits.reserved = 0;
+	ctrlr->async_event_config.raw = 0xFF;
+	ctrlr->async_event_config.bits.reserved = 0;
 
 	status.done = FALSE;
 	nvme_ctrlr_cmd_get_feature(ctrlr, NVME_FEAT_TEMPERATURE_THRESHOLD,
@@ -725,10 +770,11 @@ nvme_ctrlr_configure_aer(struct nvme_con
 	    (status.cpl.cdw0 & 0xFFFF) == 0xFFFF ||
 	    (status.cpl.cdw0 & 0xFFFF) == 0x0000) {
 		nvme_printf(ctrlr, "temperature threshold not supported\n");
-		state.bits.temperature = 0;
+		ctrlr->async_event_config.bits.temperature = 0;
 	}
 
-	nvme_ctrlr_cmd_set_async_event_config(ctrlr, state, NULL, NULL);
+	nvme_ctrlr_cmd_set_async_event_config(ctrlr,
+	    ctrlr->async_event_config, NULL, NULL);
 
 	/* aerl is a zero-based value, so we need to add 1 here. */
 	ctrlr->num_aers = min(NVME_MAX_ASYNC_EVENTS, (ctrlr->cdata.aerl+1));

Modified: head/sys/dev/nvme/nvme_private.h
==============================================================================
--- head/sys/dev/nvme/nvme_private.h	Tue Oct  8 15:49:14 2013	(r256153)
+++ head/sys/dev/nvme/nvme_private.h	Tue Oct  8 16:00:12 2013	(r256154)
@@ -322,6 +322,9 @@ struct nvme_controller {
 
 	struct cdev			*cdev;
 
+	/** bit mask of warning types currently enabled for async events */
+	union nvme_critical_warning_state	async_event_config;
+
 	uint32_t			num_aers;
 	struct nvme_async_event_request	aer[NVME_MAX_ASYNC_EVENTS];
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201310081600.r98G0CRl003594>