From owner-svn-src-stable-8@FreeBSD.ORG  Tue May 29 23:15:24 2012
Return-Path: <owner-svn-src-stable-8@FreeBSD.ORG>
Delivered-To: svn-src-stable-8@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 43CF6106566B;
	Tue, 29 May 2012 23:15:24 +0000 (UTC)
	(envelope-from jimharris@FreeBSD.org)
Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c])
	by mx1.freebsd.org (Postfix) with ESMTP id 2D1B48FC1C;
	Tue, 29 May 2012 23:15:24 +0000 (UTC)
Received: from svn.freebsd.org (localhost [127.0.0.1])
	by svn.freebsd.org (8.14.4/8.14.4) with ESMTP id q4TNFOrM008994;
	Tue, 29 May 2012 23:15:24 GMT
	(envelope-from jimharris@svn.freebsd.org)
Received: (from jimharris@localhost)
	by svn.freebsd.org (8.14.4/8.14.4/Submit) id q4TNFNVn008988;
	Tue, 29 May 2012 23:15:23 GMT
	(envelope-from jimharris@svn.freebsd.org)
Message-Id: <201205292315.q4TNFNVn008988@svn.freebsd.org>
From: Jim Harris <jimharris@FreeBSD.org>
Date: Tue, 29 May 2012 23:15:23 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org,
	svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
X-SVN-Group: stable-8
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Cc: 
Subject: svn commit: r236264 - stable/8/sys/dev/isci
X-BeenThere: svn-src-stable-8@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: SVN commit messages for only the 8-stable src tree
	<svn-src-stable-8.freebsd.org>
List-Unsubscribe: <http://lists.freebsd.org/mailman/listinfo/svn-src-stable-8>, 
	<mailto:svn-src-stable-8-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/svn-src-stable-8>
List-Post: <mailto:svn-src-stable-8@freebsd.org>
List-Help: <mailto:svn-src-stable-8-request@freebsd.org?subject=help>
List-Subscribe: <http://lists.freebsd.org/mailman/listinfo/svn-src-stable-8>, 
	<mailto:svn-src-stable-8-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Tue, 29 May 2012 23:15:24 -0000

Author: jimharris
Date: Tue May 29 23:15:23 2012
New Revision: 236264
URL: http://svn.freebsd.org/changeset/base/236264

Log:
  MFC r234106, r235751:
  
  Queue CCBs internally instead of using CAM_REQUEUE_REQ status.  This fixes
  problem where userspace apps such as smartctl fail due to CAM_REQUEUE_REQ
  status getting returned when tagged commands are outstanding when smartctl
  sends its I/O using the pass(4) interface.
  
  Sponsored by: Intel

Modified:
  stable/8/sys/dev/isci/isci.h
  stable/8/sys/dev/isci/isci_controller.c
  stable/8/sys/dev/isci/isci_interrupt.c
  stable/8/sys/dev/isci/isci_io_request.c
  stable/8/sys/dev/isci/isci_remote_device.c
Directory Properties:
  stable/8/sys/   (props changed)

Modified: stable/8/sys/dev/isci/isci.h
==============================================================================
--- stable/8/sys/dev/isci/isci.h	Tue May 29 23:10:10 2012	(r236263)
+++ stable/8/sys/dev/isci/isci.h	Tue May 29 23:15:23 2012	(r236264)
@@ -30,6 +30,9 @@
  * $FreeBSD$
  */
 
+#ifndef _ISCI_H
+#define _ISCI_H
+
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
@@ -86,6 +89,31 @@ struct ISCI_REMOTE_DEVICE {
 	BOOL				is_resetting;
 	uint32_t			frozen_lun_mask;
 	SCI_FAST_LIST_ELEMENT_T		pending_device_reset_element;
+
+	/*
+	 * This queue maintains CCBs that have been returned with
+	 *  SCI_IO_FAILURE_INVALID_STATE from the SCI layer.  These CCBs
+	 *  need to be retried, but we cannot return CAM_REQUEUE_REQ because
+	 *  this status gets passed all the way back up to users of the pass(4)
+	 *  interface and breaks things like smartctl.  So instead, we queue
+	 *  these CCBs internally.
+	 */
+	TAILQ_HEAD(,ccb_hdr)		queued_ccbs;
+
+	/*
+	 * Marker denoting this remote device needs its first queued ccb to
+	 *  be retried.
+	 */
+	BOOL				release_queued_ccb;
+
+	/*
+	 * Points to a CCB in the queue that is currently being processed by
+	 *  SCIL.  This allows us to keep in flight CCBs in the queue so as to
+	 *  maintain ordering (i.e. in case we retry an I/O and then find out
+	 *  it needs to be retried again - it just keeps its same place in the
+	 *  queue.
+	 */
+	union ccb *			queued_ccb_in_progress;
 };
 
 struct ISCI_DOMAIN {
@@ -125,6 +153,7 @@ struct ISCI_CONTROLLER
 	BOOL			has_been_scanned;
 	uint32_t		initial_discovery_mask;
 	BOOL			is_frozen;
+	BOOL			release_queued_ccbs;
 	uint8_t			*remote_device_memory;
 	struct ISCI_MEMORY	cached_controller_memory;
 	struct ISCI_MEMORY	uncached_controller_memory;
@@ -290,6 +319,8 @@ int isci_controller_attach_to_cam(struct
 
 void isci_controller_start(void *controller);
 
+void isci_controller_release_queued_ccbs(struct ISCI_CONTROLLER *controller);
+
 void isci_domain_construct(struct ISCI_DOMAIN *domain, uint32_t domain_index,
     struct ISCI_CONTROLLER *controller);
 
@@ -300,3 +331,5 @@ void isci_log_message(uint32_t	verbosity
     char *log_message, ...);
 
 extern uint32_t g_isci_debug_level;
+
+#endif /* #ifndef _ISCI_H */

Modified: stable/8/sys/dev/isci/isci_controller.c
==============================================================================
--- stable/8/sys/dev/isci/isci_controller.c	Tue May 29 23:10:10 2012	(r236263)
+++ stable/8/sys/dev/isci/isci_controller.c	Tue May 29 23:15:23 2012	(r236264)
@@ -201,6 +201,7 @@ void isci_controller_construct(struct IS
 
 	controller->is_started = FALSE;
 	controller->is_frozen = FALSE;
+	controller->release_queued_ccbs = FALSE;
 	controller->sim = NULL;
 	controller->initial_discovery_mask = 0;
 
@@ -430,6 +431,9 @@ int isci_controller_allocate_memory(stru
 		remote_device->frozen_lun_mask = 0;
 		sci_fast_list_element_init(remote_device,
 		    &remote_device->pending_device_reset_element);
+		TAILQ_INIT(&remote_device->queued_ccbs);
+		remote_device->release_queued_ccb = FALSE;
+		remote_device->queued_ccb_in_progress = NULL;
 
 		/*
 		 * For the first SCI_MAX_DOMAINS device objects, do not put
@@ -693,3 +697,47 @@ void isci_action(struct cam_sim *sim, un
 	}
 }
 
+/*
+ * Unfortunately, SCIL doesn't cleanly handle retry conditions.
+ *  CAM_REQUEUE_REQ works only when no one is using the pass(4) interface.  So
+ *  when SCIL denotes an I/O needs to be retried (typically because of mixing
+ *  tagged/non-tagged ATA commands, or running out of NCQ slots), we queue
+ *  these I/O internally.  Once SCIL completes an I/O to this device, or we get
+ *  a ready notification, we will retry the first I/O on the queue.
+ *  Unfortunately, SCIL also doesn't cleanly handle starting the new I/O within
+ *  the context of the completion handler, so we need to retry these I/O after
+ *  the completion handler is done executing.
+ */
+void
+isci_controller_release_queued_ccbs(struct ISCI_CONTROLLER *controller)
+{
+	struct ISCI_REMOTE_DEVICE *dev;
+	struct ccb_hdr *ccb_h;
+	int dev_idx;
+
+	KASSERT(mtx_owned(&controller->lock), ("controller lock not owned"));
+
+	controller->release_queued_ccbs = FALSE;
+	for (dev_idx = 0;
+	     dev_idx < SCI_MAX_REMOTE_DEVICES;
+	     dev_idx++) {
+
+		dev = controller->remote_device[dev_idx];
+		if (dev != NULL &&
+		    dev->release_queued_ccb == TRUE &&
+		    dev->queued_ccb_in_progress == NULL) {
+			dev->release_queued_ccb = FALSE;
+			ccb_h = TAILQ_FIRST(&dev->queued_ccbs);
+
+			if (ccb_h == NULL)
+				continue;
+
+			isci_log_message(1, "ISCI", "release %p %x\n", ccb_h,
+			    ((union ccb *)ccb_h)->csio.cdb_io.cdb_bytes[0]);
+
+			dev->queued_ccb_in_progress = (union ccb *)ccb_h;
+			isci_io_request_execute_scsi_io(
+			    (union ccb *)ccb_h, controller);
+		}
+	}
+}

Modified: stable/8/sys/dev/isci/isci_interrupt.c
==============================================================================
--- stable/8/sys/dev/isci/isci_interrupt.c	Tue May 29 23:10:10 2012	(r236263)
+++ stable/8/sys/dev/isci/isci_interrupt.c	Tue May 29 23:15:23 2012	(r236264)
@@ -177,6 +177,9 @@ isci_interrupt_legacy_handler(void *arg)
 			if (interrupt_handler(scic_controller_handle)) {
 				mtx_lock(&controller->lock);
 				completion_handler(scic_controller_handle);
+				if (controller->release_queued_ccbs == TRUE)
+					isci_controller_release_queued_ccbs(
+					    controller);
 				mtx_unlock(&controller->lock);
 			}
 		}
@@ -204,6 +207,13 @@ isci_interrupt_msix_handler(void *arg)
 	if (interrupt_handler(scic_controller_handle)) {
 		mtx_lock(&controller->lock);
 		completion_handler(scic_controller_handle);
+		/*
+		 * isci_controller_release_queued_ccb() is a relatively
+		 *  expensive routine, so we don't call it until the controller
+		 *  level flag is set to TRUE.
+		 */
+		if (controller->release_queued_ccbs == TRUE)
+			isci_controller_release_queued_ccbs(controller);
 		mtx_unlock(&controller->lock);
 	}
 }

Modified: stable/8/sys/dev/isci/isci_io_request.c
==============================================================================
--- stable/8/sys/dev/isci/isci_io_request.c	Tue May 29 23:10:10 2012	(r236263)
+++ stable/8/sys/dev/isci/isci_io_request.c	Tue May 29 23:15:23 2012	(r236264)
@@ -85,7 +85,9 @@ isci_io_request_complete(SCI_CONTROLLER_
 	struct ISCI_CONTROLLER *isci_controller;
 	struct ISCI_REMOTE_DEVICE *isci_remote_device;
 	union ccb *ccb;
+	BOOL complete_ccb;
 
+	complete_ccb = TRUE;
 	isci_controller = (struct ISCI_CONTROLLER *) sci_object_get_association(scif_controller);
 	isci_remote_device =
 		(struct ISCI_REMOTE_DEVICE *) sci_object_get_association(remote_device);
@@ -163,9 +165,7 @@ isci_io_request_complete(SCI_CONTROLLER_
 
 	case SCI_IO_FAILURE_INVALID_STATE:
 	case SCI_IO_FAILURE_INSUFFICIENT_RESOURCES:
-		ccb->ccb_h.status |= CAM_REQUEUE_REQ;
-		isci_remote_device_freeze_lun_queue(isci_remote_device,
-		    ccb->ccb_h.target_lun);
+		complete_ccb = FALSE;
 		break;
 
 	case SCI_IO_FAILURE_INVALID_REMOTE_DEVICE:
@@ -189,7 +189,7 @@ isci_io_request_complete(SCI_CONTROLLER_
 			    scif_remote_device_get_max_queue_depth(remote_device);
 			xpt_action((union ccb *)&ccb_relsim);
 			xpt_free_path(path);
-			ccb->ccb_h.status |= CAM_REQUEUE_REQ;
+			complete_ccb = FALSE;
 		}
 		break;
 
@@ -209,17 +209,6 @@ isci_io_request_complete(SCI_CONTROLLER_
 		break;
 	}
 
-	if (ccb->ccb_h.status != CAM_REQ_CMP) {
-		/* ccb will be completed with some type of non-success
-		 *  status.  So temporarily freeze the queue until the
-		 *  upper layers can act on the status.  The CAM_DEV_QFRZN
-		 *  flag will then release the queue after the status is
-		 *  acted upon.
-		 */
-		ccb->ccb_h.status |= CAM_DEV_QFRZN;
-		xpt_freeze_devq(ccb->ccb_h.path, 1);
-	}
-
 	callout_stop(&isci_request->parent.timer);
 	bus_dmamap_sync(isci_request->parent.dma_tag,
 	    isci_request->parent.dma_map,
@@ -228,20 +217,82 @@ isci_io_request_complete(SCI_CONTROLLER_
 	bus_dmamap_unload(isci_request->parent.dma_tag,
 	    isci_request->parent.dma_map);
 
-	if (isci_remote_device->frozen_lun_mask != 0 &&
-	    !(ccb->ccb_h.status & CAM_REQUEUE_REQ))
-		isci_remote_device_release_device_queue(isci_remote_device);
-
-	xpt_done(ccb);
 	isci_request->ccb = NULL;
 
-	if (isci_controller->is_frozen == TRUE) {
-		isci_controller->is_frozen = FALSE;
-		xpt_release_simq(isci_controller->sim, TRUE);
-	}
-
 	sci_pool_put(isci_controller->request_pool,
 	    (struct ISCI_REQUEST *)isci_request);
+
+	if (complete_ccb) {
+		if ((ccb->ccb_h.status & CAM_STATUS_MASK) != CAM_REQ_CMP) {
+			/* ccb will be completed with some type of non-success
+			 *  status.  So temporarily freeze the queue until the
+			 *  upper layers can act on the status.  The
+			 *  CAM_DEV_QFRZN flag will then release the queue
+			 *  after the status is acted upon.
+			 */
+			ccb->ccb_h.status |= CAM_DEV_QFRZN;
+			xpt_freeze_devq(ccb->ccb_h.path, 1);
+		}
+
+		if (ccb->ccb_h.status & CAM_SIM_QUEUED) {
+
+			KASSERT(ccb == isci_remote_device->queued_ccb_in_progress,
+			    ("multiple internally queued ccbs in flight"));
+
+			TAILQ_REMOVE(&isci_remote_device->queued_ccbs,
+			    &ccb->ccb_h, sim_links.tqe);
+			ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
+
+			/*
+			 * This CCB that was in the queue was completed, so
+			 *  set the in_progress pointer to NULL denoting that
+			 *  we can retry another CCB from the queue.  We only
+			 *  allow one CCB at a time from the queue to be
+			 *  in progress so that we can effectively maintain
+			 *  ordering.
+			 */
+			isci_remote_device->queued_ccb_in_progress = NULL;
+		}
+
+		if (isci_remote_device->frozen_lun_mask != 0) {
+			isci_remote_device_release_device_queue(isci_remote_device);
+		}
+
+		xpt_done(ccb);
+
+		if (isci_controller->is_frozen == TRUE) {
+			isci_controller->is_frozen = FALSE;
+			xpt_release_simq(isci_controller->sim, TRUE);
+		}
+	} else {
+		isci_remote_device_freeze_lun_queue(isci_remote_device,
+		    ccb->ccb_h.target_lun);
+
+		if (ccb->ccb_h.status & CAM_SIM_QUEUED) {
+
+			KASSERT(ccb == isci_remote_device->queued_ccb_in_progress,
+			    ("multiple internally queued ccbs in flight"));
+
+			/*
+			 *  Do nothing, CCB is already on the device's queue.
+			 *   We leave it on the queue, to be retried again
+			 *   next time a CCB on this device completes, or we
+			 *   get a ready notification for this device.
+			 */
+			isci_log_message(1, "ISCI", "already queued %p %x\n",
+			    ccb, ccb->csio.cdb_io.cdb_bytes[0]);
+
+			isci_remote_device->queued_ccb_in_progress = NULL;
+
+		} else {
+			isci_log_message(1, "ISCI", "queue %p %x\n", ccb,
+			    ccb->csio.cdb_io.cdb_bytes[0]);
+			ccb->ccb_h.status |= CAM_SIM_QUEUED;
+
+			TAILQ_INSERT_TAIL(&isci_remote_device->queued_ccbs,
+			    &ccb->ccb_h, sim_links.tqe);
+		}
+	}
 }
 
 /**

Modified: stable/8/sys/dev/isci/isci_remote_device.c
==============================================================================
--- stable/8/sys/dev/isci/isci_remote_device.c	Tue May 29 23:10:10 2012	(r236263)
+++ stable/8/sys/dev/isci/isci_remote_device.c	Tue May 29 23:15:23 2012	(r236264)
@@ -289,9 +289,26 @@ isci_remote_device_release_lun_queue(str
 
 void
 isci_remote_device_release_device_queue(
-    struct ISCI_REMOTE_DEVICE *remote_device)
+    struct ISCI_REMOTE_DEVICE *device)
 {
-	lun_id_t lun;
-	for (lun = 0; lun < ISCI_MAX_LUN; lun++)
-		isci_remote_device_release_lun_queue(remote_device, lun);
+	if (TAILQ_EMPTY(&device->queued_ccbs)) {
+		lun_id_t lun;
+
+		for (lun = 0; lun < ISCI_MAX_LUN; lun++)
+			isci_remote_device_release_lun_queue(device, lun);
+	} else {
+		/*
+		 * We cannot unfreeze the devq, because there are still
+		 *  CCBs in our internal queue that need to be processed
+		 *  first.  Mark this device, and the controller, so that
+		 *  the first CCB in this device's internal queue will be
+		 *  resubmitted after the current completion context
+		 *  unwinds.
+		 */
+		device->release_queued_ccb = TRUE;
+		device->domain->controller->release_queued_ccbs = TRUE;
+
+		isci_log_message(1, "ISCI", "schedule %p for release\n",
+		    device);
+	}
 }