Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 7 Apr 2012 11:03:25 GMT
From:      Robert Watson <rwatson@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 209192 for review
Message-ID:  <201204071103.q37B3Pl8013879@skunkworks.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://p4web.freebsd.org/@@209192?ac=10

Change 209192 by rwatson@rwatson_svr_ctsrd_mipsbuild on 2012/04/07 11:03:10

	Implement support for retrying transactions when the Altera SD Card 
	IP Core returns errors.  It appears that a number of transient
	errors occur in our current configuration, and while many I/Os
	succeed on the first attempt, a moderate number require 2-4 retries
	before the IP core will return success.  To this end, implement a
	flag indicating that the current transaction has experienced an
	error, and a retry counter with alternative retry delay.  Generate
	console output only then an I/O finally succeeds or fails,
	following an error, rather than on each attempt.
	
	We aren't yet able to interpret the nature of the error in software
	due to inadequate documentation, so this seems reasonable.  If and
	when we are able to interpret the errors, we might want to handle
	different failures differently -- for example, if the core is
	reinitialising following a serious error, we might be willing to
	wait much longer.
	
	Return EINVAL rather than panic() since fsck requires it.

Affected files ...

.. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.c#4 edit
.. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.h#7 edit
.. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_disk.c#5 edit
.. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_io.c#8 edit

Differences ...

==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.c#4 (text+ko) ====

@@ -280,8 +280,11 @@
 
 	/*
 	 * Handle various forms of I/O completion, successful and otherwise.
+	 * The I/O layer may restart the transaction if an error occurred, in
+	 * which case remain in the IO state and reschedule.
 	 */
-	altera_sdcard_io_complete(sc, asr);
+	if (!altera_sdcard_io_complete(sc, asr))
+		return;
 
 	/*
 	 * Now that I/O is complete, process detach requests in preference to
@@ -321,7 +324,10 @@
 		break;
 
 	case ALTERA_SDCARD_STATE_IO:
-		interval = ALTERA_SDCARD_TIMEOUT_IO;
+		if (sc->as_flags & ALTERA_SDCARD_FLAG_IOERROR)
+			interval = ALTERA_SDCARD_TIMEOUT_IOERROR;
+		else
+			interval = ALTERA_SDCARD_TIMEOUT_IO;
 		break;
 
 	default:

==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.h#7 (text+ko) ====

@@ -46,10 +46,15 @@
 	int			 as_state;
 	int			 as_flags;
 	struct disk		*as_disk;
+	struct taskqueue	*as_taskqueue;
+	struct timeout_task	 as_task;
+
+	/*
+	 * Fields relating to in-progress and pending I/O, if any.
+	 */
 	struct bio_queue_head	 as_bioq;
 	struct bio		*as_currentbio;
-	struct taskqueue	*as_taskqueue;
-	struct timeout_task	 as_task;
+	u_int			 as_retriesleft;
 
 	/*
 	 * Infrequently changing fields cached from the SD Card IP Core.
@@ -90,11 +95,18 @@
 #define	ALTERA_SDCARD_TIMEOUT_NOCARD	(hz/2)
 #define	ALTERA_SDCARD_TIMEOUT_IDLE	(hz/2)
 #define	ALTERA_SDCARD_TIMEOUT_IO	(hz/100)
+#define	ALTERA_SDCARD_TIMEOUT_IOERROR	(hz/2)
 
 /*
+ * Maximum number of retries on an I/O.
+ */
+#define	ALTERA_SDCARD_RETRY_LIMIT	10
+
+/*
  * Driver status flags.
  */
 #define	ALTERA_SDCARD_FLAG_DETACHREQ	0x00000001	/* Detach requested. */
+#define	ALTERA_SDCARD_FLAG_IOERROR	0x00000002	/* Error in progress. */
 
 /*
  * Functions for performing low-level register and memory I/O to/from the SD
@@ -105,7 +117,7 @@
 int		altera_sdcard_read_csd(struct altera_sdcard_softc *sc);
 uint16_t	altera_sdcard_read_rr1(struct altera_sdcard_softc *sc);
 
-void	altera_sdcard_io_complete(struct altera_sdcard_softc *sc,
+int	altera_sdcard_io_complete(struct altera_sdcard_softc *sc,
 	    uint16_t asr);
 void	altera_sdcard_io_start(struct altera_sdcard_softc *sc,
 	    struct bio *bp);

==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_disk.c#5 (text+ko) ====

@@ -65,7 +65,8 @@
     struct thread *td)
 {
 
-	panic("%s: not yet", __func__);
+	/* XXXRW: more here? */
+	return (EINVAL);
 }
 
 static void

==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_io.c#8 (text+ko) ====

@@ -233,22 +233,10 @@
 	    len);
 }
 
-void
-altera_sdcard_io_start(struct altera_sdcard_softc *sc, struct bio *bp)
+static void
+altera_sdcard_io_start_internal(struct altera_sdcard_softc *sc, struct bio *bp)
 {
 
-	ALTERA_SDCARD_LOCK_ASSERT(sc);
-	KASSERT(sc->as_currentbio == NULL,
-	    ("%s: bio already started", __func__));
-
-	/*
-	 * We advertise a block size and maximum I/O size up the stack of the
-	 * SD Card IP Core sector size.  Catch any attempts to not follow the
-	 * rules.
-	 */
-	KASSERT(bp->bio_bcount == ALTERA_SDCARD_SECTORSIZE,
-	    ("%s: I/O size not %d", __func__, ALTERA_SDCARD_SECTORSIZE));
-
 	switch (bp->bio_cmd) {
 	case BIO_READ:
 		altera_sdcard_write_cmd_arg(sc, bp->bio_pblkno *
@@ -268,14 +256,39 @@
 		panic("%s: unsupported I/O operation %d", __func__,
 		    bp->bio_cmd);
 	}
+}
+
+void
+altera_sdcard_io_start(struct altera_sdcard_softc *sc, struct bio *bp)
+{
+
+	ALTERA_SDCARD_LOCK_ASSERT(sc);
+	KASSERT(sc->as_currentbio == NULL,
+	    ("%s: bio already started", __func__));
+
+	/*
+	 * We advertise a block size and maximum I/O size up the stack of the
+	 * SD Card IP Core sector size.  Catch any attempts to not follow the
+	 * rules.
+	 */
+	KASSERT(bp->bio_bcount == ALTERA_SDCARD_SECTORSIZE,
+	    ("%s: I/O size not %d", __func__, ALTERA_SDCARD_SECTORSIZE));
+	altera_sdcard_io_start_internal(sc, bp);
 	sc->as_currentbio = bp;
+	sc->as_retriesleft = ALTERA_SDCARD_RETRY_LIMIT;
 }
 
-void
+/*
+ * Handle completed I/O.  ASR is passed in to avoid reading it more than once.
+ * Return 1 if the I/O is actually complete (success, or retry limit
+ * exceeded), or 0 if not.
+ */
+int
 altera_sdcard_io_complete(struct altera_sdcard_softc *sc, uint16_t asr)
 {
 	struct bio *bp;
 	uint16_t rr1;
+	int error;
 
 	ALTERA_SDCARD_LOCK_ASSERT(sc);
 	KASSERT(!(asr & ALTERA_SDCARD_ASR_CMDINPROGRESS),
@@ -284,40 +297,57 @@
 	    ("%s: card removed", __func__));
 
 	bp = sc->as_currentbio;
-	sc->as_currentbio = NULL;
 
 	/*
-	 * Handle I/O errors.  Assume that the caller has already checked for
-	 * unexpected removal.
+	 * Handle I/O retries if an error is returned by the device.
 	 */
 	if (asr &
 	    (ALTERA_SDCARD_ASR_CMDTIMEOUT | ALTERA_SDCARD_ASR_CMDDATAERROR)) {
 		rr1 = altera_sdcard_read_rr1(sc);
+		sc->as_retriesleft--;
+		if (sc->as_retriesleft != 0) {
+			sc->as_flags |= ALTERA_SDCARD_FLAG_IOERROR;
+			altera_sdcard_io_start_internal(sc, bp);
+			return (0);
+		}
 		device_printf(sc->as_dev, "%s: %s operation block %ju length "
-		    "%ju failed; asr 0x%08x (rr1: 0x%04x)\n",
-		    __func__, bp->bio_cmd == BIO_READ ? "BIO_READ" :
-		    (bp->bio_cmd == BIO_WRITE ? "write" : "unknown"),
+		    "%ju failed; asr 0x%08x (rr1: 0x%04x)\n", __func__,
+		    bp->bio_cmd == BIO_READ ? "BIO_READ" :
+		    (bp->bio_cmd == BIO_WRITE ? "BIO_WRITE" : "unknown"),
 		    bp->bio_pblkno, bp->bio_bcount, asr, rr1);
-		biofinish(bp, NULL, EIO);
-		return;
-	}
+		sc->as_flags &= ~ALTERA_SDCARD_FLAG_IOERROR;
+		error = EIO;
+	} else {
+		if (sc->as_flags & ALTERA_SDCARD_FLAG_IOERROR) {
+			device_printf(sc->as_dev, "%s: %s operation block %ju"
+			    " length %ju succeeded after %d retries\n",
+			    __func__, bp->bio_cmd == BIO_READ ? "BIO_READ" :
+			    (bp->bio_cmd == BIO_WRITE ? "write" : "unknown"),
+			    bp->bio_pblkno, bp->bio_bcount,
+			    ALTERA_SDCARD_RETRY_LIMIT - sc->as_retriesleft);
+			sc->as_flags &= ~ALTERA_SDCARD_FLAG_IOERROR;
+		}
 
-	/*
-	 * Successful I/O completion path.
-	 */
-	switch (bp->bio_cmd) {
-	case BIO_READ:
-		altera_sdcard_read_rxtx_buffer(sc, bp->bio_data,
-		    bp->bio_bcount);
-		break;
+		/*
+		 * Successful I/O completion path.
+		 */
+		switch (bp->bio_cmd) {
+		case BIO_READ:
+			altera_sdcard_read_rxtx_buffer(sc, bp->bio_data,
+			    bp->bio_bcount);
+			break;
 
-	case BIO_WRITE:
-		break;
+		case BIO_WRITE:
+			break;
 
-	default:
-		panic("%s: unsupported I/O operation %d", __func__,
-		    bp->bio_cmd);
+		default:
+			panic("%s: unsupported I/O operation %d", __func__,
+			    bp->bio_cmd);
+		}
+		bp->bio_resid = 0;
+		error = 0;
 	}
-	bp->bio_resid = 0;
-	biofinish(bp, NULL, 0);
+	biofinish(bp, NULL, error);
+	sc->as_currentbio = NULL;
+	return (1);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204071103.q37B3Pl8013879>