From owner-p4-projects@FreeBSD.ORG Sat Apr 7 11:03:26 2012 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id E03C41065672; Sat, 7 Apr 2012 11:03:25 +0000 (UTC) Delivered-To: perforce@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [69.147.83.52]) by hub.freebsd.org (Postfix) with ESMTP id 94702106566B for ; Sat, 7 Apr 2012 11:03:25 +0000 (UTC) (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Received: from skunkworks.freebsd.org (skunkworks.freebsd.org [IPv6:2001:4f8:fff6::2d]) by mx1.freebsd.org (Postfix) with ESMTP id 7AC928FC0A for ; Sat, 7 Apr 2012 11:03:25 +0000 (UTC) Received: from skunkworks.freebsd.org (localhost [127.0.0.1]) by skunkworks.freebsd.org (8.14.4/8.14.4) with ESMTP id q37B3Pp2013882 for ; Sat, 7 Apr 2012 11:03:25 GMT (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Received: (from perforce@localhost) by skunkworks.freebsd.org (8.14.4/8.14.4/Submit) id q37B3Pl8013879 for perforce@freebsd.org; Sat, 7 Apr 2012 11:03:25 GMT (envelope-from bb+lists.freebsd.perforce@cyrus.watson.org) Date: Sat, 7 Apr 2012 11:03:25 GMT Message-Id: <201204071103.q37B3Pl8013879@skunkworks.freebsd.org> X-Authentication-Warning: skunkworks.freebsd.org: perforce set sender to bb+lists.freebsd.perforce@cyrus.watson.org using -f From: Robert Watson To: Perforce Change Reviews Precedence: bulk Cc: Subject: PERFORCE change 209192 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 07 Apr 2012 11:03:26 -0000 http://p4web.freebsd.org/@@209192?ac=10 Change 209192 by rwatson@rwatson_svr_ctsrd_mipsbuild on 2012/04/07 11:03:10 Implement support for retrying transactions when the Altera SD Card IP Core returns errors. It appears that a number of transient errors occur in our current configuration, and while many I/Os succeed on the first attempt, a moderate number require 2-4 retries before the IP core will return success. To this end, implement a flag indicating that the current transaction has experienced an error, and a retry counter with alternative retry delay. Generate console output only then an I/O finally succeeds or fails, following an error, rather than on each attempt. We aren't yet able to interpret the nature of the error in software due to inadequate documentation, so this seems reasonable. If and when we are able to interpret the errors, we might want to handle different failures differently -- for example, if the core is reinitialising following a serious error, we might be willing to wait much longer. Return EINVAL rather than panic() since fsck requires it. Affected files ... .. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.c#4 edit .. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.h#7 edit .. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_disk.c#5 edit .. //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_io.c#8 edit Differences ... ==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.c#4 (text+ko) ==== @@ -280,8 +280,11 @@ /* * Handle various forms of I/O completion, successful and otherwise. + * The I/O layer may restart the transaction if an error occurred, in + * which case remain in the IO state and reschedule. */ - altera_sdcard_io_complete(sc, asr); + if (!altera_sdcard_io_complete(sc, asr)) + return; /* * Now that I/O is complete, process detach requests in preference to @@ -321,7 +324,10 @@ break; case ALTERA_SDCARD_STATE_IO: - interval = ALTERA_SDCARD_TIMEOUT_IO; + if (sc->as_flags & ALTERA_SDCARD_FLAG_IOERROR) + interval = ALTERA_SDCARD_TIMEOUT_IOERROR; + else + interval = ALTERA_SDCARD_TIMEOUT_IO; break; default: ==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard.h#7 (text+ko) ==== @@ -46,10 +46,15 @@ int as_state; int as_flags; struct disk *as_disk; + struct taskqueue *as_taskqueue; + struct timeout_task as_task; + + /* + * Fields relating to in-progress and pending I/O, if any. + */ struct bio_queue_head as_bioq; struct bio *as_currentbio; - struct taskqueue *as_taskqueue; - struct timeout_task as_task; + u_int as_retriesleft; /* * Infrequently changing fields cached from the SD Card IP Core. @@ -90,11 +95,18 @@ #define ALTERA_SDCARD_TIMEOUT_NOCARD (hz/2) #define ALTERA_SDCARD_TIMEOUT_IDLE (hz/2) #define ALTERA_SDCARD_TIMEOUT_IO (hz/100) +#define ALTERA_SDCARD_TIMEOUT_IOERROR (hz/2) /* + * Maximum number of retries on an I/O. + */ +#define ALTERA_SDCARD_RETRY_LIMIT 10 + +/* * Driver status flags. */ #define ALTERA_SDCARD_FLAG_DETACHREQ 0x00000001 /* Detach requested. */ +#define ALTERA_SDCARD_FLAG_IOERROR 0x00000002 /* Error in progress. */ /* * Functions for performing low-level register and memory I/O to/from the SD @@ -105,7 +117,7 @@ int altera_sdcard_read_csd(struct altera_sdcard_softc *sc); uint16_t altera_sdcard_read_rr1(struct altera_sdcard_softc *sc); -void altera_sdcard_io_complete(struct altera_sdcard_softc *sc, +int altera_sdcard_io_complete(struct altera_sdcard_softc *sc, uint16_t asr); void altera_sdcard_io_start(struct altera_sdcard_softc *sc, struct bio *bp); ==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_disk.c#5 (text+ko) ==== @@ -65,7 +65,8 @@ struct thread *td) { - panic("%s: not yet", __func__); + /* XXXRW: more here? */ + return (EINVAL); } static void ==== //depot/projects/ctsrd/beribsd/src/sys/dev/altera/sdcard/altera_sdcard_io.c#8 (text+ko) ==== @@ -233,22 +233,10 @@ len); } -void -altera_sdcard_io_start(struct altera_sdcard_softc *sc, struct bio *bp) +static void +altera_sdcard_io_start_internal(struct altera_sdcard_softc *sc, struct bio *bp) { - ALTERA_SDCARD_LOCK_ASSERT(sc); - KASSERT(sc->as_currentbio == NULL, - ("%s: bio already started", __func__)); - - /* - * We advertise a block size and maximum I/O size up the stack of the - * SD Card IP Core sector size. Catch any attempts to not follow the - * rules. - */ - KASSERT(bp->bio_bcount == ALTERA_SDCARD_SECTORSIZE, - ("%s: I/O size not %d", __func__, ALTERA_SDCARD_SECTORSIZE)); - switch (bp->bio_cmd) { case BIO_READ: altera_sdcard_write_cmd_arg(sc, bp->bio_pblkno * @@ -268,14 +256,39 @@ panic("%s: unsupported I/O operation %d", __func__, bp->bio_cmd); } +} + +void +altera_sdcard_io_start(struct altera_sdcard_softc *sc, struct bio *bp) +{ + + ALTERA_SDCARD_LOCK_ASSERT(sc); + KASSERT(sc->as_currentbio == NULL, + ("%s: bio already started", __func__)); + + /* + * We advertise a block size and maximum I/O size up the stack of the + * SD Card IP Core sector size. Catch any attempts to not follow the + * rules. + */ + KASSERT(bp->bio_bcount == ALTERA_SDCARD_SECTORSIZE, + ("%s: I/O size not %d", __func__, ALTERA_SDCARD_SECTORSIZE)); + altera_sdcard_io_start_internal(sc, bp); sc->as_currentbio = bp; + sc->as_retriesleft = ALTERA_SDCARD_RETRY_LIMIT; } -void +/* + * Handle completed I/O. ASR is passed in to avoid reading it more than once. + * Return 1 if the I/O is actually complete (success, or retry limit + * exceeded), or 0 if not. + */ +int altera_sdcard_io_complete(struct altera_sdcard_softc *sc, uint16_t asr) { struct bio *bp; uint16_t rr1; + int error; ALTERA_SDCARD_LOCK_ASSERT(sc); KASSERT(!(asr & ALTERA_SDCARD_ASR_CMDINPROGRESS), @@ -284,40 +297,57 @@ ("%s: card removed", __func__)); bp = sc->as_currentbio; - sc->as_currentbio = NULL; /* - * Handle I/O errors. Assume that the caller has already checked for - * unexpected removal. + * Handle I/O retries if an error is returned by the device. */ if (asr & (ALTERA_SDCARD_ASR_CMDTIMEOUT | ALTERA_SDCARD_ASR_CMDDATAERROR)) { rr1 = altera_sdcard_read_rr1(sc); + sc->as_retriesleft--; + if (sc->as_retriesleft != 0) { + sc->as_flags |= ALTERA_SDCARD_FLAG_IOERROR; + altera_sdcard_io_start_internal(sc, bp); + return (0); + } device_printf(sc->as_dev, "%s: %s operation block %ju length " - "%ju failed; asr 0x%08x (rr1: 0x%04x)\n", - __func__, bp->bio_cmd == BIO_READ ? "BIO_READ" : - (bp->bio_cmd == BIO_WRITE ? "write" : "unknown"), + "%ju failed; asr 0x%08x (rr1: 0x%04x)\n", __func__, + bp->bio_cmd == BIO_READ ? "BIO_READ" : + (bp->bio_cmd == BIO_WRITE ? "BIO_WRITE" : "unknown"), bp->bio_pblkno, bp->bio_bcount, asr, rr1); - biofinish(bp, NULL, EIO); - return; - } + sc->as_flags &= ~ALTERA_SDCARD_FLAG_IOERROR; + error = EIO; + } else { + if (sc->as_flags & ALTERA_SDCARD_FLAG_IOERROR) { + device_printf(sc->as_dev, "%s: %s operation block %ju" + " length %ju succeeded after %d retries\n", + __func__, bp->bio_cmd == BIO_READ ? "BIO_READ" : + (bp->bio_cmd == BIO_WRITE ? "write" : "unknown"), + bp->bio_pblkno, bp->bio_bcount, + ALTERA_SDCARD_RETRY_LIMIT - sc->as_retriesleft); + sc->as_flags &= ~ALTERA_SDCARD_FLAG_IOERROR; + } - /* - * Successful I/O completion path. - */ - switch (bp->bio_cmd) { - case BIO_READ: - altera_sdcard_read_rxtx_buffer(sc, bp->bio_data, - bp->bio_bcount); - break; + /* + * Successful I/O completion path. + */ + switch (bp->bio_cmd) { + case BIO_READ: + altera_sdcard_read_rxtx_buffer(sc, bp->bio_data, + bp->bio_bcount); + break; - case BIO_WRITE: - break; + case BIO_WRITE: + break; - default: - panic("%s: unsupported I/O operation %d", __func__, - bp->bio_cmd); + default: + panic("%s: unsupported I/O operation %d", __func__, + bp->bio_cmd); + } + bp->bio_resid = 0; + error = 0; } - bp->bio_resid = 0; - biofinish(bp, NULL, 0); + biofinish(bp, NULL, error); + sc->as_currentbio = NULL; + return (1); }