Date: Sat, 19 Feb 2005 15:56:35 +0000 From: Ian Dowse <iedowse@maths.tcd.ie> To: Paul Mather <paul@gromit.dlib.vt.edu> Cc: Reid Linnemann <lreid@a.cs.okstate.edu> Subject: Re: ad WRITE_DMA timing out frequently Message-ID: <200502191556.aa96337@salmon.maths.tcd.ie> In-Reply-To: Your message of "Fri, 18 Feb 2005 13:47:25 EST." <1108752445.1105.34.camel@zappa.Chelsea-Ct.Org>
next in thread | previous in thread | raw e-mail | index | archive | help
In message <1108752445.1105.34.camel@zappa.Chelsea-Ct.Org>, Paul Mather writes: >The "TIMEOUT - WRITE_DMA" issue has been a recurring problem for me >since somewhere in the 5.2.1--5.3 release range. (It's been so long now >that I don't remember whether it first started plaguing me in 5.2.1 or >5.3. I do know for definite I never got this problem in 5.1 and it only >crept in during an "upgrade.") On a recent -CURRENT you could try the following patch. It attempts to clean up the handling of timeouts in the ATA code by using the new callout_init_mtx() function, and appeared to cure fairly frequent WRITE_DMA timeout messages for me. Ian Index: sys/dev/ata/ata-all.c =================================================================== RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-all.c,v retrieving revision 1.235 diff -u -r1.235 ata-all.c --- sys/dev/ata/ata-all.c 7 Feb 2005 17:14:42 -0000 1.235 +++ sys/dev/ata/ata-all.c 17 Feb 2005 20:52:27 -0000 @@ -429,6 +429,7 @@ ch->state = ATA_ACTIVE; else ch->state = ATA_IDLE; + callout_stop(&request->callout); mtx_unlock(&ch->state_mtx); ch->locking(ch, ATA_LF_UNLOCK); ata_finish(request); Index: sys/dev/ata/ata-queue.c =================================================================== RCS file: /dump/FreeBSD-CVS/src/sys/dev/ata/ata-queue.c,v retrieving revision 1.41 diff -u -r1.41 ata-queue.c --- sys/dev/ata/ata-queue.c 8 Dec 2004 11:16:33 -0000 1.41 +++ sys/dev/ata/ata-queue.c 18 Feb 2005 04:27:18 -0000 @@ -56,7 +56,7 @@ /* mark request as virgin */ request->result = request->status = request->error = 0; - callout_init(&request->callout, 1); + callout_init_mtx(&request->callout, &ch->state_mtx, CALLOUT_RETURNUNLOCKED); if (!request->callback && !(request->flags & ATA_R_REQUEUE)) sema_init(&request->done, 0, "ATA request done"); @@ -67,6 +67,7 @@ (request->flags & (ATA_R_CONTROL | ATA_R_IMMEDIATE))) { /* arm timeout */ + mtx_lock(&ch->state_mtx); if (!dumping) callout_reset(&request->callout, request->timeout * hz, (timeout_t*)ata_timeout, request); @@ -75,11 +76,13 @@ ch->running = request; if (ch->hw.begin_transaction(request) == ATA_OP_FINISHED) { ch->running = NULL; - callout_drain(&request->callout); + callout_stop(&request->callout); if (!request->callback) sema_destroy(&request->done); + mtx_unlock(&ch->state_mtx); return; } + mtx_unlock(&ch->state_mtx); } else { /* put request on the locked queue at the specified location */ @@ -193,6 +196,7 @@ ch->running = NULL; ch->state = ATA_IDLE; mtx_unlock(&ch->queue_mtx); + callout_stop(&request->callout); mtx_unlock(&ch->state_mtx); ch->locking(ch, ATA_LF_UNLOCK); ata_finish(request); @@ -216,9 +220,6 @@ ata_completed(request, 0); } else { - if (!dumping) - callout_reset(&request->callout, request->timeout * hz, - (timeout_t*)ata_timeout, request); if (request->bio && !(request->flags & ATA_R_TIMEOUT)) { ATA_DEBUG_RQ(request, "finish bio_taskqueue"); bio_taskqueue(request->bio, (bio_task_t *)ata_completed, request); @@ -263,9 +264,6 @@ request->result = EIO; } else { - /* untimeout request now we have control back */ - callout_drain(&request->callout); - /* if this is a soft ECC error warn about it */ if ((request->status & (ATA_S_CORR | ATA_S_ERROR)) == ATA_S_CORR) { ata_prtdev(request->device, @@ -408,9 +406,8 @@ { struct ata_channel *ch = request->device->channel; - mtx_lock(&ch->state_mtx); - ATA_DEBUG_RQ(request, "timeout"); + mtx_assert(&ch->state_mtx, MA_OWNED); /* if interrupt has been seen, shout and just rearm timeout */ if (request->flags & ATA_R_INTR_SEEN) { @@ -426,6 +423,7 @@ callout_reset(&request->callout, request->timeout * hz, (timeout_t*)ata_timeout, request); mtx_unlock(&ch->state_mtx); + /* CALLOUT_RETURNUNLOCKED */ return; } @@ -454,6 +452,7 @@ mtx_unlock(&ch->state_mtx); ata_prtdev(request->device, "timeout state=%d unexpected\n", ch->state); } + /* CALLOUT_RETURNUNLOCKED */ } void @@ -464,10 +463,11 @@ mtx_lock(&ch->state_mtx); request = ch->running; ch->running = NULL; + if (request) + callout_stop(&request->callout); mtx_unlock(&ch->state_mtx); if (request) { - callout_drain(&request->callout); ata_prtdev(request->device, "WARNING - %s requeued due to channel reset", ata_cmd2str(request)); @@ -501,11 +501,12 @@ mtx_lock(&ch->state_mtx); request = ch->running; ch->running = NULL; + if (request) + callout_stop(&request->callout); mtx_unlock(&ch->state_mtx); /* if we have a request "in flight" fail it as well */ if (request && (!device || request->device == device)) { - callout_drain(&request->callout); request->result = ENXIO; if (request->callback) (request->callback)(request);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200502191556.aa96337>