Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 20 Nov 2003 04:11:50 +0100
From:      Thomas Moestl <t.moestl@tu-bs.de>
To:        Kris Kennaway <kris@obsecurity.org>
Cc:        sos@freebsd.org
Subject:   Re: ultra5/cmd646 hang
Message-ID:  <20031120031150.GA759@timesink.dyndns.org>
In-Reply-To: <20031118011019.GA37617@xor.obsecurity.org>
References:  <20031114105853.A92204@carver.gumbysoft.com> <20031114134001.D92204@carver.gumbysoft.com> <20031117130205.R22102@carver.gumbysoft.com> <20031117225453.GA35569@xor.obsecurity.org> <20031118011019.GA37617@xor.obsecurity.org>

next in thread | previous in thread | raw e-mail | index | archive | help

--3MwIy2ne0vdjdPXF
Content-Type: text/plain; charset=iso-8859-1
Content-Disposition: inline
Content-Transfer-Encoding: 8bit

On Mon, 2003/11/17 at 17:10:19 -0800, Kris Kennaway wrote:
> On Mon, Nov 17, 2003 at 02:54:54PM -0800, Kris Kennaway wrote:
> 
> > I'm unable to break to debugger here.  A kernel from July boots fine on this system.
> 
> ALT_BREAK_TO_DEBUGGER lets me break into DDB.  I see the following:
>
> [...]
> 
> db> show intr
> fast pil13      11
> ithrd pil2      5600774
> sab0 vec2027    12
> atapci0 vec2016 5600774
> tick pil14      3552
> db>
> 
> Interrupt storm!

I think I've found the reason for this; the attached patch works around it.

The issue is that the chip does not set ATA_BMSTAT_INTERRUPT, even though
it is DMA-capable. My hackaround is to add an interrupt handler for the
CMD646 that does only check this bit if a DMA transfer is in progress,
like it was done in pre-ATAng times. This probably isn't the best solution,
but it works for now. On the problematic boxen, you will also have to
set hw.ata.ata_dma=0 (they fell back to PIO before automatically anyway,
and DMA operations time out even if all interrupts are admitted).
I am not sure whether this is caused by some missing bit of setup that
is required, but that the firmware doesn't do for us, or whether it
applies only to some buggy/crippled chip revision in some machines.

Additionally, this patch should fix the panic that were reported on
request timeouts - the ata_request structure used to print the timeout
warning could already be freed at that point, causing references to
locations near 0xdeadc0de. I am not sure that the check for
(ch->running != request) is completely safe with this in mind, since
it could be possible that the request was freed and already reinserted.

Søren, can you think of a reason why the interrupt bit would not be set?
Otherwise, can you please take a look at the patch and tweak it so that
some fix for this problem can be committed?

Thanks,
	- Thomas

-- 
Thomas Moestl <t.moestl@tu-bs.de>	http://www.tu-bs.de/~y0015675/
              <tmm@FreeBSD.org>		http://people.FreeBSD.org/~tmm/
PGP fingerprint: 1C97 A604 2BD0 E492 51D0  9C0F 1FE6 4F1D 419C 776C

--3MwIy2ne0vdjdPXF
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename="ata-hang.diff"

Index: ata-chipset.c
===================================================================
RCS file: /vol/ncvs/src/sys/dev/ata/ata-chipset.c,v
retrieving revision 1.46
diff -u -r1.46 ata-chipset.c
--- ata-chipset.c	18 Nov 2003 15:27:28 -0000	1.46
+++ ata-chipset.c	20 Nov 2003 00:56:19 -0000
@@ -101,6 +101,7 @@
 static int ata_sii_mio_allocate(device_t, struct ata_channel *);
 static void ata_sii_intr(void *);
 static void ata_cmd_intr(void *);
+static void ata_nobms_intr(void *);
 static void ata_sii_setmode(struct ata_device *, int);
 static void ata_cmd_setmode(struct ata_device *, int);
 static int ata_sis_chipinit(device_t);
@@ -1581,7 +1582,7 @@
      { ATA_CMD649,    0x00, 0,	      SIIINTR,   ATA_UDMA5, "CMD 649" },
      { ATA_CMD648,    0x00, 0,	      SIIINTR,   ATA_UDMA4, "CMD 648" },
      { ATA_CMD646,    0x07, 0,	      0,	 ATA_UDMA2, "CMD 646U2" },
-     { ATA_CMD646,    0x00, 0,	      0,	 ATA_WDMA2, "CMD 646" },
+     { ATA_CMD646,    0x00, 0,	      SIINOBMS,	 ATA_WDMA2, "CMD 646" },
      { 0, 0, 0, 0, 0, 0}};
     char buffer[64];
 
@@ -1599,6 +1600,7 @@
 ata_sii_chipinit(device_t dev)
 {
     struct ata_pci_controller *ctlr = device_get_softc(dev);
+    void (*ih)(void *);
     int rid = ATA_IRQ_RID;
 
     if (!(ctlr->r_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &rid, 0, ~0, 1,
@@ -1637,10 +1639,14 @@
 	    ctlr->setmode = ata_sii_setmode;
     }
     else {
+	if (ctlr->chip->cfg2 & SIIINTR)
+	    ih = ata_cmd_intr;
+	else if (ctlr->chip->cfg2 & SIINOBMS)
+	    ih = ata_nobms_intr;
+	else
+	    ih = ata_generic_intr;
 	if ((bus_setup_intr(dev, ctlr->r_irq, ATA_INTR_FLAGS,
-			    ctlr->chip->cfg2 & SIIINTR ? 
-			    ata_cmd_intr : ata_generic_intr,
-			    ctlr, &ctlr->handle))) {
+			    ih, ctlr, &ctlr->handle))) {
 	    device_printf(dev, "unable to setup interrupt\n");
 	    return ENXIO;
 	}
@@ -1739,6 +1745,34 @@
 	    }
 	    ctlr->interrupt[unit].function(ch);
 	}
+    }
+}
+
+static void
+ata_nobms_intr(void *data)
+{
+    struct ata_pci_controller *ctlr = data;
+    struct ata_channel *ch;
+    int unit;
+
+    /* implement this as a toggle instead to balance load XXX */
+    for (unit = 0; unit < 2; unit++) {
+	if (!(ch = ctlr->interrupt[unit].argument))
+	    continue;
+	/*
+	 * The CMD646 does not always seem to set ATA_BMSTAT_INTERRUPT.
+	 * Only check it when a transfer is active.
+	 */
+	if (ch->dma && (ch->dma->flags & ATA_DMA_ACTIVE)) {
+	    int bmstat = ATA_IDX_INB(ch, ATA_BMSTAT_PORT) & ATA_BMSTAT_MASK;
+
+	    if ((bmstat & (ATA_BMSTAT_ACTIVE | ATA_BMSTAT_INTERRUPT)) !=
+		ATA_BMSTAT_INTERRUPT)
+		continue;
+	    ATA_IDX_OUTB(ch, ATA_BMSTAT_PORT, bmstat & ~ATA_BMSTAT_ERROR);
+	    DELAY(1);
+	}
+	ctlr->interrupt[unit].function(ch);
     }
 }
 
Index: ata-pci.h
===================================================================
RCS file: /vol/ncvs/src/sys/dev/ata/ata-pci.h,v
retrieving revision 1.18
diff -u -r1.18 ata-pci.h
--- ata-pci.h	18 Nov 2003 15:27:28 -0000	1.18
+++ ata-pci.h	19 Nov 2003 23:38:08 -0000
@@ -255,6 +255,7 @@
 #define SIIMEMIO	1
 #define SIIINTR		0x01
 #define SIISETCLK	0x02
+#define SIINOBMS	0x04
 
 #define SIS_SOUTH	1
 #define SISSATA		2
Index: ata-queue.c
===================================================================
RCS file: /vol/ncvs/src/sys/dev/ata/ata-queue.c,v
retrieving revision 1.11
diff -u -r1.11 ata-queue.c
--- ata-queue.c	20 Oct 2003 14:28:37 -0000	1.11
+++ ata-queue.c	20 Nov 2003 00:56:48 -0000
@@ -316,6 +316,8 @@
 ata_timeout(struct ata_request *request)
 {
     struct ata_channel *ch = request->device->channel;
+    struct ata_device *reqdev = request->device;
+    char *reqstr = ata_cmd2str(request);
     int quiet = request->flags & ATA_R_QUIET;
 
     /* clear timeout etc */
@@ -324,10 +326,11 @@
     /* call hw.interrupt to try finish up the command */
     ch->hw.interrupt(request->device->channel);
     if (ch->running != request) {
+	/* request might already be freed - use copies. */
 	if (!quiet)
-	    ata_prtdev(request->device,
+	    ata_prtdev(reqdev,
 		       "WARNING - %s recovered from missing interrupt\n",
-		       ata_cmd2str(request));
+		       reqstr);
 	return;
     }
 

--3MwIy2ne0vdjdPXF--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20031120031150.GA759>