Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 06 Nov 2009 13:52:48 +0900
From:      Alexander Nedotsukov <bland@bbnest.net>
To:        Alexander Nedotsukov <bland@freebsd.org>
Cc:        current@freebsd.org, Andrew Thompson <thompsa@freebsd.org>, Hans Petter Selasky <hselasky@c2i.net>
Subject:   Re: umass problem.
Message-ID:  <20d8e6193795d83f9ffa30ab94bf86eb@mail>
In-Reply-To: <202969100caf7f0bb8098572b0dad622@mail>
References:  <202969100caf7f0bb8098572b0dad622@mail>

next in thread | previous in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
Well I do. This is a regression in EHCI driver introduced along the new
usb stack. Please review patch attached.

Thanks,
Alexander.

On Tue, 20 Oct 2009 10:53:36 +0900, Alexander Nedotsukov
<bland@freebsd.org> wrote:
> Hi,
> 
> I can reproduce this reliably with simple dd if=/dev/da0. At the end
> external USB drive turns off the lights (tough it still spinning) and
> system fall into the state where it is impossible to reboot it cleanly.
I
> can get drive back only after full power cycle (simple reboot is not
> enough). Using same drive under moderate load seems mostly to work. It
also
> is used to work under 7.2.
> 
> Anyone have an idea what this could be?

[-- Attachment #2 --]
Index: controller/ehci_pci.c
===================================================================
--- controller/ehci_pci.c	(revision 198966)
+++ controller/ehci_pci.c	(working copy)
@@ -414,6 +414,20 @@
 		sc->sc_intr_hdl = NULL;
 		goto error;
 	}
+
+	/* Enable workaround for dropped interrupts as required */
+	switch (pci_get_vendor(self)) {
+	case PCI_EHCI_VENDORID_ATI:
+	case PCI_EHCI_VENDORID_VIA:
+		sc->sc_flags |= EHCI_SCFLG_LOSTINTRBUG;
+		if (bootverbose)
+			device_printf(self,
+			    "Dropped interrupts workaround enabled\n");
+		break;
+	default:
+		break;
+	}
+
 	ehci_pci_takecontroller(self);
 
 	/* Undocumented quirks taken from Linux */
Index: controller/ehci.c
===================================================================
--- controller/ehci.c	(revision 198966)
+++ controller/ehci.c	(working copy)
@@ -117,6 +117,7 @@
 static void ehci_device_done(struct usb_xfer *xfer, usb_error_t error);
 static uint8_t ehci_check_transfer(struct usb_xfer *xfer);
 static void ehci_timeout(void *arg);
+static void ehci_intrq_timeout(void *arg);
 static void ehci_root_intr(ehci_softc_t *sc);
 
 struct ehci_std_temp {
@@ -243,6 +244,7 @@
 	DPRINTF("start\n");
 
 	usb_callout_init_mtx(&sc->sc_tmo_pcd, &sc->sc_bus.bus_mtx, 0);
+	usb_callout_init_mtx(&sc->sc_tmo_intrq, &sc->sc_bus.bus_mtx, 0);
 
 #if USB_DEBUG
 	if (ehcidebug > 2) {
@@ -519,6 +521,7 @@
 {
 	USB_BUS_LOCK(&sc->sc_bus);
 
+	usb_callout_stop(&sc->sc_tmo_intrq);
 	usb_callout_stop(&sc->sc_tmo_pcd);
 
 	EOWRITE4(sc, EHCI_USBINTR, sc->sc_eintrs);
@@ -1472,6 +1475,26 @@
 	}
 }
 
+/*
+ * Some EHCI chips from VIA / ATI seem to trigger interrupts before writing
+ * back the qTD status, or miss signalling occasionally under heavy load.
+ * If the host machine is too fast, we can miss transaction completion - when
+ * we scan the active list the transaction still seems to be active. This
+ * generally exhibits itself as a umass stall that never recovers.
+ *
+ * We work around this behaviour by setting up this callback after any softintr
+ * that completes with transactions still pending, giving us another chance to
+ * check for completion after the writeback has taken place.
+ */
+void
+ehci_intrq_timeout(void* arg)
+{
+	ehci_softc_t *sc = arg;
+
+	DPRINTFN(3, "ehci_intrq_timeout\n");
+	ehci_interrupt_poll(sc);
+}
+
 /*------------------------------------------------------------------------*
  *	ehci_interrupt - EHCI interrupt handler
  *
@@ -1539,6 +1562,11 @@
 	/* poll all the USB transfers */
 	ehci_interrupt_poll(sc);
 
+	if ((sc->sc_flags & EHCI_SCFLG_LOSTINTRBUG) &&
+	    !TAILQ_EMPTY(&sc->sc_bus.intr_q.head))
+		usb_callout_reset(&sc->sc_tmo_intrq, hz / 5, (void *)&ehci_intrq_timeout,
+			sc);
+
 done:
 	USB_BUS_UNLOCK(&sc->sc_bus);
 }
Index: controller/ehci.h
===================================================================
--- controller/ehci.h	(revision 198966)
+++ controller/ehci.h	(working copy)
@@ -321,6 +321,7 @@
 	struct ehci_hw_softc sc_hw;
 	struct usb_bus sc_bus;		/* base device */
 	struct usb_callout sc_tmo_pcd;
+	struct usb_callout sc_tmo_intrq;
 	union ehci_hub_desc sc_hub_desc;
 
 	struct usb_device *sc_devices[EHCI_MAX_DEVICES];
@@ -348,6 +349,7 @@
 #define	EHCI_SCFLG_BIGEDESC	0x0008	/* big-endian byte order descriptors */
 #define	EHCI_SCFLG_BIGEMMIO	0x0010	/* big-endian byte order MMIO */
 #define	EHCI_SCFLG_TT		0x0020	/* transaction translator present */
+#define	EHCI_SCFLG_LOSTINTRBUG	0x0040  /* workaround for VIA / ATI chipsets */
 
 	uint8_t	sc_offs;		/* offset to operational registers */
 	uint8_t	sc_doorbell_disable;	/* set on doorbell failure */

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20d8e6193795d83f9ffa30ab94bf86eb>