Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 28 Nov 2002 10:01:33 +0100 (CET)
From:      Tomas Pluskal <plusik@pohoda.cz>
To:        Ian Dowse <iedowse@maths.tcd.ie>
Cc:        n_hibma@van-laarhoven.org, <freebsd-hackers@freebsd.org>
Subject:   Re: umass driver speed 
Message-ID:  <20021128095842.M389-200000@localhost.localdomain>
In-Reply-To: <200211280053.aa40102@salmon.maths.tcd.ie>

next in thread | previous in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]

Bingo! :)

When I changed the bsqh initialisation, I get speed above 400 KB/s,
that makes me feel much better :)

I am sending a corrected patch as attachment.

Tomas

On Thu, 28 Nov 2002, Ian Dowse wrote:

> I had a quick look at the patch  - one thing I noticed is that you
> are possibly not initialising all of the bsqh fields. In -current,
> bsqh->hlink and bsqh->qh.qh_hlink are set up to point at the `lsqh'
> QH. It might not be the problem though.
>
> Ian
>

[-- Attachment #2 --]
diff -u stable/uhci.c verze3/uhci.c
--- stable/uhci.c	Wed Nov  6 21:23:50 2002
+++ verze3/uhci.c	Thu Nov 28 09:48:45 2002
@@ -79,6 +79,9 @@
 #include <dev/usb/uhcireg.h>
 #include <dev/usb/uhcivar.h>
 
+/* Use bandwidth reclamation for control transfers. Some devices choke on it. */
+/*#define UHCI_CTL_LOOP */
+
 #if defined(__FreeBSD__)
 #include <machine/clock.h>
 
@@ -97,22 +100,37 @@
 #define DPRINTF(x)	if (uhcidebug) printf x
 #define DPRINTFN(n,x)	if (uhcidebug>(n)) printf x
 int uhcidebug = 0;
+int uhcinoloop = 0;
 SYSCTL_NODE(_hw_usb, OID_AUTO, uhci, CTLFLAG_RW, 0, "USB uhci");
 SYSCTL_INT(_hw_usb_uhci, OID_AUTO, debug, CTLFLAG_RW,
 	   &uhcidebug, 0, "uhci debug level");
+SYSCTL_INT(_hw_usb_uhci, OID_AUTO, loop, CTLFLAG_RW,
+	   &uhcinoloop, 0, "uhci noloop");
 #else
 #define DPRINTF(x)
 #define DPRINTFN(n,x)
 #endif
 
+/* 
+ * Just a workaround, because FreeBSD 4-STABLE doesn't have bswap32
+ */
+#if defined(__FreeBSD__)
+#define bswap32(x) (((x << 24) & 0xff000000 ) | \
+                    ((x <<  8) & 0x00ff0000 ) | \
+                    ((x >>  8) & 0x0000ff00 ) | \
+                    ((x >> 24) & 0x000000ff ))
+#endif
+
 /*
  * The UHCI controller is little endian, so on big endian machines
  * the data strored in memory needs to be swapped.
  */
 #if BYTE_ORDER == BIG_ENDIAN
 #define LE(x) (bswap32(x))
+#define HE(x) (x)
 #else
 #define LE(x) (x)
+#define HE(x) (bswap32(x))
 #endif
 
 struct uhci_pipe {
@@ -183,11 +201,15 @@
 Static void		uhci_timeout(void *);
 Static void		uhci_lock_frames(uhci_softc_t *);
 Static void		uhci_unlock_frames(uhci_softc_t *);
-Static void		uhci_add_ctrl(uhci_softc_t *, uhci_soft_qh_t *);
+Static void		uhci_add_ls_ctrl(uhci_softc_t *, uhci_soft_qh_t *);
+Static void		uhci_add_hs_ctrl(uhci_softc_t *, uhci_soft_qh_t *);
 Static void		uhci_add_bulk(uhci_softc_t *, uhci_soft_qh_t *);
-Static void		uhci_remove_ctrl(uhci_softc_t *,uhci_soft_qh_t *);
+Static void		uhci_remove_ls_ctrl(uhci_softc_t *,uhci_soft_qh_t *);
+Static void		uhci_remove_hs_ctrl(uhci_softc_t *,uhci_soft_qh_t *);
 Static void		uhci_remove_bulk(uhci_softc_t *,uhci_soft_qh_t *);
 Static int		uhci_str(usb_string_descriptor_t *, int, char *);
+Static void             uhci_add_loop(uhci_softc_t *sc);
+Static void             uhci_rem_loop(uhci_softc_t *sc);
 
 Static usbd_status	uhci_setup_isoc(usbd_pipe_handle pipe);
 Static void		uhci_device_isoc_enter(usbd_xfer_handle);
@@ -243,6 +265,10 @@
 Static usbd_status	uhci_device_setintr(uhci_softc_t *sc, 
 			    struct uhci_pipe *pipe, int ival);
 
+Static __inline__ uhci_soft_qh_t *uhci_find_prev_qh(uhci_soft_qh_t *,
+						    uhci_soft_qh_t *);
+
+
 Static void		uhci_device_clear_toggle(usbd_pipe_handle pipe);
 Static void		uhci_noop(usbd_pipe_handle pipe);
 
@@ -333,6 +359,22 @@
 	uhci_device_isoc_done,
 };
 
+Static __inline__ uhci_soft_qh_t *
+uhci_find_prev_qh(uhci_soft_qh_t *pqh, uhci_soft_qh_t *sqh)
+{
+	DPRINTFN(15,("uhci_find_prev_qh: pqh=%p sqh=%p\n", pqh, sqh));
+
+	for (; pqh->hlink != sqh; pqh = pqh->hlink) {
+#if defined(DIAGNOSTIC) || defined(USB_DEBUG)
+		if (HE(pqh->qh.qh_hlink) & UHCI_PTR_T) {
+			printf("uhci_find_prev_qh: QH not found\n");
+			return (NULL);
+		}
+#endif
+	}
+	return (pqh);
+}
+
 void
 uhci_busreset(uhci_softc_t *sc)
 {
@@ -346,7 +388,7 @@
 {
 	usbd_status err;
 	int i, j;
-	uhci_soft_qh_t *csqh, *bsqh, *sqh;
+	uhci_soft_qh_t *clsqh, *chsqh, *bsqh, *sqh, *lsqh;
 	uhci_soft_td_t *std;
 
 	DPRINTFN(1,("uhci_init: start\n"));
@@ -371,22 +413,60 @@
 	UWRITE2(sc, UHCI_FRNUM, 0);		/* set frame number to 0 */
 	UWRITE4(sc, UHCI_FLBASEADDR, DMAADDR(&sc->sc_dma, 0)); /* set frame list*/
 
+	/*
+	 * Allocate a TD, inactive, that hangs from the last QH.
+	 * This is to avoid a bug in the PIIX that makes it run berserk
+	 * otherwise.
+	 */
+	std = uhci_alloc_std(sc);
+	if (std == NULL)
+		return (USBD_NOMEM);
+	std->link.std = NULL;
+	std->td.td_link = LE(UHCI_PTR_T);
+	std->td.td_status = LE(0); /* inactive */
+	std->td.td_token = LE(0);
+	std->td.td_buffer = LE(0);
+        
+	/* Allocate the dummy QH marking the end and used for looping the QHs.*/
+	lsqh = uhci_alloc_sqh(sc);
+	if (lsqh == NULL)
+		return (USBD_NOMEM);
+	lsqh->hlink = NULL;
+	lsqh->qh.qh_hlink = LE(UHCI_PTR_T);	/* end of QH chain */
+	lsqh->elink = std;
+	lsqh->qh.qh_elink = LE(std->physaddr | UHCI_PTR_TD);
+	sc->sc_last_qh = lsqh;
+        
 	/* Allocate the dummy QH where bulk traffic will be queued. */
 	bsqh = uhci_alloc_sqh(sc);
 	if (bsqh == NULL)
 		return (USBD_NOMEM);
-	bsqh->qh.qh_hlink = LE(UHCI_PTR_T);	/* end of QH chain */
+	bsqh->hlink = lsqh;
+	bsqh->qh.qh_hlink = LE(lsqh->physaddr | UHCI_PTR_Q);
+	bsqh->elink = NULL;
 	bsqh->qh.qh_elink = LE(UHCI_PTR_T);
 	sc->sc_bulk_start = sc->sc_bulk_end = bsqh;
 
-	/* Allocate the dummy QH where control traffic will be queued. */
-	csqh = uhci_alloc_sqh(sc);
-	if (csqh == NULL)
+        /* Allocate dummy QH where high speed control traffic will be queued. */
+	chsqh = uhci_alloc_sqh(sc);
+	if (chsqh == NULL)
 		return (USBD_NOMEM);
-	csqh->hlink = bsqh;
-	csqh->qh.qh_hlink = LE(bsqh->physaddr | UHCI_PTR_Q);
-	csqh->qh.qh_elink = LE(UHCI_PTR_T);
-	sc->sc_ctl_start = sc->sc_ctl_end = csqh;
+	chsqh->hlink = bsqh;
+	chsqh->qh.qh_hlink = LE(bsqh->physaddr | UHCI_PTR_Q);
+	chsqh->elink = NULL;
+	chsqh->qh.qh_elink = LE(UHCI_PTR_T);
+	sc->sc_hctl_start = sc->sc_hctl_end = chsqh;
+
+	/* Allocate dummy QH where control traffic will be queued. */
+	clsqh = uhci_alloc_sqh(sc);
+	if (clsqh == NULL)
+		return (USBD_NOMEM);
+	clsqh->hlink = bsqh;
+	clsqh->qh.qh_hlink = LE(chsqh->physaddr | UHCI_PTR_Q);
+	clsqh->elink = NULL;
+	clsqh->qh.qh_elink = LE(UHCI_PTR_T);
+	sc->sc_lctl_start = sc->sc_lctl_end = clsqh;
+
 
 	/* 
 	 * Make all (virtual) frame list pointers point to the interrupt
@@ -403,8 +483,8 @@
 		std->td.td_status = LE(UHCI_TD_IOS);	/* iso, inactive */
 		std->td.td_token = LE(0);
 		std->td.td_buffer = LE(0);
-		sqh->hlink = csqh;
-		sqh->qh.qh_hlink = LE(csqh->physaddr | UHCI_PTR_Q);
+		sqh->hlink = clsqh;
+		sqh->qh.qh_hlink = LE(clsqh->physaddr | UHCI_PTR_Q);
 		sqh->elink = 0;
 		sqh->qh.qh_elink = LE(UHCI_PTR_T);
 		sc->sc_vframes[i].htd = std;
@@ -825,45 +905,128 @@
 	LIST_INSERT_HEAD(&uhci_ii_free, ii, list); /* and put on free list */
 }
 
-/* Add control QH, called at splusb(). */
+/*
+ * Let the last QH loop back to the high speed control transfer QH.
+ * This is what intel calls "bandwidth reclamation" and improves
+ * USB performance a lot for some devices.
+ * If we are already looping, just count it.
+ */
+void
+uhci_add_loop(uhci_softc_t *sc) {
+#ifdef USB_DEBUG
+	if (uhcinoloop)
+		return;
+#endif
+	if (++sc->sc_loops == 1) {
+		DPRINTFN(5,("uhci_start_loop: add\n"));
+		/* Note, we don't loop back the soft pointer. */
+		sc->sc_last_qh->qh.qh_hlink =
+		    LE(sc->sc_hctl_start->physaddr | UHCI_PTR_Q);
+	}
+}
+
+void
+uhci_rem_loop(uhci_softc_t *sc) {
+#ifdef USB_DEBUG
+	if (uhcinoloop)
+		return;
+#endif
+	if (--sc->sc_loops == 0) {
+		DPRINTFN(5,("uhci_end_loop: remove\n"));
+		sc->sc_last_qh->qh.qh_hlink = LE(UHCI_PTR_T);
+	}
+}
+
+/* Add high speed control QH, called at splusb(). */
 void
-uhci_add_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh)
+uhci_add_hs_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh)
 {
 	uhci_soft_qh_t *eqh;
 
 	SPLUSBCHECK;
 
 	DPRINTFN(10, ("uhci_add_ctrl: sqh=%p\n", sqh));
-	eqh = sc->sc_ctl_end;
+	eqh = sc->sc_hctl_end;
 	sqh->hlink       = eqh->hlink;
 	sqh->qh.qh_hlink = eqh->qh.qh_hlink;
 	eqh->hlink       = sqh;
 	eqh->qh.qh_hlink = LE(sqh->physaddr | UHCI_PTR_Q);
-	sc->sc_ctl_end = sqh;
+	sc->sc_hctl_end = sqh;
+#ifdef UHCI_CTL_LOOP
+	uhci_add_loop(sc);
+#endif
 }
 
-/* Remove control QH, called at splusb(). */
+/* Remove high speed control QH, called at splusb(). */
 void
-uhci_remove_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh)
+uhci_remove_hs_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh)
 {
 	uhci_soft_qh_t *pqh;
 
 	SPLUSBCHECK;
 
-	DPRINTFN(10, ("uhci_remove_ctrl: sqh=%p\n", sqh));
-	for (pqh = sc->sc_ctl_start; pqh->hlink != sqh; pqh=pqh->hlink)
-#if defined(DIAGNOSTIC) || defined(USB_DEBUG)		
-		if (LE(pqh->qh.qh_hlink) & UHCI_PTR_T) {
-			printf("uhci_remove_ctrl: QH not found\n");
-			return;
-		}
-#else
-		;
+	DPRINTFN(10, ("uhci_remove_hs_ctrl: sqh=%p\n", sqh));
+#ifdef UHCI_CTL_LOOP
+	uhci_rem_loop(sc);
 #endif
-	pqh->hlink       = sqh->hlink;
+	/*
+	 * The T bit should be set in the elink of the QH so that the HC
+	 * doesn't follow the pointer.  This condition may fail if the
+	 * the transferred packet was short so that the QH still points
+	 * at the last used TD.
+	 * In this case we set the T bit and wait a little for the HC
+	 * to stop looking at the TD.
+	 */
+	if (!(sqh->qh.qh_elink & LE(UHCI_PTR_T))) {
+		sqh->qh.qh_elink = LE(UHCI_PTR_T);
+		delay(UHCI_QH_REMOVE_DELAY);
+	}
+
+	pqh = uhci_find_prev_qh(sc->sc_hctl_start, sqh);
+	pqh->hlink = sqh->hlink;
 	pqh->qh.qh_hlink = sqh->qh.qh_hlink;
-	if (sc->sc_ctl_end == sqh)
-		sc->sc_ctl_end = pqh;
+	delay(UHCI_QH_REMOVE_DELAY);
+	if (sc->sc_hctl_end == sqh)
+		sc->sc_hctl_end = pqh;
+}
+
+/* Add low speed control QH, called at splusb(). */
+void
+uhci_add_ls_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh)
+{
+	uhci_soft_qh_t *eqh;
+
+	SPLUSBCHECK;
+
+	DPRINTFN(10, ("uhci_add_ls_ctrl: sqh=%p\n", sqh));
+	eqh = sc->sc_lctl_end;
+	sqh->hlink = eqh->hlink;
+	sqh->qh.qh_hlink = eqh->qh.qh_hlink;
+	eqh->hlink = sqh;
+	eqh->qh.qh_hlink = LE(sqh->physaddr | UHCI_PTR_Q);
+	sc->sc_lctl_end = sqh;
+}
+
+/* Remove low speed control QH, called at splusb(). */
+void
+uhci_remove_ls_ctrl(uhci_softc_t *sc, uhci_soft_qh_t *sqh)
+{
+	uhci_soft_qh_t *pqh;
+
+	SPLUSBCHECK;
+
+	DPRINTFN(10, ("uhci_remove_ls_ctrl: sqh=%p\n", sqh));
+	/* See comment in uhci_remove_hs_ctrl() */
+	if (!(sqh->qh.qh_elink & LE(UHCI_PTR_T))) {
+		sqh->qh.qh_elink = LE(UHCI_PTR_T);
+		delay(UHCI_QH_REMOVE_DELAY);
+	}
+	pqh = uhci_find_prev_qh(sc->sc_lctl_start, sqh);
+	pqh->hlink = sqh->hlink;
+	pqh->qh.qh_hlink = sqh->qh.qh_hlink;
+	delay(UHCI_QH_REMOVE_DELAY);
+	if (sc->sc_lctl_end == sqh)
+		sc->sc_lctl_end = pqh;
 }
 
 /* Add bulk QH, called at splusb(). */
@@ -881,6 +1044,7 @@
 	eqh->hlink       = sqh;
 	eqh->qh.qh_hlink = LE(sqh->physaddr | UHCI_PTR_Q);
 	sc->sc_bulk_end = sqh;
+        uhci_add_loop(sc);
 }
 
 /* Remove bulk QH, called at splusb(). */
@@ -892,6 +1056,8 @@
 	SPLUSBCHECK;
 
 	DPRINTFN(10, ("uhci_remove_bulk: sqh=%p\n", sqh));
+	uhci_rem_loop(sc);
+        
 	for (pqh = sc->sc_bulk_start; pqh->hlink != sqh; pqh = pqh->hlink)
 #if defined(DIAGNOSTIC) || defined(USB_DEBUG)		
 		if (LE(pqh->qh.qh_hlink) & UHCI_PTR_T) {
@@ -1929,7 +2095,10 @@
 	sqh->intr_info = ii;
 
 	s = splusb();
-	uhci_add_ctrl(sc, sqh);
+	if (dev->lowspeed)
+		uhci_add_ls_ctrl(sc, sqh);
+	else
+		uhci_add_hs_ctrl(sc, sqh);
 	LIST_INSERT_HEAD(&sc->sc_intrhead, ii, list);
 #ifdef USB_DEBUG
 	if (uhcidebug > 12) {
@@ -2320,7 +2489,10 @@
 
 	LIST_REMOVE(ii, list);	/* remove from active list */
 
-	uhci_remove_ctrl(sc, upipe->u.ctl.sqh);
+    	if (upipe->pipe.device->lowspeed)
+		uhci_remove_ls_ctrl(sc, upipe->u.ctl.sqh);
+	else
+		uhci_remove_hs_ctrl(sc, upipe->u.ctl.sqh);
 
 	if (upipe->u.ctl.length != 0)
 		uhci_free_std_chain(sc, ii->stdstart->link.std, ii->stdend);
@@ -2604,6 +2776,7 @@
 		USETW2(p->bString[i], 0, s[i]);
 	return (2*i+2);
 }
+
 
 /*
  * Simulate a hardware hub by handling all the necessary requests.
diff -u stable/uhcireg.h verze3/uhcireg.h
--- stable/uhcireg.h	Sun Jul  2 13:43:59 2000
+++ verze3/uhcireg.h	Thu Nov 28 00:32:23 2002
@@ -113,9 +113,16 @@
 #define UHCI_QH_ALIGN		16
 
 typedef u_int32_t uhci_physaddr_t;
+#define UHCI_PTR_TD             0x00000000
 #define UHCI_PTR_T		0x00000001
 #define UHCI_PTR_Q		0x00000002
 #define UHCI_PTR_VF		0x00000004
+
+/* 
+ * Wait this long after a QH has been removed.  This gives that HC a
+ * chance to stop looking at it before it's recycled.
+ */
+#define UHCI_QH_REMOVE_DELAY	5
 
 /*
  * The Queue Heads and Transfer Descriptors and accessed
diff -u stable/uhcivar.h verze3/uhcivar.h
--- stable/uhcivar.h	Wed Nov  1 00:23:29 2000
+++ verze3/uhcivar.h	Thu Nov 28 00:23:12 2002
@@ -141,10 +141,14 @@
 	usb_dma_t sc_dma;
 	struct uhci_vframe sc_vframes[UHCI_VFRAMELIST_COUNT];
 
-	uhci_soft_qh_t *sc_ctl_start;	/* dummy QH for control */
-	uhci_soft_qh_t *sc_ctl_end;	/* last control QH */
+	uhci_soft_qh_t *sc_lctl_start;	/* dummy QH for low speed control */
+	uhci_soft_qh_t *sc_lctl_end;	/* last control QH */
+	uhci_soft_qh_t *sc_hctl_start;	/* dummy QH for high speed control */
+	uhci_soft_qh_t *sc_hctl_end;	/* last control QH */
 	uhci_soft_qh_t *sc_bulk_start;	/* dummy QH for bulk */
 	uhci_soft_qh_t *sc_bulk_end;	/* last bulk transfer */
+	uhci_soft_qh_t *sc_last_qh;	/* dummy QH at the end */
+	u_int32_t sc_loops;		/* number of QHs that wants looping */
 
 	uhci_soft_td_t *sc_freetds;	/* TD free list */
 	uhci_soft_qh_t *sc_freeqhs;	/* QH free list */

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20021128095842.M389-200000>