From owner-p4-projects@FreeBSD.ORG Fri Sep 5 21:01:33 2008 Return-Path: Delivered-To: p4-projects@freebsd.org Received: by hub.freebsd.org (Postfix, from userid 32767) id 73218106566B; Fri, 5 Sep 2008 21:01:33 +0000 (UTC) Delivered-To: perforce@FreeBSD.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 36E10106567C for ; Fri, 5 Sep 2008 21:01:33 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repoman.freebsd.org (repoman.freebsd.org [IPv6:2001:4f8:fff6::29]) by mx1.freebsd.org (Postfix) with ESMTP id 212BF8FC18 for ; Fri, 5 Sep 2008 21:01:33 +0000 (UTC) (envelope-from hselasky@FreeBSD.org) Received: from repoman.freebsd.org (localhost [127.0.0.1]) by repoman.freebsd.org (8.14.3/8.14.3) with ESMTP id m85L1XjN047008 for ; Fri, 5 Sep 2008 21:01:33 GMT (envelope-from hselasky@FreeBSD.org) Received: (from perforce@localhost) by repoman.freebsd.org (8.14.3/8.14.3/Submit) id m85L1X94047001 for perforce@freebsd.org; Fri, 5 Sep 2008 21:01:33 GMT (envelope-from hselasky@FreeBSD.org) Date: Fri, 5 Sep 2008 21:01:33 GMT Message-Id: <200809052101.m85L1X94047001@repoman.freebsd.org> X-Authentication-Warning: repoman.freebsd.org: perforce set sender to hselasky@FreeBSD.org using -f From: Hans Petter Selasky To: Perforce Change Reviews Cc: Subject: PERFORCE change 149280 for review X-BeenThere: p4-projects@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: p4 projects tree changes List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 05 Sep 2008 21:01:33 -0000 http://perforce.freebsd.org/chv.cgi?CH=149280 Change 149280 by hselasky@hselasky_laptop001 on 2008/09/05 21:00:32 Optimisations and improvements. 1) Lower remote wakeup delay to make up for extra delay added by the system timer. 2) Optimise PIO to use 32-bit access whenever possible. 3) Add support for DMA transfer of USB packets. Affected files ... .. //depot/projects/usb/src/sys/dev/usb2/controller/musb2_otg.c#10 edit .. //depot/projects/usb/src/sys/dev/usb2/controller/musb2_otg.h#5 edit Differences ... ==== //depot/projects/usb/src/sys/dev/usb2/controller/musb2_otg.c#10 (text+ko) ==== @@ -92,6 +92,7 @@ static void musbotg_do_poll(struct usb2_bus *bus); static void musbotg_root_ctrl_poll(struct musbotg_softc *sc); static void musbotg_standard_done(struct usb2_xfer *xfer); +static void musbotg_interrupt_poll(struct musbotg_softc *sc); static usb2_sw_transfer_func_t musbotg_root_intr_done; static usb2_sw_transfer_func_t musbotg_root_ctrl_done; @@ -221,13 +222,13 @@ temp |= MUSB2_MASK_RESUME; MUSB2_WRITE_1(sc, MUSB2_REG_POWER, temp); - /* wait 10 milliseconds */ + /* wait 8 milliseconds */ if (use_polling) { /* polling */ - DELAY(10000); + DELAY(8000); } else { /* Wait for reset to complete. */ - usb2_pause_mtx(&sc->sc_bus.mtx, 10); + usb2_pause_mtx(&sc->sc_bus.mtx, 8); } temp = MUSB2_READ_1(sc, MUSB2_REG_POWER); @@ -422,16 +423,23 @@ return (0); /* we are complete */ } while (count > 0) { + uint32_t temp; + usb2_get_page(td->pc, td->offset, &buf_res); /* get correct length */ if (buf_res.length > count) { buf_res.length = count; } + /* + * Compute the least number of bytes to the next buffer + * alignment address: + */ + temp = 4 - (USB_P2U(buf_res.buffer) & 3); + /* check if we can optimise */ - if ((!(USB_P2U(buf_res.buffer) & 3)) && + if ((temp == 4) && (buf_res.length >= 4)) { - uint32_t temp; /* receive data 4 bytes at a time */ bus_space_read_multi_4(sc->sc_io_tag, sc->sc_io_hdl, @@ -446,6 +454,10 @@ td->remainder -= temp; continue; } + /* minimise data transfer length */ + if (buf_res.length > temp) { + buf_res.length = temp; + } /* receive data */ bus_space_read_multi_1(sc->sc_io_tag, sc->sc_io_hdl, MUSB2_REG_EPFIFO(0), buf_res.buffer, buf_res.length); @@ -514,6 +526,7 @@ count = td->remainder; } while (count > 0) { + uint32_t temp; usb2_get_page(td->pc, td->offset, &buf_res); @@ -521,10 +534,15 @@ if (buf_res.length > count) { buf_res.length = count; } + /* + * Compute the least number of bytes to the next buffer + * alignment address: + */ + temp = 4 - (USB_P2U(buf_res.buffer) & 3); + /* check if we can optimise */ - if ((!(USB_P2U(buf_res.buffer) & 3)) && + if ((temp == 4) && (buf_res.length >= 4)) { - uint32_t temp; /* transmit data 4 bytes at a time */ bus_space_write_multi_4(sc->sc_io_tag, sc->sc_io_hdl, @@ -539,6 +557,10 @@ td->remainder -= temp; continue; } + /* minimise data transfer length */ + if (buf_res.length > temp) { + buf_res.length = temp; + } /* transmit data */ bus_space_write_multi_1(sc->sc_io_tag, sc->sc_io_hdl, MUSB2_REG_EPFIFO(0), buf_res.buffer, buf_res.length); @@ -598,6 +620,33 @@ return (0); /* complete */ } +#ifdef MUSB2_DMA_ENABLED +void +musbotg_complete_dma_cb(void *arg, uint32_t is_error) +{ + struct musbotg_dma *dma = arg; + struct musbotg_softc *sc; + + sc = dma->sc; + + mtx_lock(&sc->sc_bus.mtx); + + dma->busy = 0; + + if (is_error) { + dma->error = 1; + } + DPRINTFN(4, "DMA interrupt\n"); + + musbotg_interrupt_poll(sc); + + mtx_unlock(&sc->sc_bus.mtx); + + return; +} + +#endif + static uint8_t musbotg_data_rx(struct musbotg_td *td) { @@ -614,6 +663,11 @@ /* get pointer to softc */ sc = MUSBOTG_PC2SC(td->pc); +#ifdef MUSB2_DMA_ENABLED + if (sc->sc_rx_dma[td->ep_no].busy) { + return (1); /* not complete */ + } +#endif /* select endpoint */ MUSB2_WRITE_1(sc, MUSB2_REG_EPINDEX, td->ep_no); @@ -636,7 +690,24 @@ /* get the packet byte count */ count = MUSB2_READ_2(sc, MUSB2_REG_RXCOUNT); - /* verify the packet byte count */ + DPRINTFN(4, "count=0x%04x\n", count); + + /* + * First check for DMA complete and then check for short or + * invalid packet: + */ +#ifdef MUSB2_DMA_ENABLED + if (sc->sc_rx_dma[td->ep_no].complete) { + sc->sc_rx_dma[td->ep_no].complete = 0; + /* check for errors */ + if ((count >= td->max_frame_size) || + (sc->sc_rx_dma[td->ep_no].error)) { + /* invalid USB packet */ + td->error = 1; + return (0); /* we are complete */ + } + } else +#endif if (count != td->max_frame_size) { if (count < td->max_frame_size) { /* we have a short packet */ @@ -655,16 +726,61 @@ return (0); /* we are complete */ } while (count > 0) { + uint32_t temp; + usb2_get_page(td->pc, td->offset, &buf_res); /* get correct length */ if (buf_res.length > count) { buf_res.length = count; } +#ifdef MUSB2_DMA_ENABLED + if (td->dma_enabled) { + /* + * Compute the least number of bytes to the next DMA + * alignment address: + */ + temp = sc->sc_dma_align - + (USB_P2U(buf_res.buffer) & (sc->sc_dma_align - 1)); + + /* check if we can do DMA */ + if ((temp == sc->sc_dma_align) && + (buf_res.length >= sc->sc_dma_align)) { + + temp = buf_res.length & ~(sc->sc_dma_align - 1); + + /* set some status bits */ + sc->sc_rx_dma[td->ep_no].busy = 1; + sc->sc_rx_dma[td->ep_no].complete = 1; + sc->sc_rx_dma[td->ep_no].error = 0; + + /* start DMA job */ + musbotg_start_rxdma(sc->sc_rx_dma + td->ep_no, + buf_res.buffer, temp, td->ep_no); + + /* + * Pre-advance buffer pointers because the + * USB chip will update its counters: + */ + td->offset += temp; + td->remainder -= temp; + return (1); /* wait for callback */ + } + /* minimise data transfer length */ + if (buf_res.length > temp) { + buf_res.length = temp; + } + } +#endif + /* + * Compute the least number of bytes to the next buffer + * alignment address: + */ + temp = 4 - (USB_P2U(buf_res.buffer) & 3); + /* check if we can optimise */ - if ((!(USB_P2U(buf_res.buffer) & 3)) && + if ((temp == 4) && (buf_res.length >= 4)) { - uint32_t temp; /* receive data 4 bytes at a time */ bus_space_read_multi_4(sc->sc_io_tag, sc->sc_io_hdl, @@ -679,6 +795,10 @@ td->remainder -= temp; continue; } + /* minimise data transfer length */ + if (buf_res.length > temp) { + buf_res.length = temp; + } /* receive data */ bus_space_read_multi_1(sc->sc_io_tag, sc->sc_io_hdl, MUSB2_REG_EPFIFO(td->ep_no), buf_res.buffer, @@ -721,6 +841,11 @@ /* get pointer to softc */ sc = MUSBOTG_PC2SC(td->pc); +#ifdef MUSB2_DMA_ENABLED + if (sc->sc_tx_dma[td->ep_no].busy) { + return (1); /* not complete */ + } +#endif /* select endpoint */ MUSB2_WRITE_1(sc, MUSB2_REG_EPINDEX, td->ep_no); @@ -739,6 +864,7 @@ if (csr & MUSB2_MASK_CSRL_TXPKTRDY) { return (1); /* not complete */ } + /* check for short packet */ count = td->max_frame_size; if (td->remainder < count) { /* we have a short packet */ @@ -746,6 +872,7 @@ count = td->remainder; } while (count > 0) { + uint32_t temp; usb2_get_page(td->pc, td->offset, &buf_res); @@ -753,10 +880,67 @@ if (buf_res.length > count) { buf_res.length = count; } +#ifdef MUSB2_DMA_ENABLED + if (td->dma_enabled) { + /* + * Compute the least number of bytes to the next DMA + * alignment address: + */ + temp = sc->sc_dma_align - + (USB_P2U(buf_res.buffer) & (sc->sc_dma_align - 1)); + + /* check if we can do DMA */ + if ((temp == sc->sc_dma_align) && + (buf_res.length >= sc->sc_dma_align)) { + + temp = buf_res.length & ~(sc->sc_dma_align - 1); + + /* + * Check for DMA complete or if we should + * start DMA: + */ + if (sc->sc_tx_dma[td->ep_no].complete) { + sc->sc_tx_dma[td->ep_no].complete = 0; + + /* check for errors */ + if (sc->sc_tx_dma[td->ep_no].error) { + /* invalid USB packet */ + td->error = 1; + /* we are complete */ + return (0); + } + /* update counters */ + count -= temp; + td->offset += temp; + td->remainder -= temp; + continue; + } else { + /* set some status bits */ + sc->sc_tx_dma[td->ep_no].busy = 1; + sc->sc_tx_dma[td->ep_no].complete = 1; + sc->sc_tx_dma[td->ep_no].error = 0; + + /* start DMA job */ + musbotg_start_txdma(sc->sc_tx_dma + td->ep_no, + buf_res.buffer, temp, td->ep_no); + return (1); /* wait for callback */ + } + } + /* minimise data transfer length */ + if (buf_res.length > temp) { + buf_res.length = temp; + } + } +#endif + /* + * Compute the least number of bytes to the next buffer + * alignment address: + */ + temp = 4 - (USB_P2U(buf_res.buffer) & 3); + /* check if we can optimise */ - if ((!(USB_P2U(buf_res.buffer) & 3)) && + if ((temp == 4) && (buf_res.length >= 4)) { - uint32_t temp; /* transmit data 4 bytes at a time */ bus_space_write_multi_4(sc->sc_io_tag, sc->sc_io_hdl, @@ -771,6 +955,10 @@ td->remainder -= temp; continue; } + /* minimise data transfer length */ + if (buf_res.length > temp) { + buf_res.length = temp; + } /* transmit data */ bus_space_write_multi_1(sc->sc_io_tag, sc->sc_io_hdl, MUSB2_REG_EPFIFO(td->ep_no), buf_res.buffer, @@ -1193,6 +1381,21 @@ else temp &= ~MUSB2_MASK_EPINT(ep_no); MUSB2_WRITE_2(sc, MUSB2_REG_INTRXE, temp); + +#ifdef MUSB2_DMA_ENABLED + if (on == 0) { + if (sc->sc_rx_dma[ep_no].busy) { + /* + * The USB driver uses a DMA delay + * so there is no need for an + * immediate DMA stop! + */ + musbotg_stop_rxdma_async(ep_no); + } + sc->sc_rx_dma[ep_no].complete = 0; + sc->sc_rx_dma[ep_no].busy = 0; + } +#endif } else { temp = MUSB2_READ_2(sc, MUSB2_REG_INTTXE); if (on) @@ -1200,6 +1403,21 @@ else temp &= ~MUSB2_MASK_EPINT(ep_no); MUSB2_WRITE_2(sc, MUSB2_REG_INTTXE, temp); + +#ifdef MUSB2_DMA_ENABLED + if (on == 0) { + if (sc->sc_tx_dma[ep_no].busy) { + /* + * The USB driver uses a DMA delay + * so there is no need for an + * immediate DMA stop! + */ + musbotg_stop_txdma_async(ep_no); + } + sc->sc_tx_dma[ep_no].complete = 0; + sc->sc_tx_dma[ep_no].busy = 0; + } +#endif } } return; @@ -1453,23 +1671,31 @@ if (ep_dir == UE_DIR_IN) { + /* check if we support DMA */ + if (musbotg_support_txdma(ep_no)) { + temp = MUSB2_MASK_CSRH_TXDMAREQMODE | + MUSB2_MASK_CSRH_TXDMAREQENA; + } else { + temp = 0; + } + /* Configure endpoint */ switch (ep_type) { case UE_INTERRUPT: MUSB2_WRITE_1(sc, MUSB2_REG_TXMAXP, wMaxPacket); MUSB2_WRITE_1(sc, MUSB2_REG_TXCSRH, - MUSB2_MASK_CSRH_TXMODE); + MUSB2_MASK_CSRH_TXMODE | temp); break; case UE_ISOCHRONOUS: MUSB2_WRITE_1(sc, MUSB2_REG_TXMAXP, wMaxPacket); MUSB2_WRITE_1(sc, MUSB2_REG_TXCSRH, MUSB2_MASK_CSRH_TXMODE | - MUSB2_MASK_CSRH_TXISO); + MUSB2_MASK_CSRH_TXISO | temp); break; case UE_BULK: MUSB2_WRITE_1(sc, MUSB2_REG_TXMAXP, wMaxPacket); MUSB2_WRITE_1(sc, MUSB2_REG_TXCSRH, - MUSB2_MASK_CSRH_TXMODE); + MUSB2_MASK_CSRH_TXMODE | temp); break; default: break; @@ -1512,22 +1738,30 @@ } } else { + /* check if we support DMA */ + if (musbotg_support_rxdma(ep_no)) { + temp = MUSB2_MASK_CSRH_RXDMAREQMODE | + MUSB2_MASK_CSRH_RXDMAREQENA; + } else { + temp = 0; + } + /* Configure endpoint */ switch (ep_type) { case UE_INTERRUPT: MUSB2_WRITE_1(sc, MUSB2_REG_RXMAXP, wMaxPacket); MUSB2_WRITE_1(sc, MUSB2_REG_RXCSRH, - MUSB2_MASK_CSRH_RXNYET); + MUSB2_MASK_CSRH_RXNYET | temp); break; case UE_ISOCHRONOUS: MUSB2_WRITE_1(sc, MUSB2_REG_RXMAXP, wMaxPacket); MUSB2_WRITE_1(sc, MUSB2_REG_RXCSRH, MUSB2_MASK_CSRH_RXNYET | - MUSB2_MASK_CSRH_RXISO); + MUSB2_MASK_CSRH_RXISO | temp); break; case UE_BULK: MUSB2_WRITE_1(sc, MUSB2_REG_RXMAXP, wMaxPacket); - MUSB2_WRITE_1(sc, MUSB2_REG_RXCSRH, 0); + MUSB2_WRITE_1(sc, MUSB2_REG_RXCSRH, temp); break; default: break; @@ -1618,6 +1852,15 @@ sc->sc_bus.usbrev = USB_REV_2_0; sc->sc_bus.methods = &musbotg_bus_methods; +#ifdef MUSB2_DMA_ENABLED + /* initialise DMA structures */ + for (temp = 0; temp != 16; temp++) { + sc->sc_rx_dma[temp].sc = sc; + sc->sc_tx_dma[temp].sc = sc; + } + sc->sc_dma_align = musbotg_get_dma_align(); +#endif + mtx_lock(&sc->sc_bus.mtx); /* turn on clocks */ @@ -2732,6 +2975,17 @@ td->ep_no = ep_no; td->obj_next = last_obj; + /* check for DMA support */ + if ((xfer->endpoint & (UE_DIR_IN | + UE_DIR_OUT)) == UE_DIR_IN) { + if (musbotg_support_txdma(ep_no)) { + td->dma_enabled = 1; + } + } else { + if (musbotg_support_rxdma(ep_no)) { + td->dma_enabled = 1; + } + } last_obj = td; } parm->size[0] += sizeof(*td); ==== //depot/projects/usb/src/sys/dev/usb2/controller/musb2_otg.h#5 (text+ko) ==== @@ -288,9 +288,17 @@ bus_space_write_1((sc)->sc_io_tag, (sc)->sc_io_hdl, reg, data) struct musbotg_td; +struct musbotg_softc; typedef uint8_t (musbotg_cmd_t)(struct musbotg_td *td); +struct musbotg_dma { + struct musbotg_softc *sc; + uint8_t busy:1; + uint8_t complete:1; + uint8_t error:1; +}; + struct musbotg_td { struct musbotg_td *obj_next; musbotg_cmd_t *func; @@ -304,6 +312,7 @@ uint8_t short_pkt:1; uint8_t support_multi_buffer:1; uint8_t did_stall:1; + uint8_t dma_enabled:1; }; struct musbotg_std_temp { @@ -355,6 +364,10 @@ struct usb2_sw_transfer sc_root_intr; struct usb2_config_td sc_config_td; struct usb2_hw_ep_profile sc_hw_ep_profile[16]; +#ifdef MUSB2_DMA_ENABLED + struct musbotg_dma sc_rx_dma[16]; + struct musbotg_dma sc_tx_dma[16]; +#endif struct resource *sc_io_res; struct resource *sc_irq_res; @@ -367,7 +380,10 @@ void (*sc_clocks_off) (void *arg); void *sc_clocks_arg; - uint8_t sc_ep_max; /* maximum number of duplex endpoints */ + uint32_t sc_dma_align; /* DMA buffer alignment */ + + uint8_t sc_ep_max; /* maximum number of RX and TX + * endpoints supported */ uint8_t sc_rt_addr; /* root HUB address */ uint8_t sc_dv_addr; /* device address */ uint8_t sc_conf; /* root HUB config */ @@ -388,4 +404,20 @@ void musbotg_resume(struct musbotg_softc *sc); void musbotg_interrupt(struct musbotg_softc *sc); +#ifdef MUSB2_DMA_ENABLED +void musbotg_start_rxdma(void *arg, const void *dstaddr, uint32_t bytecount, uint32_t ep_no); +void musbotg_start_txdma(void *arg, const void *srcaddr, uint32_t bytecount, uint32_t ep_no); +void musbotg_stop_rxdma_async(uint32_t ep_no); +void musbotg_stop_txdma_async(uint32_t ep_no); +void musbotg_complete_dma_cb(void *arg, uint32_t is_error); +uint32_t musbotg_support_rxdma(uint32_t ep_no); +uint32_t musbotg_support_txdma(uint32_t ep_no); +uint32_t musbotg_get_dma_align(void); + +#else +#define musbotg_support_rxdma(...) 0 +#define musbotg_support_txdma(...) 0 +#define musbotg_get_dma_align(...) 0 +#endif + #endif /* _MUSB2_OTG_H_ */