Date: Sun, 12 Nov 2006 00:18:15 GMT From: Sam Leffler <sam@FreeBSD.org> To: Perforce Change Reviews <perforce@freebsd.org> Subject: PERFORCE change 109763 for review Message-ID: <200611120018.kAC0IFSA097769@repoman.freebsd.org>
next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=109763 Change 109763 by sam@sam_ebb on 2006/11/12 00:17:27 Revamp descriptor/buffer handling to try and improve performance and to get second port closer to working: o split per-packet h/w state from s/w state so the s/w state is in cached memory o add array of backpointers to the softc structs for npe_txdone to locate state given an NPE ID o assign separate rx qid's for each NPE; this allows us to avoid switching between softc's on each packet but means we lose interleaving of rx packets; may want to revisit this o correct macro that extracts the NPE ID from the qmgr entry as returned by the NPE o change txdone processing to empty the h/w q and link done buffers together for each device; then post them back to the tx_free list at the very end o eliminate sc_portid; this is no longer important; the NPE ID is used so we can just reference the value from the npeconfig structure when needed Affected files ... .. //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npe.c#20 edit .. //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npereg.h#6 edit Differences ... 
==== //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npe.c#20 (text+ko) ==== @@ -68,14 +68,23 @@ #include "miibus_if.h" +struct npebuf { + struct npebuf *ix_next; /* chain to next buffer */ + void *ix_m; /* backpointer to mbuf */ + bus_dmamap_t ix_map; /* bus dma map for associated data */ + struct npehwbuf *ix_hw; /* associated h/w block */ + uint32_t ix_neaddr; /* phys address of ix_hw */ +}; + struct npedma { const char* name; int nbuf; /* # npebuf's allocated */ bus_dma_tag_t mtag; /* bus dma tag for mbuf data */ - struct npebuf *buf; /* NPE buffers */ + struct npehwbuf *hwbuf; /* NPE h/w buffers */ bus_dma_tag_t buf_tag; /* tag+map for NPE buffers */ bus_dmamap_t buf_map; bus_addr_t buf_phys; /* phys addr of buffers */ + struct npebuf *buf; /* s/w buffers (1-1 w/ h/w) */ }; struct npe_softc { @@ -88,7 +97,6 @@ device_t sc_mii; /* child miibus */ bus_space_handle_t sc_miih; /* MII register window */ struct ixpnpe_softc *sc_npe; /* NPE support */ - int sc_portid; /* NPE port identification */ int sc_debug; /* DPRINTF* control */ int sc_tickinterval; struct callout tick_ch; /* Tick callout */ @@ -112,6 +120,7 @@ */ static const struct { const char *desc; /* device description */ + int npeid; /* NPE assignment */ int portid; /* NPE Ethernet port */ uint32_t imageid; /* NPE firmware image id */ uint32_t regbase; @@ -122,8 +131,9 @@ uint8_t rx_freeqid; uint8_t tx_qid; uint8_t tx_doneqid; -} npeconfig[] = { +} npeconfig[NPE_PORTS_MAX] = { { .desc = "IXP NPE-B", + .npeid = NPE_B, .portid = 0, .imageid = IXP425_NPE_B_IMAGEID, .regbase = IXP425_MAC_A_HWBASE, @@ -136,18 +146,20 @@ .tx_doneqid = 31 }, { .desc = "IXP NPE-C", + .npeid = NPE_C, .portid = 1, .imageid = IXP425_NPE_C_IMAGEID, .regbase = IXP425_MAC_B_HWBASE, .regsize = IXP425_MAC_B_SIZE, .miibase = IXP425_MAC_A_HWBASE, .miisize = IXP425_MAC_A_SIZE, - .rx_qid = 4, + .rx_qid = 12, .rx_freeqid = 28, .tx_qid = 25, .tx_doneqid = 31 }, }; +static struct npe_softc *npes[NPE_MAX]; /* NB: indexed by npeid */ static 
__inline uint32_t RD4(struct npe_softc *sc, bus_size_t off) @@ -198,9 +210,8 @@ static int npe_setloopback(struct npe_softc *, int ena); #endif -/* NB: all tx+rx traffic goes through one queue */ +/* NB: all tx done processing goes through one queue */ static int tx_doneqid = -1; -static int rx_qid = -1; SYSCTL_NODE(_hw, OID_AUTO, npe, CTLFLAG_RD, 0, "IXP425 NPE driver parameters"); @@ -231,17 +242,15 @@ static int npe_probe(device_t dev) { -#define N(a) (sizeof(a)/sizeof(a[0])) int unit = device_get_unit(dev); - if (unit >= N(npeconfig)) { + if (unit >= NPE_PORTS_MAX) { device_printf(dev, "unit %d not supported\n", unit); return EINVAL; } /* XXX check feature register to see if enabled */ device_set_desc(dev, npeconfig[unit].desc); return 0; -#undef N } static int @@ -398,6 +407,7 @@ npe_dma_setup(struct npe_softc *sc, struct npedma *dma, const char *name, int nbuf, int maxseg) { + int portid = npeconfig[device_get_unit(sc->sc_dev)].portid; int error, i; memset(dma, 0, sizeof(dma)); @@ -406,8 +416,9 @@ dma->nbuf = nbuf; /* DMA tag for mapped mbufs */ - error = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT, - BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, maxseg, MCLBYTES, 0, + error = bus_dma_tag_create(NULL, 1, 0, + BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, + MCLBYTES, maxseg, MCLBYTES, 0, busdma_lock_mutex, &sc->sc_mtx, &dma->mtag); if (error != 0) { device_printf(sc->sc_dev, "unable to create %s mbuf dma tag, " @@ -416,11 +427,11 @@ } /* DMA tag and map for the NPE buffers */ - error = bus_dma_tag_create(NULL, sizeof(struct npebuf), 0, + error = bus_dma_tag_create(NULL, sizeof(uint32_t), 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, - nbuf * sizeof(struct npebuf), 1, - nbuf * sizeof(struct npebuf), 0, busdma_lock_mutex, - &sc->sc_mtx, &dma->buf_tag); + nbuf * sizeof(struct npehwbuf), 1, + nbuf * sizeof(struct npehwbuf), 0, + busdma_lock_mutex, &sc->sc_mtx, &dma->buf_tag); if (error != 0) { device_printf(sc->sc_dev, "unable to 
create %s npebuf dma tag, error %u\n", @@ -428,29 +439,37 @@ return error; } /* XXX COHERENT for now */ - if (bus_dmamem_alloc(dma->buf_tag, (void **)&dma->buf, + if (bus_dmamem_alloc(dma->buf_tag, (void **)&dma->hwbuf, BUS_DMA_NOWAIT | BUS_DMA_ZERO | BUS_DMA_COHERENT, &dma->buf_map) != 0) { device_printf(sc->sc_dev, - "unable to allocate memory for %s npebuf's, error %u\n", + "unable to allocate memory for %s h/w buffers, error %u\n", dma->name, error); return error; } + /* XXX M_TEMP */ + dma->buf = malloc(nbuf * sizeof(struct npebuf), M_TEMP, M_NOWAIT | M_ZERO); + if (dma->buf == NULL) { + device_printf(sc->sc_dev, + "unable to allocate memory for %s s/w buffers\n", + dma->name); + return error; + } if (bus_dmamap_load(dma->buf_tag, dma->buf_map, - dma->buf, nbuf * sizeof(struct npebuf), npe_getaddr, sc, 0) != 0) { + dma->hwbuf, nbuf*sizeof(struct npehwbuf), npe_getaddr, sc, 0) != 0) { device_printf(sc->sc_dev, - "unable to load memory for %s npebuf's, error %u\n", + "unable to map memory for %s h/w buffers, error %u\n", dma->name, error); return error; } dma->buf_phys = sc->buf_phys; for (i = 0; i < dma->nbuf; i++) { struct npebuf *npe = &dma->buf[i]; + struct npehwbuf *hw = &dma->hwbuf[i]; /* calculate offset to shared area */ - npe->ix_neaddr = dma->buf_phys - + (i * sizeof(struct npebuf)) - + offsetof(struct npebuf, ix_ne); + npe->ix_neaddr = dma->buf_phys + + ((uintptr_t)hw - (uintptr_t)dma->hwbuf); KASSERT((npe->ix_neaddr & 0x1f) == 0, ("ixpbuf misaligned, PA 0x%x", npe->ix_neaddr)); error = bus_dmamap_create(dma->mtag, BUS_DMA_NOWAIT, @@ -462,7 +481,8 @@ return error; } /* add port id once */ - npe->ix_neaddr |= sc->sc_portid << 3; + npe->ix_neaddr |= portid << 3; + npe->ix_hw = hw; } bus_dmamap_sync(dma->buf_tag, dma->buf_map, BUS_DMASYNC_PREWRITE); return 0; @@ -473,15 +493,17 @@ { int i; - if (dma->buf != NULL) { + if (dma->hwbuf != NULL) { for (i = 0; i < dma->nbuf; i++) { struct npebuf *npe = &dma->buf[i]; bus_dmamap_destroy(dma->mtag, npe->ix_map); 
} bus_dmamap_unload(dma->buf_tag, dma->buf_map); - bus_dmamem_free(dma->buf_tag, dma->buf, dma->buf_map); + bus_dmamem_free(dma->buf_tag, dma->hwbuf, dma->buf_map); bus_dmamap_destroy(dma->buf_tag, dma->buf_map); } + if (dma->buf != NULL) + free(dma->buf, M_TEMP); if (dma->buf_tag) bus_dma_tag_destroy(dma->buf_tag); if (dma->mtag) @@ -496,12 +518,12 @@ int unit = device_get_unit(dev); int error, i; + /* load NPE firmware and start it running */ error = ixpnpe_init(sc->sc_npe, "npe_fw", npeconfig[unit].imageid); if (error != 0) return error; - sc->sc_portid = npeconfig[unit].portid; if (bus_space_map(sc->sc_iot, npeconfig[unit].regbase, npeconfig[unit].regsize, 0, &sc->sc_ioh)) { device_printf(dev, "Cannot map registers 0x%x:0x%x\n", @@ -532,9 +554,9 @@ return error; /* setup statistics block */ - error = bus_dma_tag_create(NULL, 4, 0, BUS_SPACE_MAXADDR_32BIT, - BUS_SPACE_MAXADDR, NULL, NULL, sizeof(struct npestats), 1, - sizeof(struct npestats), 0, + error = bus_dma_tag_create(NULL, sizeof(uint32_t), 0, + BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, + sizeof(struct npestats), 1, sizeof(struct npestats), 0, busdma_lock_mutex, &sc->sc_mtx, &sc->sc_stats_tag); if (error != 0) { device_printf(sc->sc_dev, "unable to create stats tag, " @@ -576,12 +598,9 @@ * changed at the time the q is configured. 
*/ sc->rx_qid = npeconfig[unit].rx_qid; + ixpqmgr_qconfig(sc->rx_qid, npe_rxbuf, 0, 1, + IX_QMGR_Q_SOURCE_ID_NOT_E, npe_rxdone, sc); sc->rx_freeqid = npeconfig[unit].rx_freeqid; - if (rx_qid == -1) { - ixpqmgr_qconfig(sc->rx_qid, npe_rxbuf, 0, 1, - IX_QMGR_Q_SOURCE_ID_NOT_E, npe_rxdone, sc); - rx_qid = sc->rx_qid; - } ixpqmgr_qconfig(sc->rx_freeqid, npe_rxbuf, 0, npe_rxbuf/2, 0, NULL, sc); /* tell the NPE to direct all traffic to rx_qid */ #if 0 @@ -601,6 +620,10 @@ tx_doneqid = sc->tx_doneqid; } + KASSERT(npes[npeconfig[unit].npeid] == NULL, + ("npe %u already setup", npeconfig[unit].npeid)); + npes[npeconfig[unit].npeid] = sc; + return 0; } @@ -608,7 +631,10 @@ npe_deactivate(device_t dev) { struct npe_softc *sc = device_get_softc(dev); + int unit = device_get_unit(dev); + npes[npeconfig[unit].npeid] = NULL; + /* XXX disable q's */ if (sc->sc_npe != NULL) ixpnpe_stop(sc->sc_npe); @@ -756,43 +782,73 @@ eaddr[5] = RD4(sc, NPE_MAC_UNI_ADDR_6) & 0xff; } +struct txdone { + struct npebuf *head; + struct npebuf **tail; + int count; +}; + +static __inline void +npe_txdone_finish(struct npe_softc *sc, const struct txdone *td) +{ + struct ifnet *ifp = sc->sc_ifp; + + NPE_LOCK(sc); + *td->tail = sc->tx_free; + sc->tx_free = td->head; + /* + * We're no longer busy, so clear the busy flag and call the + * start routine to xmit more packets. + */ + ifp->if_opackets += td->count; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + ifp->if_timer = 0; + npestart_locked(ifp); + NPE_UNLOCK(sc); +} + /* * Q manager callback on tx done queue. Reap mbufs * and return tx buffers to the free list. Finally - * restart output. - * XXX multiple NPE's + * restart output. Note the microcode has only one + * txdone q wired into it so we must use the port id + * returned with each npebuf to decide where to send + * buffers. 
*/ static void npe_txdone(int qid, void *arg) { -/* NB: the / handles the offset to ix_ne */ -#define P2V(a) &dma->buf[((a) - dma->buf_phys) / sizeof(struct npebuf)] - struct npe_softc *sc = arg; - struct ifnet *ifp = sc->sc_ifp; - struct npedma *dma = &sc->txdma; +#define P2V(a, dma) \ + &(dma)->buf[((a) - (dma)->buf_phys) / sizeof(struct npehwbuf)] + struct npe_softc *sc0 = arg; + struct npe_softc *sc; + struct npebuf *npe; + struct txdone *td, q[NPE_MAX]; uint32_t entry; - NPE_LOCK(sc); + /* XXX no NPE-A support */ + q[NPE_B].tail = &q[NPE_B].head; q[NPE_B].count = 0; + q[NPE_C].tail = &q[NPE_C].head; q[NPE_C].count = 0; /* XXX max # at a time? */ while (ixpqmgr_qread(qid, &entry) == 0) { - struct npebuf *npe = P2V(NPE_QM_Q_ADDR(entry)); + DPRINTF(sc0, "%s: entry 0x%x NPE %u port %u\n", + __func__, entry, NPE_QM_Q_NPE(entry), NPE_QM_Q_PORT(entry)); - DPRINTF(sc, "%s: entry 0x%x ne_addr 0x%x\n", - __func__, entry, npe->ix_neaddr); + sc = npes[NPE_QM_Q_NPE(entry)]; + npe = P2V(NPE_QM_Q_ADDR(entry), &sc->txdma); m_freem(npe->ix_m); npe->ix_m = NULL; - npe->ix_next = sc->tx_free; - sc->tx_free = npe; - ifp->if_opackets++; + + td = &q[NPE_QM_Q_NPE(entry)]; + *td->tail = npe; + td->tail = &npe->ix_next; + td->count++; } - /* - * We're no longer busy, so clear the busy flag and call the - * start routine to xmit more packets. 
- */ - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - ifp->if_timer = 0; - npestart_locked(ifp); - NPE_UNLOCK(sc); + + if (q[NPE_B].count) + npe_txdone_finish(npes[NPE_B], &q[NPE_B]); + if (q[NPE_C].count) + npe_txdone_finish(npes[NPE_C], &q[NPE_C]); #undef P2V } @@ -801,6 +857,7 @@ { bus_dma_segment_t segs[1]; struct npedma *dma = &sc->rxdma; + struct npehwbuf *hw; int error, nseg; if (m == NULL) { @@ -819,11 +876,12 @@ m_freem(m); return error; } - npe->ix_ne[0].data = htobe32(segs[0].ds_addr); + hw = npe->ix_hw; + hw->ix_ne[0].data = htobe32(segs[0].ds_addr); /* NB: NPE requires length be a multiple of 64 */ /* NB: buffer length is shifted in word */ - npe->ix_ne[0].len = htobe32(segs[0].ds_len << 16); - npe->ix_ne[0].next = 0; + hw->ix_ne[0].len = htobe32(segs[0].ds_len << 16); + hw->ix_ne[0].next = 0; npe->ix_m = m; /* Flush the memory in the mbuf */ bus_dmamap_sync(dma->mtag, npe->ix_map, BUS_DMASYNC_PREREAD); @@ -831,26 +889,25 @@ } /* - * Q manager callback on rx. Claim entries from the - * hardware queue and pass the frames up the stack. - * Pass the rx buffers to the free list. + * RX q processing for a specific NPE. Claim entries + * from the hardware queue and pass the frames up the + * stack. Pass the rx buffers to the free list. 
*/ static void npe_rxdone(int qid, void *arg) { -#define P2V(a) &dma->buf[((a) - dma->buf_phys) / sizeof(struct npebuf)] +#define P2V(a, dma) \ + &(dma)->buf[((a) - (dma)->buf_phys) / sizeof(struct npehwbuf)] struct npe_softc *sc = arg; - struct ifnet *ifp = sc->sc_ifp; struct npedma *dma = &sc->rxdma; uint32_t entry; - bus_dmamap_sync(dma->buf_tag, dma->buf_map, BUS_DMASYNC_POSTREAD); while (ixpqmgr_qread(qid, &entry) == 0) { - struct npebuf *npe = P2V(NPE_QM_Q_ADDR(entry)); + struct npebuf *npe = P2V(NPE_QM_Q_ADDR(entry), dma); struct mbuf *m; DPRINTF(sc, "%s: entry 0x%x neaddr 0x%x ne_len 0x%x\n", - __func__, entry, npe->ix_neaddr, npe->ix_ne[0].len); + __func__, entry, npe->ix_neaddr, npe->ix_hw->ix_ne[0].len); /* * Allocate a new mbuf to replenish the rx buffer. * If doing so fails we drop the rx'd frame so we @@ -862,13 +919,16 @@ m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); if (m != NULL) { struct mbuf *mrx = npe->ix_m; + struct npehwbuf *hw = npe->ix_hw; + struct ifnet *ifp = sc->sc_ifp; /* Flush mbuf memory for rx'd data */ bus_dmamap_sync(dma->mtag, npe->ix_map, BUS_DMASYNC_POSTREAD); + /* XXX flush hw buffer; works now 'cuz coherent */ /* set m_len etc. 
per rx frame size */ - mrx->m_len = be32toh(npe->ix_ne[0].len) & 0xffff; + mrx->m_len = be32toh(hw->ix_ne[0].len) & 0xffff; mrx->m_pkthdr.len = mrx->m_len; mrx->m_pkthdr.rcvif = ifp; mrx->m_flags |= M_HASFCS; @@ -1100,6 +1160,7 @@ { struct npe_softc *sc = ifp->if_softc; struct npebuf *npe; + struct npehwbuf *hw; struct mbuf *m, *n; struct npedma *dma = &sc->txdma; bus_dma_segment_t segs[NPE_MAXSEG]; @@ -1149,22 +1210,23 @@ BPF_MTAP(ifp, m); npe->ix_m = m; + hw = npe->ix_hw; len = m->m_pkthdr.len; - next = npe->ix_neaddr + sizeof(npe->ix_ne[0]); + next = npe->ix_neaddr + sizeof(hw->ix_ne[0]); for (i = 0; i < nseg; i++) { - npe->ix_ne[i].data = htobe32(segs[i].ds_addr); - npe->ix_ne[i].len = htobe32((segs[i].ds_len<<16) | len); - npe->ix_ne[i].next = htobe32(next); + hw->ix_ne[i].data = htobe32(segs[i].ds_addr); + hw->ix_ne[i].len = htobe32((segs[i].ds_len<<16) | len); + hw->ix_ne[i].next = htobe32(next); len = 0; /* zero for segments > 1 */ - next += sizeof(npe->ix_ne[0]); + next += sizeof(hw->ix_ne[0]); } - npe->ix_ne[i-1].next = 0; /* zero last in chain */ + hw->ix_ne[i-1].next = 0; /* zero last in chain */ /* XXX flush descriptor instead of using uncached memory */ DPRINTF(sc, "%s: qwrite(%u, 0x%x) ne_data %x ne_len 0x%x\n", __func__, sc->tx_qid, npe->ix_neaddr, - npe->ix_ne[0].data, npe->ix_ne[0].len); + hw->ix_ne[0].data, hw->ix_ne[0].len); /* stick it on the tx q */ /* XXX add vlan priority */ ixpqmgr_qwrite(sc->tx_qid, npe->ix_neaddr); @@ -1320,9 +1382,10 @@ static int npe_setrxqosentry(struct npe_softc *sc, int classix, int trafclass, int qid) { + int portid = npeconfig[device_get_unit(sc->sc_dev)].portid; uint32_t msg[2]; - msg[0] = (NPE_SETRXQOSENTRY << 24) | (sc->sc_portid << 16) | classix; + msg[0] = (NPE_SETRXQOSENTRY << 24) | (portid << 16) | classix; msg[1] = (trafclass << 24) | (1 << 23) | (qid << 16) | (qid << 4); return ixpnpe_sendandrecvmsg(sc->sc_npe, msg, msg); } ==== //depot/projects/arm/src/sys/arm/xscale/ixp425/if_npereg.h#6 (text+ko) ==== @@ 
-73,12 +73,7 @@ */ #define NPE_MAXSEG 3 /* empirically selected */ -struct npebuf { - struct npebuf *ix_next; /* chain to next buffer */ - void *ix_m; /* backpointer to mbuf */ - uint32_t ix_neaddr; /* phys address of ix_ne */ - bus_dmamap_t ix_map; /* bus dma map for associated data */ - uint32_t ix_reserved[4]; +struct npehwbuf { struct { /* NPE shared area, cacheline aligned */ uint32_t next; /* phys addr of next segment */ uint32_t len; /* buffer/segment length (bytes) */ @@ -87,7 +82,13 @@ } ix_ne[NPE_MAXSEG]; }; -#define NPE_PORTS_MAX 3 +/* NPE ID's */ +#define NPE_A 0 +#define NPE_B 1 +#define NPE_C 2 +#define NPE_MAX (NPE_C+1) + +#define NPE_PORTS_MAX 2 /* logical ports */ #define NPE_FRAME_SIZE_DEFAULT 1536 #define NPE_FRAME_SIZE_MAX (65536-64) #define NPE_FRAME_SIZE_MIN 64 @@ -98,7 +99,7 @@ * These define the layout of 32-bit Q entries passed * between the host cpu and the NPE's. */ -#define NPE_QM_Q_NPE(e) (((e)>>0)&0x1) /* NPE ID */ +#define NPE_QM_Q_NPE(e) (((e)>>0)&0x3) /* NPE ID */ #define NPE_QM_Q_PORT(e) (((e)>>3)&0x1) /* Port ID */ #define NPE_QM_Q_PRIO(e) (((e)>>0)&0x3) /* 802.1d priority */ #define NPE_QM_Q_ADDR(e) ((e)&0xfffffffe0) /* phys address */
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200611120018.kAC0IFSA097769>