Date:      Wed, 4 Jan 2012 21:50:59 +0000 (UTC)
From:      Pyun YongHyeon <yongari@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r229524 - stable/9/sys/dev/msk
Message-ID:  <201201042150.q04LoxpY097898@svn.freebsd.org>

Author: yongari
Date: Wed Jan  4 21:50:59 2012
New Revision: 229524
URL: http://svn.freebsd.org/changeset/base/229524

Log:
  MFC r227582:
    Enable 64bit DMA addressing support for all msk(4) controllers.
    The unnecessarily complex LE format used by Marvell controllers was
    the main reason 64bit DMA addressing had not been enabled in the
    driver: whenever the high 32 bits of a TX/RX buffer's DMA address
    change, the driver has to generate an additional LE.  In the TX
    path, the driver now keeps track of the most recently used high
    32bit address and generates a new LE whenever it sees the high
    address of a DMA address change.  In the RX path, the driver always
    uses two LEs to specify the 64bit DMA address of an RX buffer.  If
    the high 32 bits of an RX buffer's DMA address match those of the
    previous RX buffer, the second LE is not strictly required, but the
    driver uses two LEs unconditionally to keep RX ring management
    simple.
  
    One drawback of switching to 64bit DMA addressing is that a large
    number of LEs are consumed just to specify 64bit DMA addresses, so
    the number of LEs available for TX/RX buffers is considerably
    reduced.  To mitigate this, increase the number of available LEs
    from 256 to 384 for TX and from 256 to 512 for RX.  On 32bit
    architectures, msk(4) does not use 64bit DMA addressing, to save
    resources.
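
    A minimal standalone sketch (not driver code) of the TX-path scheme
    described above: an extra OP_ADDR64 LE is emitted only when the high
    32 bits of the bus address differ from the last value handed to the
    chip.  The opcode values and the tiny ring below are placeholders
    for illustration; the real definitions and logic are in the
    if_mskreg.h / if_msk.c changes that follow.

	/* Standalone model of the OP_ADDR64 insertion; opcode values
	 * are placeholders, not the real if_mskreg.h definitions. */
	#include <stdint.h>
	#include <stdio.h>

	#define OP_ADDR64	0x21000000u	/* placeholder */
	#define OP_BUFFER	0x40000000u	/* placeholder */
	#define HW_OWNER	0x80000000u	/* placeholder */
	#define TX_RING_CNT	8

	struct le {				/* simplified list element */
		uint32_t addr;
		uint32_t control;
	};

	static struct le	tx_ring[TX_RING_CNT];
	static int		tx_prod;
	static uint32_t		tx_high_addr;	/* last high 32 bits used */

	static void
	tx_encap_seg(uint64_t busaddr, uint32_t len)
	{
		uint32_t hi = (uint32_t)(busaddr >> 32);

		/* Extra OP_ADDR64 LE only when the high 32 bits change. */
		if (hi != tx_high_addr) {
			tx_high_addr = hi;
			tx_ring[tx_prod].addr = hi;
			tx_ring[tx_prod].control = OP_ADDR64 | HW_OWNER;
			tx_prod = (tx_prod + 1) % TX_RING_CNT;
		}
		tx_ring[tx_prod].addr = (uint32_t)busaddr;
		tx_ring[tx_prod].control = len | OP_BUFFER | HW_OWNER;
		tx_prod = (tx_prod + 1) % TX_RING_CNT;
	}

	int
	main(void)
	{
		tx_encap_seg(0x100000000ULL, 64);	/* high word 0 -> 1: 2 LEs */
		tx_encap_seg(0x100001000ULL, 64);	/* same high word:   1 LE  */
		tx_encap_seg(0x200000000ULL, 64);	/* high word 1 -> 2: 2 LEs */
		printf("LEs consumed: %d\n", tx_prod);	/* prints 5 */
		return (0);
	}

    The RX path, by contrast, always reserves two LEs per receive
    buffer, which is why the if_mskreg.h change halves MSK_RX_BUF_CNT
    relative to MSK_RX_RING_CNT when MSK_64BIT_DMA is defined.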

Modified:
  stable/9/sys/dev/msk/if_msk.c
  stable/9/sys/dev/msk/if_mskreg.h
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/amd64/include/xen/   (props changed)
  stable/9/sys/boot/   (props changed)
  stable/9/sys/boot/i386/efi/   (props changed)
  stable/9/sys/boot/ia64/efi/   (props changed)
  stable/9/sys/boot/ia64/ski/   (props changed)
  stable/9/sys/boot/powerpc/boot1.chrp/   (props changed)
  stable/9/sys/boot/powerpc/ofw/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/contrib/dev/acpica/   (props changed)
  stable/9/sys/contrib/octeon-sdk/   (props changed)
  stable/9/sys/contrib/pf/   (props changed)
  stable/9/sys/contrib/x86emu/   (props changed)

Modified: stable/9/sys/dev/msk/if_msk.c
==============================================================================
--- stable/9/sys/dev/msk/if_msk.c	Wed Jan  4 21:28:49 2012	(r229523)
+++ stable/9/sys/dev/msk/if_msk.c	Wed Jan  4 21:50:59 2012	(r229524)
@@ -692,7 +692,7 @@ msk_init_rx_ring(struct msk_if_softc *sc
 {
 	struct msk_ring_data *rd;
 	struct msk_rxdesc *rxd;
-	int i, prod;
+	int i, nbuf, prod;
 
 	MSK_IF_LOCK_ASSERT(sc_if);
 
@@ -702,11 +702,18 @@ msk_init_rx_ring(struct msk_if_softc *sc
 
 	rd = &sc_if->msk_rdata;
 	bzero(rd->msk_rx_ring, sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT);
-	prod = sc_if->msk_cdata.msk_rx_prod;
-	i = 0;
+	for (i = prod = 0; i < MSK_RX_RING_CNT; i++) {
+		rxd = &sc_if->msk_cdata.msk_rxdesc[prod];
+		rxd->rx_m = NULL;
+		rxd->rx_le = &rd->msk_rx_ring[prod];
+		MSK_INC(prod, MSK_RX_RING_CNT);
+	}
+	nbuf = MSK_RX_BUF_CNT;
+	prod = 0;
 	/* Have controller know how to compute Rx checksum. */
 	if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 &&
 	    (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) {
+#ifdef MSK_64BIT_DMA
 		rxd = &sc_if->msk_cdata.msk_rxdesc[prod];
 		rxd->rx_m = NULL;
 		rxd->rx_le = &rd->msk_rx_ring[prod];
@@ -715,15 +722,21 @@ msk_init_rx_ring(struct msk_if_softc *sc
 		rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
 		MSK_INC(prod, MSK_RX_RING_CNT);
 		MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
-		i++;
-	}
-	for (; i < MSK_RX_RING_CNT; i++) {
+#endif
 		rxd = &sc_if->msk_cdata.msk_rxdesc[prod];
 		rxd->rx_m = NULL;
 		rxd->rx_le = &rd->msk_rx_ring[prod];
+		rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 |
+		    ETHER_HDR_LEN);
+		rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
+		MSK_INC(prod, MSK_RX_RING_CNT);
+		MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
+		nbuf--;
+	}
+	for (i = 0; i < nbuf; i++) {
 		if (msk_newbuf(sc_if, prod) != 0)
 			return (ENOBUFS);
-		MSK_INC(prod, MSK_RX_RING_CNT);
+		MSK_RX_INC(prod, MSK_RX_RING_CNT);
 	}
 
 	bus_dmamap_sync(sc_if->msk_cdata.msk_rx_ring_tag,
@@ -731,10 +744,11 @@ msk_init_rx_ring(struct msk_if_softc *sc
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
 	/* Update prefetch unit. */
-	sc_if->msk_cdata.msk_rx_prod = MSK_RX_RING_CNT - 1;
+	sc_if->msk_cdata.msk_rx_prod = prod;
 	CSR_WRITE_2(sc_if->msk_softc,
 	    Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG),
-	    sc_if->msk_cdata.msk_rx_prod);
+	    (sc_if->msk_cdata.msk_rx_prod + MSK_RX_RING_CNT - 1) %
+	    MSK_RX_RING_CNT);
 	if (msk_rx_fill(sc_if, 0) != 0)
 		return (ENOBUFS);
 	return (0);
@@ -745,7 +759,7 @@ msk_init_jumbo_rx_ring(struct msk_if_sof
 {
 	struct msk_ring_data *rd;
 	struct msk_rxdesc *rxd;
-	int i, prod;
+	int i, nbuf, prod;
 
 	MSK_IF_LOCK_ASSERT(sc_if);
 
@@ -756,11 +770,18 @@ msk_init_jumbo_rx_ring(struct msk_if_sof
 	rd = &sc_if->msk_rdata;
 	bzero(rd->msk_jumbo_rx_ring,
 	    sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT);
-	prod = sc_if->msk_cdata.msk_rx_prod;
-	i = 0;
+	for (i = prod = 0; i < MSK_JUMBO_RX_RING_CNT; i++) {
+		rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod];
+		rxd->rx_m = NULL;
+		rxd->rx_le = &rd->msk_jumbo_rx_ring[prod];
+		MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
+	}
+	nbuf = MSK_RX_BUF_CNT;
+	prod = 0;
 	/* Have controller know how to compute Rx checksum. */
 	if ((sc_if->msk_flags & MSK_FLAG_DESCV2) == 0 &&
 	    (sc_if->msk_ifp->if_capenable & IFCAP_RXCSUM) != 0) {
+#ifdef MSK_64BIT_DMA
 		rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod];
 		rxd->rx_m = NULL;
 		rxd->rx_le = &rd->msk_jumbo_rx_ring[prod];
@@ -769,25 +790,33 @@ msk_init_jumbo_rx_ring(struct msk_if_sof
 		rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
 		MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
 		MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
-		i++;
-	}
-	for (; i < MSK_JUMBO_RX_RING_CNT; i++) {
+#endif
 		rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[prod];
 		rxd->rx_m = NULL;
 		rxd->rx_le = &rd->msk_jumbo_rx_ring[prod];
+		rxd->rx_le->msk_addr = htole32(ETHER_HDR_LEN << 16 |
+		    ETHER_HDR_LEN);
+		rxd->rx_le->msk_control = htole32(OP_TCPSTART | HW_OWNER);
+		MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
+		MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
+		nbuf--;
+	}
+	for (i = 0; i < nbuf; i++) {
 		if (msk_jumbo_newbuf(sc_if, prod) != 0)
 			return (ENOBUFS);
-		MSK_INC(prod, MSK_JUMBO_RX_RING_CNT);
+		MSK_RX_INC(prod, MSK_JUMBO_RX_RING_CNT);
 	}
 
 	bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_ring_tag,
 	    sc_if->msk_cdata.msk_jumbo_rx_ring_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 
-	sc_if->msk_cdata.msk_rx_prod = MSK_JUMBO_RX_RING_CNT - 1;
+	/* Update prefetch unit. */
+	sc_if->msk_cdata.msk_rx_prod = prod;
 	CSR_WRITE_2(sc_if->msk_softc,
 	    Y2_PREF_Q_ADDR(sc_if->msk_rxq, PREF_UNIT_PUT_IDX_REG),
-	    sc_if->msk_cdata.msk_rx_prod);
+	    (sc_if->msk_cdata.msk_rx_prod + MSK_JUMBO_RX_RING_CNT - 1) %
+	    MSK_JUMBO_RX_RING_CNT);
 	if (msk_rx_fill(sc_if, 1) != 0)
 		return (ENOBUFS);
 	return (0);
@@ -805,6 +834,7 @@ msk_init_tx_ring(struct msk_if_softc *sc
 	sc_if->msk_cdata.msk_tx_prod = 0;
 	sc_if->msk_cdata.msk_tx_cons = 0;
 	sc_if->msk_cdata.msk_tx_cnt = 0;
+	sc_if->msk_cdata.msk_tx_high_addr = 0;
 
 	rd = &sc_if->msk_rdata;
 	bzero(rd->msk_tx_ring, sizeof(struct msk_tx_desc) * MSK_TX_RING_CNT);
@@ -826,6 +856,12 @@ msk_discard_rxbuf(struct msk_if_softc *s
 	struct msk_rxdesc *rxd;
 	struct mbuf *m;
 
+#ifdef MSK_64BIT_DMA
+	rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
+	rx_le = rxd->rx_le;
+	rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+	MSK_INC(idx, MSK_RX_RING_CNT);
+#endif
 	rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
 	m = rxd->rx_m;
 	rx_le = rxd->rx_le;
@@ -839,6 +875,12 @@ msk_discard_jumbo_rxbuf(struct msk_if_so
 	struct msk_rxdesc *rxd;
 	struct mbuf *m;
 
+#ifdef MSK_64BIT_DMA
+	rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
+	rx_le = rxd->rx_le;
+	rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+	MSK_INC(idx, MSK_JUMBO_RX_RING_CNT);
+#endif
 	rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
 	m = rxd->rx_m;
 	rx_le = rxd->rx_le;
@@ -876,10 +918,18 @@ msk_newbuf(struct msk_if_softc *sc_if, i
 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
 
 	rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
+#ifdef MSK_64BIT_DMA
+	rx_le = rxd->rx_le;
+	rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr));
+	rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+	MSK_INC(idx, MSK_RX_RING_CNT);
+	rxd = &sc_if->msk_cdata.msk_rxdesc[idx];
+#endif
 	if (rxd->rx_m != NULL) {
 		bus_dmamap_sync(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap,
 		    BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc_if->msk_cdata.msk_rx_tag, rxd->rx_dmamap);
+		rxd->rx_m = NULL;
 	}
 	map = rxd->rx_dmamap;
 	rxd->rx_dmamap = sc_if->msk_cdata.msk_rx_sparemap;
@@ -929,11 +979,19 @@ msk_jumbo_newbuf(struct msk_if_softc *sc
 	KASSERT(nsegs == 1, ("%s: %d segments returned!", __func__, nsegs));
 
 	rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
+#ifdef MSK_64BIT_DMA
+	rx_le = rxd->rx_le;
+	rx_le->msk_addr = htole32(MSK_ADDR_HI(segs[0].ds_addr));
+	rx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+	MSK_INC(idx, MSK_JUMBO_RX_RING_CNT);
+	rxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[idx];
+#endif
 	if (rxd->rx_m != NULL) {
 		bus_dmamap_sync(sc_if->msk_cdata.msk_jumbo_rx_tag,
 		    rxd->rx_dmamap, BUS_DMASYNC_POSTREAD);
 		bus_dmamap_unload(sc_if->msk_cdata.msk_jumbo_rx_tag,
 		    rxd->rx_dmamap);
+		rxd->rx_m = NULL;
 	}
 	map = rxd->rx_dmamap;
 	rxd->rx_dmamap = sc_if->msk_cdata.msk_jumbo_rx_sparemap;
@@ -1464,7 +1522,7 @@ mskc_reset(struct msk_softc *sc)
 
 	/* Clear status list. */
 	bzero(sc->msk_stat_ring,
-	    sizeof(struct msk_stat_desc) * MSK_STAT_RING_CNT);
+	    sizeof(struct msk_stat_desc) * sc->msk_stat_count);
 	sc->msk_stat_cons = 0;
 	bus_dmamap_sync(sc->msk_stat_tag, sc->msk_stat_map,
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
@@ -1475,7 +1533,7 @@ mskc_reset(struct msk_softc *sc)
 	CSR_WRITE_4(sc, STAT_LIST_ADDR_LO, MSK_ADDR_LO(addr));
 	CSR_WRITE_4(sc, STAT_LIST_ADDR_HI, MSK_ADDR_HI(addr));
 	/* Set the status list last index. */
-	CSR_WRITE_2(sc, STAT_LAST_IDX, MSK_STAT_RING_CNT - 1);
+	CSR_WRITE_2(sc, STAT_LAST_IDX, sc->msk_stat_count - 1);
 	if (sc->msk_hw_id == CHIP_ID_YUKON_EC &&
 	    sc->msk_hw_rev == CHIP_REV_YU_EC_A1) {
 		/* WA for dev. #4.3 */
@@ -2075,17 +2133,29 @@ static int
 msk_status_dma_alloc(struct msk_softc *sc)
 {
 	struct msk_dmamap_arg ctx;
-	int error;
+	bus_size_t stat_sz;
+	int count, error;
 
+	/*
+	 * It seems controller requires number of status LE entries
+	 * is power of 2 and the maximum number of status LE entries
+	 * is 4096.  For dual-port controllers, the number of status
+	 * LE entries should be large enough to hold both port's
+	 * status updates.
+	 */
+	count = 3 * MSK_RX_RING_CNT + MSK_TX_RING_CNT;
+	count = imin(4096, roundup2(count, 1024));
+	sc->msk_stat_count = count;
+	stat_sz = count * sizeof(struct msk_stat_desc);
 	error = bus_dma_tag_create(
 		    bus_get_dma_tag(sc->msk_dev),	/* parent */
 		    MSK_STAT_ALIGN, 0,		/* alignment, boundary */
 		    BUS_SPACE_MAXADDR,		/* lowaddr */
 		    BUS_SPACE_MAXADDR,		/* highaddr */
 		    NULL, NULL,			/* filter, filterarg */
-		    MSK_STAT_RING_SZ,		/* maxsize */
+		    stat_sz,			/* maxsize */
 		    1,				/* nsegments */
-		    MSK_STAT_RING_SZ,		/* maxsegsize */
+		    stat_sz,			/* maxsegsize */
 		    0,				/* flags */
 		    NULL, NULL,			/* lockfunc, lockarg */
 		    &sc->msk_stat_tag);
@@ -2106,9 +2176,8 @@ msk_status_dma_alloc(struct msk_softc *s
 	}
 
 	ctx.msk_busaddr = 0;
-	error = bus_dmamap_load(sc->msk_stat_tag,
-	    sc->msk_stat_map, sc->msk_stat_ring, MSK_STAT_RING_SZ,
-	    msk_dmamap_cb, &ctx, 0);
+	error = bus_dmamap_load(sc->msk_stat_tag, sc->msk_stat_map,
+	    sc->msk_stat_ring, stat_sz, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
 	if (error != 0) {
 		device_printf(sc->msk_dev,
 		    "failed to load DMA'able memory for status ring\n");
@@ -2149,27 +2218,10 @@ msk_txrx_dma_alloc(struct msk_if_softc *
 	int error, i;
 
 	/* Create parent DMA tag. */
-	/*
-	 * XXX
-	 * It seems that Yukon II supports full 64bits DMA operations. But
-	 * it needs two descriptors(list elements) for 64bits DMA operations.
-	 * Since we don't know what DMA address mappings(32bits or 64bits)
-	 * would be used in advance for each mbufs, we limits its DMA space
-	 * to be in range of 32bits address space. Otherwise, we should check
-	 * what DMA address is used and chain another descriptor for the
-	 * 64bits DMA operation. This also means descriptor ring size is
-	 * variable. Limiting DMA address to be in 32bit address space greatly
-	 * simplifies descriptor handling and possibly would increase
-	 * performance a bit due to efficient handling of descriptors.
-	 * Apart from harassing checksum offloading mechanisms, it seems
-	 * it's really bad idea to use a separate descriptor for 64bit
-	 * DMA operation to save small descriptor memory. Anyway, I've
-	 * never seen these exotic scheme on ethernet interface hardware.
-	 */
 	error = bus_dma_tag_create(
 		    bus_get_dma_tag(sc_if->msk_if_dev),	/* parent */
 		    1, 0,			/* alignment, boundary */
-		    BUS_SPACE_MAXADDR_32BIT,	/* lowaddr */
+		    BUS_SPACE_MAXADDR,		/* lowaddr */
 		    BUS_SPACE_MAXADDR,		/* highaddr */
 		    NULL, NULL,			/* filter, filterarg */
 		    BUS_SPACE_MAXSIZE_32BIT,	/* maxsize */
@@ -2275,7 +2327,7 @@ msk_txrx_dma_alloc(struct msk_if_softc *
 	ctx.msk_busaddr = 0;
 	error = bus_dmamap_load(sc_if->msk_cdata.msk_tx_ring_tag,
 	    sc_if->msk_cdata.msk_tx_ring_map, sc_if->msk_rdata.msk_tx_ring,
-	    MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, 0);
+	    MSK_TX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
 	if (error != 0) {
 		device_printf(sc_if->msk_if_dev,
 		    "failed to load DMA'able memory for Tx ring\n");
@@ -2296,7 +2348,7 @@ msk_txrx_dma_alloc(struct msk_if_softc *
 	ctx.msk_busaddr = 0;
 	error = bus_dmamap_load(sc_if->msk_cdata.msk_rx_ring_tag,
 	    sc_if->msk_cdata.msk_rx_ring_map, sc_if->msk_rdata.msk_rx_ring,
-	    MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, 0);
+	    MSK_RX_RING_SZ, msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
 	if (error != 0) {
 		device_printf(sc_if->msk_if_dev,
 		    "failed to load DMA'able memory for Rx ring\n");
@@ -2413,7 +2465,7 @@ msk_rx_dma_jalloc(struct msk_if_softc *s
 	error = bus_dmamap_load(sc_if->msk_cdata.msk_jumbo_rx_ring_tag,
 	    sc_if->msk_cdata.msk_jumbo_rx_ring_map,
 	    sc_if->msk_rdata.msk_jumbo_rx_ring, MSK_JUMBO_RX_RING_SZ,
-	    msk_dmamap_cb, &ctx, 0);
+	    msk_dmamap_cb, &ctx, BUS_DMA_NOWAIT);
 	if (error != 0) {
 		device_printf(sc_if->msk_if_dev,
 		    "failed to load DMA'able memory for jumbo Rx ring\n");
@@ -2773,6 +2825,18 @@ msk_encap(struct msk_if_softc *sc_if, st
 		}
 	}
 
+#ifdef MSK_64BIT_DMA
+	if (MSK_ADDR_HI(txsegs[0].ds_addr) !=
+	    sc_if->msk_cdata.msk_tx_high_addr) {
+		sc_if->msk_cdata.msk_tx_high_addr =
+		    MSK_ADDR_HI(txsegs[0].ds_addr);
+		tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+		tx_le->msk_addr = htole32(MSK_ADDR_HI(txsegs[0].ds_addr));
+		tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+		sc_if->msk_cdata.msk_tx_cnt++;
+		MSK_INC(prod, MSK_TX_RING_CNT);
+	}
+#endif
 	si = prod;
 	tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
 	tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[0].ds_addr));
@@ -2787,6 +2851,20 @@ msk_encap(struct msk_if_softc *sc_if, st
 
 	for (i = 1; i < nseg; i++) {
 		tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+#ifdef MSK_64BIT_DMA
+		if (MSK_ADDR_HI(txsegs[i].ds_addr) !=
+		    sc_if->msk_cdata.msk_tx_high_addr) {
+			sc_if->msk_cdata.msk_tx_high_addr =
+			    MSK_ADDR_HI(txsegs[i].ds_addr);
+			tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+			tx_le->msk_addr =
+			    htole32(MSK_ADDR_HI(txsegs[i].ds_addr));
+			tx_le->msk_control = htole32(OP_ADDR64 | HW_OWNER);
+			sc_if->msk_cdata.msk_tx_cnt++;
+			MSK_INC(prod, MSK_TX_RING_CNT);
+			tx_le = &sc_if->msk_rdata.msk_tx_ring[prod];
+		}
+#endif
 		tx_le->msk_addr = htole32(MSK_ADDR_LO(txsegs[i].ds_addr));
 		tx_le->msk_control = htole32(txsegs[i].ds_len | control |
 		    OP_BUFFER | HW_OWNER);
@@ -3139,7 +3217,12 @@ msk_rxeof(struct msk_if_softc *sc_if, ui
 			msk_discard_rxbuf(sc_if, cons);
 			break;
 		}
+#ifdef MSK_64BIT_DMA
+		rxd = &sc_if->msk_cdata.msk_rxdesc[(cons + 1) %
+		    MSK_RX_RING_CNT];
+#else
 		rxd = &sc_if->msk_cdata.msk_rxdesc[cons];
+#endif
 		m = rxd->rx_m;
 		if (msk_newbuf(sc_if, cons) != 0) {
 			ifp->if_iqdrops++;
@@ -3167,8 +3250,8 @@ msk_rxeof(struct msk_if_softc *sc_if, ui
 		MSK_IF_LOCK(sc_if);
 	} while (0);
 
-	MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
-	MSK_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT);
+	MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_RX_RING_CNT);
+	MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_RX_RING_CNT);
 }
 
 static void
@@ -3199,7 +3282,12 @@ msk_jumbo_rxeof(struct msk_if_softc *sc_
 			msk_discard_jumbo_rxbuf(sc_if, cons);
 			break;
 		}
+#ifdef MSK_64BIT_DMA
+		jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[(cons + 1) %
+		    MSK_JUMBO_RX_RING_CNT];
+#else
 		jrxd = &sc_if->msk_cdata.msk_jumbo_rxdesc[cons];
+#endif
 		m = jrxd->rx_m;
 		if (msk_jumbo_newbuf(sc_if, cons) != 0) {
 			ifp->if_iqdrops++;
@@ -3227,8 +3315,8 @@ msk_jumbo_rxeof(struct msk_if_softc *sc_
 		MSK_IF_LOCK(sc_if);
 	} while (0);
 
-	MSK_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
-	MSK_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT);
+	MSK_RX_INC(sc_if->msk_cdata.msk_rx_cons, MSK_JUMBO_RX_RING_CNT);
+	MSK_RX_INC(sc_if->msk_cdata.msk_rx_prod, MSK_JUMBO_RX_RING_CNT);
 }
 
 static void
@@ -3573,7 +3661,7 @@ msk_handle_events(struct msk_softc *sc)
 			    control & STLE_OP_MASK);
 			break;
 		}
-		MSK_INC(cons, MSK_STAT_RING_CNT);
+		MSK_INC(cons, sc->msk_stat_count);
 		if (rxprog > sc->msk_process_limit)
 			break;
 	}

Modified: stable/9/sys/dev/msk/if_mskreg.h
==============================================================================
--- stable/9/sys/dev/msk/if_mskreg.h	Wed Jan  4 21:28:49 2012	(r229523)
+++ stable/9/sys/dev/msk/if_mskreg.h	Wed Jan  4 21:50:59 2012	(r229524)
@@ -2315,35 +2315,48 @@ struct msk_stat_desc {
 #define BMU_UDP_CHECK	(0x57<<16)	/* Descr with UDP ext (YUKON only) */
 #define BMU_BBC		0xffff	/* Bit 15.. 0:	Buffer Byte Counter */
 
+/*
+ * Controller requires an additional LE op code for 64bit DMA operation.
+ * Driver uses fixed number of RX buffers such that this limitation
+ * reduces number of available RX buffers with 64bit DMA so double
+ * number of RX buffers on platforms that support 64bit DMA. For TX
+ * side, controller requires an additional OP_ADDR64 op code if a TX
+ * buffer uses different high address value than previously used one.
+ * Driver monitors high DMA address change in TX and inserts an
+ * OP_ADDR64 op code if the high DMA address is changed.  Driver
+ * allocates 50% more total TX buffers on platforms that support 64bit
+ * DMA.
+ */
+#if (BUS_SPACE_MAXADDR > 0xFFFFFFFF)
+#define	MSK_64BIT_DMA
+#define MSK_TX_RING_CNT		384
+#define MSK_RX_RING_CNT		512
+#else
+#undef	MSK_64BIT_DMA
 #define MSK_TX_RING_CNT		256
 #define MSK_RX_RING_CNT		256
+#endif
 #define	MSK_RX_BUF_ALIGN	8
 #define MSK_JUMBO_RX_RING_CNT	MSK_RX_RING_CNT
-#define	MSK_STAT_RING_CNT	((1 + 3) * (MSK_TX_RING_CNT + MSK_RX_RING_CNT))
 #define MSK_MAXTXSEGS		32
 #define	MSK_TSO_MAXSGSIZE	4096
 #define	MSK_TSO_MAXSIZE		(65535 + sizeof(struct ether_vlan_header))
 
 /*
- * It seems that the hardware requires extra decriptors(LEs) to offload
- * TCP/UDP checksum, VLAN hardware tag inserstion and TSO.
+ * It seems that the hardware requires extra descriptors(LEs) to offload
+ * TCP/UDP checksum, VLAN hardware tag insertion and TSO.
  *
  * 1 descriptor for TCP/UDP checksum offload.
  * 1 descriptor VLAN hardware tag insertion.
  * 1 descriptor for TSO(TCP Segmentation Offload)
- * 1 descriptor for 64bits DMA : Not applicatable due to the use of
- *  BUS_SPACE_MAXADDR_32BIT in parent DMA tag creation.
+ * 1 descriptor for each 64bits DMA transfers 
  */
+#ifdef MSK_64BIT_DMA
+#define	MSK_RESERVED_TX_DESC_CNT	(MSK_MAXTXSEGS + 3)
+#else
 #define	MSK_RESERVED_TX_DESC_CNT	3
+#endif
 
-/*
- * Jumbo buffer stuff. Note that we must allocate more jumbo
- * buffers than there are descriptors in the receive ring. This
- * is because we don't know how long it will take for a packet
- * to be released after we hand it off to the upper protocol
- * layers. To be safe, we allocate 1.5 times the number of
- * receive descriptors.
- */
 #define MSK_JUMBO_FRAMELEN	9022
 #define MSK_JUMBO_MTU		(MSK_JUMBO_FRAMELEN-ETHER_HDR_LEN-ETHER_CRC_LEN)
 #define MSK_MAX_FRAMELEN		\
@@ -2380,6 +2393,7 @@ struct msk_chain_data {
 	bus_dmamap_t		msk_jumbo_rx_sparemap;
 	uint16_t		msk_tso_mtu;
 	uint32_t		msk_last_csum;
+	uint32_t		msk_tx_high_addr;
 	int			msk_tx_prod;
 	int			msk_tx_cons;
 	int			msk_tx_cnt;
@@ -2411,10 +2425,17 @@ struct msk_ring_data {
     (sizeof(struct msk_rx_desc) * MSK_RX_RING_CNT)
 #define MSK_JUMBO_RX_RING_SZ		\
     (sizeof(struct msk_rx_desc) * MSK_JUMBO_RX_RING_CNT)
-#define MSK_STAT_RING_SZ		\
-    (sizeof(struct msk_stat_desc) * MSK_STAT_RING_CNT)
 
 #define MSK_INC(x, y)	(x) = (x + 1) % y
+#ifdef MSK_64BIT_DMA
+#define MSK_RX_INC(x, y)	(x) = (x + 2) % y
+#define MSK_RX_BUF_CNT		(MSK_RX_RING_CNT / 2)
+#define MSK_JUMBO_RX_BUF_CNT	(MSK_JUMBO_RX_RING_CNT / 2)
+#else
+#define MSK_RX_INC(x, y)	(x) = (x + 1) % y
+#define MSK_RX_BUF_CNT		MSK_RX_RING_CNT
+#define MSK_JUMBO_RX_BUF_CNT	MSK_JUMBO_RX_RING_CNT
+#endif
 
 #define	MSK_PCI_BUS	0
 #define	MSK_PCIX_BUS	1
@@ -2519,6 +2540,7 @@ struct msk_softc {
 	int			msk_int_holdoff;
 	int			msk_process_limit;
 	int			msk_stat_cons;
+	int			msk_stat_count;
 	struct mtx		msk_mtx;
 };
 


