Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 24 Aug 2010 12:26:52 -0700
From:      Pyun YongHyeon <pyunyh@gmail.com>
To:        Adrian Chadd <adrian.chadd@gmail.com>
Cc:        freebsd-net@freebsd.org, Andre Oppermann <andre@freebsd.org>
Subject:   Re: 8.0-RELEASE-p3: 4k jumbo mbuf cluster exhaustion
Message-ID:  <20100824192652.GD7118@michelle.cdnetworks.com>
In-Reply-To: <AANLkTikBHiQ15CFKhsP4Z=9bRJEP-1_RAJAS4Y3U1GLT@mail.gmail.com>
References:  <AANLkTikrbCFHz-CnuYcgH2JzpeH5hob0Aa2y5dwn3Hvv@mail.gmail.com> <AANLkTikYMU=wML_z=HDnkUF1PGYMVa1q-QWTrkxD+7EP@mail.gmail.com> <20100822222746.GC6013@michelle.cdnetworks.com> <AANLkTi=t+nG8isp1nf2aBec+FwomApNt0NBPO8LqZ+=9@mail.gmail.com> <4C724AD9.5020000@freebsd.org> <AANLkTikBHiQ15CFKhsP4Z=9bRJEP-1_RAJAS4Y3U1GLT@mail.gmail.com>

next in thread | previous in thread | raw e-mail | index | archive | help

--zx4FCpZtqtKETZ7O
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline

On Tue, Aug 24, 2010 at 08:37:52PM +0800, Adrian Chadd wrote:
> On 23 August 2010 18:18, Andre Oppermann <andre@freebsd.org> wrote:
> > It seems the 4k clusters do not get freed back to the pool after they've
> > been sent by the NIC and dropped from the socket buffer after the ACK has
> > arrived. The leak must occur in one of these two places. The socket
> > buffer is unlikely as it would affect not just you but everyone else too.
> > Thus the mbuf freeing after DMA/tx in the bce(4) driver is the prime
> > suspect.
> 
> They don't stay leaked though. Killing the offending process sees
> mbuf's eventually returned.
> It isn't immediate though. It may be related to timing out existing
> socket connections or something?
> 
> I haven't yet brought up the second box enough to start passing test
> traffic, so I can't provide any further details than this.
> 

Here is a patch that fixes the TX/RX related issues. The patch was
generated against HEAD. I'm not sure you can apply this patch to
8.0-RELEASE, but you can see the point of the driver's TX issues.
I'm still waiting for David's opinion on this patch, but it seems
he's busy addressing other issues of Broadcom controllers which
might have been triggered by me.

> Adrian

--zx4FCpZtqtKETZ7O
Content-Type: text/x-diff; charset=us-ascii
Content-Disposition: attachment; filename="bce.misc3.patch"

Index: sys/dev/bce/if_bce.c
===================================================================
--- sys/dev/bce/if_bce.c	(revision 210298)
+++ sys/dev/bce/if_bce.c	(working copy)
@@ -4995,7 +4995,7 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *
     u16 *chain_prod, u32 *prod_bseq)
 {
 	bus_dmamap_t map;
-	bus_dma_segment_t segs[BCE_MAX_SEGMENTS];
+	bus_dma_segment_t segs[1];
 	struct mbuf *m_new = NULL;
 	struct rx_bd *rxbd;
 	int nsegs, error, rc = 0;
@@ -5067,9 +5067,10 @@ bce_get_rx_buf(struct bce_softc *sc, struct mbuf *
 
 	/* Handle any mapping errors. */
 	if (error) {
+#ifdef	BCE_DEBUG
 		BCE_PRINTF("%s(%d): Error mapping mbuf into RX "
 		    "chain (%d)!\n", __FILE__, __LINE__, error);
-
+#endif
 		sc->dma_map_addr_rx_failed_count++;
 		m_freem(m_new);
 
@@ -5183,9 +5184,10 @@ bce_get_pg_buf(struct bce_softc *sc, struct mbuf *
 
 	/* Handle any mapping errors. */
 	if (error) {
+#ifdef	BCE_DEBUG
 		BCE_PRINTF("%s(%d): Error mapping mbuf into page chain!\n",
 		    __FILE__, __LINE__);
-
+#endif
 		m_freem(m_new);
 		DBRUN(sc->debug_pg_mbuf_alloc--);
 
@@ -5323,6 +5325,9 @@ bce_init_tx_chain(struct bce_softc *sc)
 		txbd->tx_bd_haddr_hi = htole32(BCE_ADDR_HI(sc->tx_bd_chain_paddr[j]));
 		txbd->tx_bd_haddr_lo = htole32(BCE_ADDR_LO(sc->tx_bd_chain_paddr[j]));
 	}
+	for (i = 0; i < TX_PAGES; i++)
+		bus_dmamap_sync(sc->tx_bd_chain_tag, sc->tx_bd_chain_map[i],
+		    BUS_DMASYNC_PREWRITE);
 
 	bce_init_tx_context(sc);
 
@@ -5360,8 +5365,11 @@ bce_free_tx_chain(struct bce_softc *sc)
 	}
 
 	/* Clear each TX chain page. */
-	for (i = 0; i < TX_PAGES; i++)
+	for (i = 0; i < TX_PAGES; i++) {
 		bzero((char *)sc->tx_bd_chain[i], BCE_TX_CHAIN_PAGE_SZ);
+		bus_dmamap_sync(sc->tx_bd_chain_tag, sc->tx_bd_chain_map[i],
+		    BUS_DMASYNC_PREWRITE);
+	}
 
 	sc->used_tx_bd = 0;
 
@@ -5497,10 +5505,6 @@ bce_init_rx_chain(struct bce_softc *sc)
 
 	DBRUN(sc->rx_low_watermark = USABLE_RX_BD);
 	DBRUN(sc->rx_empty_count = 0);
-	for (i = 0; i < RX_PAGES; i++) {
-		bus_dmamap_sync(sc->rx_bd_chain_tag, sc->rx_bd_chain_map[i],
-		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-	}
 
 	bce_init_rx_context(sc);
 
@@ -5526,6 +5530,7 @@ bce_fill_rx_chain(struct bce_softc *sc)
 {
 	u16 prod, prod_idx;
 	u32 prod_bseq;
+	int i;
 
 	DBENTER(BCE_VERBOSE_RESET | BCE_EXTREME_RECV | BCE_VERBOSE_LOAD |
 	    BCE_VERBOSE_CTX);
@@ -5544,6 +5549,11 @@ bce_fill_rx_chain(struct bce_softc *sc)
 		prod = NEXT_RX_BD(prod);
 	}
 
+	for (i = 0; i < RX_PAGES; i++) {
+		bus_dmamap_sync(sc->rx_bd_chain_tag, sc->rx_bd_chain_map[i],
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+	}
+
 	/* Save the RX chain producer indices. */
 	sc->rx_prod      = prod;
 	sc->rx_prod_bseq = prod_bseq;
@@ -5651,6 +5661,9 @@ bce_init_pg_chain(struct bce_softc *sc)
 		pgbd->rx_bd_haddr_lo = htole32(BCE_ADDR_LO(sc->pg_bd_chain_paddr[j]));
 	}
 
+	/* Fill up the page chain. */
+	bce_fill_pg_chain(sc);
+
 	/* Setup the MQ BIN mapping for host_pg_bidx. */
 	if ((BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5709)	||
 		(BCE_CHIP_NUM(sc) == BCE_CHIP_NUM_5716))
@@ -5672,14 +5685,6 @@ bce_init_pg_chain(struct bce_softc *sc)
 	val = BCE_ADDR_LO(sc->pg_bd_chain_paddr[0]);
 	CTX_WR(sc, GET_CID_ADDR(RX_CID), BCE_L2CTX_RX_NX_PG_BDHADDR_LO, val);
 
-	/* Fill up the page chain. */
-	bce_fill_pg_chain(sc);
-
-	for (i = 0; i < PG_PAGES; i++) {
-		bus_dmamap_sync(sc->pg_bd_chain_tag, sc->pg_bd_chain_map[i],
-		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
-	}
-
 	DBRUNMSG(BCE_EXTREME_RECV, bce_dump_pg_chain(sc, 0, TOTAL_PG_BD));
 	DBEXIT(BCE_VERBOSE_RESET | BCE_VERBOSE_RECV | BCE_VERBOSE_LOAD |
 		BCE_VERBOSE_CTX);
@@ -5698,6 +5703,7 @@ static void
 bce_fill_pg_chain(struct bce_softc *sc)
 {
 	u16 prod, prod_idx;
+	int i;
 
 	DBENTER(BCE_VERBOSE_RESET | BCE_EXTREME_RECV | BCE_VERBOSE_LOAD |
 	    BCE_VERBOSE_CTX);
@@ -5715,6 +5721,11 @@ bce_fill_pg_chain(struct bce_softc *sc)
 		prod = NEXT_PG_BD(prod);
 	}
 
+	for (i = 0; i < PG_PAGES; i++) {
+		bus_dmamap_sync(sc->pg_bd_chain_tag, sc->pg_bd_chain_map[i],
+		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
+	}
+
 	/* Save the page chain producer index. */
 	sc->pg_prod = prod;
 
@@ -5961,6 +5972,7 @@ bce_rx_intr(struct bce_softc *sc)
 	unsigned int rem_len;
 	u16 sw_pg_cons, sw_pg_cons_idx;
 #endif
+	int i;
 
 	DBENTER(BCE_VERBOSE_RECV | BCE_VERBOSE_INTR);
 	DBRUN(sc->interrupts_rx++);
@@ -5969,15 +5981,17 @@ bce_rx_intr(struct bce_softc *sc)
 	    __FUNCTION__, sc->rx_prod, sc->rx_cons, sc->rx_prod_bseq);
 
 	/* Prepare the RX chain pages to be accessed by the host CPU. */
-	for (int i = 0; i < RX_PAGES; i++)
+	for (i = 0; i < RX_PAGES; i++)
 		bus_dmamap_sync(sc->rx_bd_chain_tag,
-		    sc->rx_bd_chain_map[i], BUS_DMASYNC_POSTREAD);
+		    sc->rx_bd_chain_map[i],
+		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 
 #ifdef BCE_JUMBO_HDRSPLIT
 	/* Prepare the page chain pages to be accessed by the host CPU. */
-	for (int i = 0; i < PG_PAGES; i++)
+	for (i = 0; i < PG_PAGES; i++)
 		bus_dmamap_sync(sc->pg_bd_chain_tag,
-		    sc->pg_bd_chain_map[i], BUS_DMASYNC_POSTREAD);
+		    sc->pg_bd_chain_map[i],
+		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
 #endif
 
 	/* Get the hardware's view of the RX consumer index. */
@@ -6291,17 +6305,6 @@ bce_rx_int_next_rx:
 	sc->rx_cons = sw_rx_cons;
 	bce_fill_rx_chain(sc);
 
-	/* Prepare the page chain pages to be accessed by the NIC. */
-	for (int i = 0; i < RX_PAGES; i++)
-		bus_dmamap_sync(sc->rx_bd_chain_tag,
-		    sc->rx_bd_chain_map[i], BUS_DMASYNC_PREWRITE);
-
-#ifdef BCE_JUMBO_HDRSPLIT
-	for (int i = 0; i < PG_PAGES; i++)
-		bus_dmamap_sync(sc->pg_bd_chain_tag,
-		    sc->pg_bd_chain_map[i], BUS_DMASYNC_PREWRITE);
-#endif
-
 	DBPRINT(sc, BCE_EXTREME_RECV, "%s(exit): rx_prod = 0x%04X, "
 	    "rx_cons = 0x%04X, rx_prod_bseq = 0x%08X\n",
 	    __FUNCTION__, sc->rx_prod, sc->rx_cons, sc->rx_prod_bseq);
@@ -6341,6 +6344,7 @@ bce_tx_intr(struct bce_softc *sc)
 {
 	struct ifnet *ifp = sc->bce_ifp;
 	u16 hw_tx_cons, sw_tx_cons, sw_tx_chain_cons;
+	int i;
 
 	DBENTER(BCE_VERBOSE_SEND | BCE_VERBOSE_INTR);
 	DBRUN(sc->interrupts_tx++);
@@ -6358,6 +6362,10 @@ bce_tx_intr(struct bce_softc *sc)
 	bus_space_barrier(sc->bce_btag, sc->bce_bhandle, 0, 0,
 	    BUS_SPACE_BARRIER_READ);
 
+	for (i = 0; i < TX_PAGES; i++)
+		bus_dmamap_sync(sc->tx_bd_chain_tag, sc->tx_bd_chain_map[i],
+		    BUS_DMASYNC_POSTWRITE);
+
 	/* Cycle through any completed TX chain page entries. */
 	while (sw_tx_cons != hw_tx_cons) {
 #ifdef BCE_DEBUG
@@ -6405,6 +6413,10 @@ bce_tx_intr(struct bce_softc *sc)
 			    "from tx_bd[0x%04X]\n", __FUNCTION__,
 			    sw_tx_chain_cons));
 
+			bus_dmamap_sync(sc->tx_mbuf_tag,
+			    sc->tx_mbuf_map[sw_tx_chain_cons],
+			    BUS_DMASYNC_POSTWRITE);
+
 			/* Unmap the mbuf. */
 			bus_dmamap_unload(sc->tx_mbuf_tag,
 			    sc->tx_mbuf_map[sw_tx_chain_cons]);
@@ -6429,7 +6441,8 @@ bce_tx_intr(struct bce_softc *sc)
 	}
 
 	/* Clear the TX timeout timer. */
-	sc->watchdog_timer = 0;
+	if (sc->used_tx_bd == 0)
+		sc->watchdog_timer = 0;
 
 	/* Clear the tx hardware queue full flag. */
 	if (sc->used_tx_bd < sc->max_tx_bd) {
@@ -6563,7 +6576,7 @@ bce_init_locked(struct bce_softc *sc)
 	/* Program the mtu, enabling jumbo frame support if necessary. */
 	if (ether_mtu > (ETHER_MAX_LEN + ETHER_VLAN_ENCAP_LEN))
 		REG_WR(sc, BCE_EMAC_RX_MTU_SIZE,
-		    min(ether_mtu, BCE_MAX_JUMBO_ETHER_MTU) |
+		    min(ether_mtu, BCE_MAX_JUMBO_ETHER_MTU_VLAN) |
 		    BCE_EMAC_RX_MTU_SIZE_JUMBO_ENA);
 	else
 		REG_WR(sc, BCE_EMAC_RX_MTU_SIZE, ether_mtu);
@@ -6796,7 +6809,7 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m
 	bus_dmamap_t map;
 	struct tx_bd *txbd = NULL;
 	struct mbuf *m0;
-	u16 prod, chain_prod, mss = 0, vlan_tag = 0, flags = 0;
+	u16 prod, chain_prod, mss = 0, vlan_tag = 0, flags = 0, si;
 	u32 prod_bseq;
 
 #ifdef BCE_DEBUG
@@ -6837,7 +6850,7 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m
 
 	/* Map the mbuf into DMAable memory. */
 	prod = sc->tx_prod;
-	chain_prod = TX_CHAIN_IDX(prod);
+	si = chain_prod = TX_CHAIN_IDX(prod);
 	map = sc->tx_mbuf_map[chain_prod];
 
 	/* Map the mbuf into our DMA address space. */
@@ -6872,8 +6885,10 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m
 			goto bce_tx_encap_exit;
 		} else if (error != 0) {
 			/* Release it and return an error. */
+#ifdef	BCE_DEBUG
 			BCE_PRINTF("%s(%d): Unknown error mapping mbuf into "
 			    "TX chain!\n", __FILE__, __LINE__);
+#endif
 			m_freem(m0);
 			*m_head = NULL;
 			sc->dma_map_addr_tx_failed_count++;
@@ -6899,6 +6914,7 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m
 		rc = ENOBUFS;
 		goto bce_tx_encap_exit;
 	}
+	bus_dmamap_sync(sc->tx_mbuf_tag, map, BUS_DMASYNC_PREWRITE);
 
 	/* prod points to an empty tx_bd at this point. */
 	prod_bseq  = sc->tx_prod_bseq;
@@ -6954,6 +6970,9 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m
 	 * have been freed.
 	 */
 	sc->tx_mbuf_ptr[chain_prod] = m0;
+	/* Swap DMA maps. */
+	sc->tx_mbuf_map[si] = sc->tx_mbuf_map[chain_prod];
+	sc->tx_mbuf_map[chain_prod] = map;
 	sc->used_tx_bd += nsegs;
 
 	/* Update some debug statistic counters */
@@ -6968,12 +6987,6 @@ bce_tx_encap(struct bce_softc *sc, struct mbuf **m
 	sc->tx_prod = prod;
 	sc->tx_prod_bseq = prod_bseq;
 
-	/* Tell the chip about the waiting TX frames. */
-	REG_WR16(sc, MB_GET_CID_ADDR(TX_CID) +
-	    BCE_L2MQ_TX_HOST_BIDX, sc->tx_prod);
-	REG_WR(sc, MB_GET_CID_ADDR(TX_CID) +
-	    BCE_L2MQ_TX_HOST_BSEQ, sc->tx_prod_bseq);
-
 bce_tx_encap_exit:
 	DBEXIT(BCE_VERBOSE_SEND);
 	return(rc);
@@ -6991,7 +7004,7 @@ bce_start_locked(struct ifnet *ifp)
 {
 	struct bce_softc *sc = ifp->if_softc;
 	struct mbuf *m_head = NULL;
-	int count = 0;
+	int count = 0, i;
 	u16 tx_prod, tx_chain_prod;
 
 	DBENTER(BCE_VERBOSE_SEND | BCE_VERBOSE_CTX);
@@ -7008,8 +7021,10 @@ bce_start_locked(struct ifnet *ifp)
 	    __FUNCTION__, tx_prod, tx_chain_prod, sc->tx_prod_bseq);
 
 	/* If there's no link or the transmit queue is empty then just exit. */
-	if (sc->bce_link_up == FALSE) {
-		DBPRINT(sc, BCE_INFO_SEND, "%s(): No link.\n",
+	if (sc->bce_link_up == FALSE ||
+	    (ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+	    IFF_DRV_RUNNING) {
+		DBPRINT(sc, BCE_INFO_SEND, "%s(): No link or TX busy.\n",
 		    __FUNCTION__);
 		goto bce_start_locked_exit;
 	}
@@ -7064,6 +7079,16 @@ bce_start_locked(struct ifnet *ifp)
 	DBPRINT(sc, BCE_VERBOSE_SEND, "%s(): Inserted %d frames into "
 	    "send queue.\n", __FUNCTION__, count);
 
+	for (i = 0; i < TX_PAGES; i++)
+		bus_dmamap_sync(sc->tx_bd_chain_tag, sc->tx_bd_chain_map[i],
+		    BUS_DMASYNC_PREWRITE);
+
+	/* Tell the chip about the waiting TX frames. */
+	REG_WR16(sc, MB_GET_CID_ADDR(TX_CID) + BCE_L2MQ_TX_HOST_BIDX,
+	    sc->tx_prod);
+	REG_WR(sc, MB_GET_CID_ADDR(TX_CID) + BCE_L2MQ_TX_HOST_BSEQ,
+	    sc->tx_prod_bseq);
+
 	/* Set the tx timeout. */
 	sc->watchdog_timer = BCE_TX_TIMEOUT;
 

--zx4FCpZtqtKETZ7O--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20100824192652.GD7118>