Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 3 Nov 2006 18:16:33 -0800
From:      "Jack Vogel" <jfvogel@gmail.com>
To:        freebsd-stable@freebsd.org, freebsd-net <freebsd-net@freebsd.org>
Subject:   New em patch for 6.2 BETA 3
Message-ID:  <2a41acea0611031816n1af99819rdc6b99e9dd2deb7c@mail.gmail.com>

next in thread | raw e-mail | index | archive | help

[-- Attachment #1 --]
I have been hard at work trying to understand and fix the
remaining issues with the em driver. What I have here is
a patch that has gotten rid of any issues that I can reproduce.

It solves the intermittent watchdogs that have been happening.
It also fixes the problem noted with jumbo frame tx

I, and re, would very much appreciate any test feedback you can
give on this code. I am happy with the changes, I hope these get
rid of everyone's issues.

Thanks to Gleb and Scott and John for all the help!

Jack

[-- Attachment #2 --]
diff -Naur stable/dev/em/if_em.c jfv/dev/em/if_em.c
--- stable/dev/em/if_em.c	Fri Nov  3 02:02:29 2006
+++ jfv/dev/em/if_em.c	Sat Nov  4 09:18:48 2006
@@ -204,7 +204,7 @@
 static void	em_start(struct ifnet *);
 static void	em_start_locked(struct ifnet *ifp);
 static int	em_ioctl(struct ifnet *, u_long, caddr_t);
-static void	em_watchdog(struct ifnet *);
+static void	em_watchdog(void *);
 static void	em_init(void *);
 static void	em_init_locked(struct adapter *);
 static void	em_stop(void *);
@@ -253,8 +253,7 @@
 static int	em_82547_fifo_workaround(struct adapter *, int);
 static void	em_82547_update_fifo_head(struct adapter *, int);
 static int	em_82547_tx_fifo_reset(struct adapter *);
-static void	em_82547_move_tail(void *arg);
-static void	em_82547_move_tail_locked(struct adapter *);
+static void	em_82547_move_tail(void *);
 static int	em_dma_malloc(struct adapter *, bus_size_t,
 		struct em_dma_alloc *, int);
 static void	em_dma_free(struct adapter *, struct em_dma_alloc *);
@@ -406,8 +405,9 @@
 	    OID_AUTO, "stats", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
 	    em_sysctl_stats, "I", "Statistics");
 
-	callout_init(&adapter->timer, CALLOUT_MPSAFE);
-	callout_init(&adapter->tx_fifo_timer, CALLOUT_MPSAFE);
+	callout_init_mtx(&adapter->timer, &adapter->mtx, 0);
+	callout_init_mtx(&adapter->watchdog, &adapter->mtx, 0);
+	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->mtx, 0);
 
 	/* Determine hardware revision */
 	em_identify_hardware(adapter);
@@ -622,6 +622,9 @@
 	EM_UNLOCK(adapter);
 	ether_ifdetach(adapter->ifp);
 
+	callout_drain(&adapter->timer);
+	callout_drain(&adapter->tx_fifo_timer);
+
 	em_free_pci_resources(adapter);
 	bus_generic_detach(dev);
 	if_free(ifp);
@@ -739,7 +742,7 @@
 		BPF_MTAP(ifp, m_head);
 
 		/* Set timeout in case hardware has problems transmitting. */
-		ifp->if_timer = EM_TX_TIMEOUT;
+		callout_reset(&adapter->watchdog, 5 * hz, em_watchdog, adapter);
 	}
 }
 
@@ -947,17 +950,28 @@
  **********************************************************************/
 
 static void
-em_watchdog(struct ifnet *ifp)
+em_watchdog(void *arg)
 {
-	struct adapter *adapter = ifp->if_softc;
+	struct adapter *adapter = arg;
+	struct ifnet	*ifp = adapter->ifp;
+
+	/*
+	** If we are above the clean threshold disarm callback
+	** else rearm for 5 secs.
+	*/
+	if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) {
+		callout_stop(&adapter->watchdog);
+		return;
+	} else if (adapter->num_tx_desc_avail > EM_TX_OP_THRESHOLD) {
+		callout_reset(&adapter->watchdog, 5 * hz, em_watchdog, adapter);
+		return;
+	}
 
-	EM_LOCK(adapter);
 	/* If we are in this routine because of pause frames, then
 	 * don't reset the hardware.
 	 */
 	if (E1000_READ_REG(&adapter->hw, STATUS) & E1000_STATUS_TXOFF) {
-		ifp->if_timer = EM_TX_TIMEOUT;
-		EM_UNLOCK(adapter);
+		callout_stop(&adapter->watchdog);
 		return;
 	}
 
@@ -968,7 +982,6 @@
 	adapter->watchdog_events++;
 
 	em_init_locked(adapter);
-	EM_UNLOCK(adapter);
 }
 
 /*********************************************************************
@@ -1154,6 +1167,8 @@
  *  Interrupt Service routine  
  *
  *********************************************************************/
+#define EM_MAX_INTR 10
+
 static void
 em_intr(void *arg)
 {
@@ -1162,7 +1177,6 @@
 	uint32_t	reg_icr;
 
 	EM_LOCK(adapter);
-
 	ifp = adapter->ifp;
 
 #ifdef DEVICE_POLLING
@@ -1172,28 +1186,24 @@
 	}
 #endif /* DEVICE_POLLING */
 
-	for (;;) {
-		reg_icr = E1000_READ_REG(&adapter->hw, ICR);
-		if (adapter->hw.mac_type >= em_82571 &&
-		    (reg_icr & E1000_ICR_INT_ASSERTED) == 0)
-			break;
-		else if (reg_icr == 0)
-			break;
+	reg_icr = E1000_READ_REG(&adapter->hw, ICR);
 
-		/*
-		 * XXX: some laptops trigger several spurious interrupts
-		 * on em(4) when in the resume cycle. The ICR register
-		 * reports all-ones value in this case. Processing such
-		 * interrupts would lead to a freeze. I don't know why.
-		 */
-		if (reg_icr == 0xffffffff)
-			break;
+	if ((reg_icr == 0) || (adapter->hw.mac_type >= em_82571 &&
+	    (reg_icr & E1000_ICR_INT_ASSERTED) == 0) ||
+	/*
+	 * XXX: some laptops trigger several spurious interrupts
+	 * on em(4) when in the resume cycle. The ICR register
+	 * reports all-ones value in this case. Processing such
+	 * interrupts would lead to a freeze. I don't know why.
+	 */
+	    (reg_icr == 0xffffffff))
+		goto leaving;
 
+	for (int i = 0;i < EM_MAX_INTR; ++i) {
 		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-			em_rxeof(adapter, -1);
+			em_rxeof(adapter, 100);
 			em_txeof(adapter);
 		}
-
 		/* Link status change */
 		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
 			callout_stop(&adapter->timer);
@@ -1208,10 +1218,10 @@
 			adapter->rx_overruns++;
 	}
 
+leaving:
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
 	    !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
 		em_start_locked(ifp);
-
 	EM_UNLOCK(adapter);
 }
 
@@ -1368,20 +1378,13 @@
          */
 	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
 		em_txeof(adapter);
-		if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
+		/* Now do we at least have a minimal? */
+		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
 			adapter->no_tx_desc_avail1++;
 			return(ENOBUFS);
 		}
         }
 
-	/*
-	 * Capture the first descriptor index,
-	 * this descriptor will have the index
-	 * of the EOP which is the only one that
-	 * now gets a DONE bit writeback.
-	 */
-	first = adapter->next_avail_tx_desc;
-
         /* Find out if we are in vlan mode */
         mtag = VLAN_OUTPUT_TAG(ifp, m_head);
 
@@ -1433,6 +1436,14 @@
 			return (ENOBUFS);
 	}
 
+	/*
+	 * Capture the first descriptor index,
+	 * this descriptor will have the index
+	 * of the EOP which is the only one that
+	 * now gets a DONE bit writeback.
+	 */
+	first = adapter->next_avail_tx_desc;
+
         /*
          * Map the packet for DMA.
          */
@@ -1452,6 +1463,7 @@
 			return (ENOBUFS);
 		}
 		*m_headp = m;
+
 		/* Try it again */
 		error = bus_dmamap_load_mbuf_sg(adapter->txtag, tx_buffer->map,
 		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
@@ -1638,7 +1650,7 @@
 	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
 	if (adapter->hw.mac_type == em_82547 &&
 	    adapter->link_duplex == HALF_DUPLEX)
-		em_82547_move_tail_locked(adapter);
+		em_82547_move_tail(adapter);
 	else {
 		E1000_WRITE_REG(&adapter->hw, TDT, i);
 		if (adapter->hw.mac_type == em_82547)
@@ -1662,8 +1674,9 @@
  *
  **********************************************************************/
 static void
-em_82547_move_tail_locked(struct adapter *adapter)
+em_82547_move_tail(void *arg)
 {
+	struct adapter *adapter = arg;
 	uint16_t hw_tdt;
 	uint16_t sw_tdt;
 	struct em_tx_desc *tx_desc;
@@ -1696,15 +1709,6 @@
 	}	
 }
 
-static void
-em_82547_move_tail(void *arg)
-{
-	struct adapter *adapter = arg;
-
-	EM_LOCK(adapter);
-	em_82547_move_tail_locked(adapter);
-	EM_UNLOCK(adapter);
-}
 
 static int
 em_82547_fifo_workaround(struct adapter *adapter, int len)
@@ -1893,7 +1897,7 @@
 	struct adapter	*adapter = arg;
 	struct ifnet	*ifp = adapter->ifp;
 
-	EM_LOCK(adapter);
+	EM_LOCK_ASSERT(adapter);
 
 	em_check_for_link(&adapter->hw);
 	em_update_link_status(adapter);
@@ -1904,7 +1908,6 @@
 
 	callout_reset(&adapter->timer, hz, em_local_timer, adapter);
 
-	EM_UNLOCK(adapter);
 }
 
 static void
@@ -2229,7 +2232,8 @@
 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_ioctl = em_ioctl;
 	ifp->if_start = em_start;
-	ifp->if_watchdog = em_watchdog;
+	ifp->if_timer = 0;	/* Disable net layer watchdog */
+	ifp->if_watchdog = NULL;
 	IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1);
 	ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1;
 	IFQ_SET_READY(&ifp->if_snd);
@@ -2453,26 +2457,19 @@
 {
 	device_t dev = adapter->dev;
 	struct em_buffer *tx_buffer;
-	bus_size_t size, segsize;
 	int error, i;
 
-#ifdef EM_TSO
-	size = EM_TSO_SIZE;
-	segsize = PAGE_SIZE;
-#else
-	segsize = size = roundup2(adapter->hw.max_frame_size, MCLBYTES);
-#endif
 	/*
-	 * Setup DMA descriptor areas.
+	 * Create DMA tags for tx descriptors
 	 */
 	if ((error = bus_dma_tag_create(NULL,		/* parent */
 				1, 0,			/* alignment, bounds */
 				BUS_SPACE_MAXADDR,	/* lowaddr */
 				BUS_SPACE_MAXADDR,	/* highaddr */
 				NULL, NULL,		/* filter, filterarg */
-				size,			/* maxsize */
+				EM_TSO_SIZE,		/* maxsize */
 				EM_MAX_SCATTER,		/* nsegments */
-				segsize,		/* maxsegsize */
+				PAGE_SIZE,		/* maxsegsize */
 				0,			/* flags */
 				NULL,		/* lockfunc */
 				NULL,		/* lockarg */
@@ -2489,6 +2486,7 @@
 		goto fail;
 	}
 
+	/* Create the descriptor buffer dma maps */
 	tx_buffer = adapter->tx_buffer_area;
 	for (i = 0; i < adapter->num_tx_desc; i++) {
 		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
@@ -2517,6 +2515,7 @@
 	struct em_buffer *tx_buffer;
 	int i;
 
+	/* Clear the old ring contents */
 	bzero(adapter->tx_desc_base,
 	    (sizeof(struct em_tx_desc)) * adapter->num_tx_desc);
 
@@ -2857,8 +2856,10 @@
         eop_desc = &adapter->tx_desc_base[last];
 
 	/*
-	 * Now caculate the terminating index
-	 * for the cleanup loop below
+	 * What this does is get the index of the
+	 * first descriptor AFTER the EOP of the 
+	 * first packet, that way we can do the
+	 * simple comparison on the inner while loop.
 	 */
 	if (++last == adapter->num_tx_desc) last = 0;
 	done = last;
@@ -2916,9 +2917,10 @@
         if (num_avail > EM_TX_CLEANUP_THRESHOLD) {                
                 ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
                 if (num_avail == adapter->num_tx_desc)
-                        ifp->if_timer = 0;
+			callout_stop(&adapter->watchdog);
                 else if (num_avail != adapter->num_tx_desc_avail)
-                        ifp->if_timer = EM_TX_TIMEOUT;
+			callout_reset(&adapter->watchdog, 5 * hz,
+			    em_watchdog, adapter);
         }
         adapter->num_tx_desc_avail = num_avail;
         return;
@@ -3796,6 +3798,8 @@
 	    adapter->mbuf_cluster_failed);
 	device_printf(dev, "Driver dropped packets = %ld\n",
 	    adapter->dropped_pkts);
+	device_printf(dev, "Driver tx dma failure in encap = %ld\n",
+		adapter->no_tx_dma_setup);
 }
 
 static void
diff -Naur stable/dev/em/if_em.h jfv/dev/em/if_em.h
--- stable/dev/em/if_em.h	Fri Nov  3 02:02:39 2006
+++ jfv/dev/em/if_em.h	Sat Nov  4 06:26:58 2006
@@ -154,6 +154,7 @@
  * transmit descriptors.
  */
 #define EM_TX_CLEANUP_THRESHOLD		(adapter->num_tx_desc / 8)
+#define EM_TX_OP_THRESHOLD		8
 
 /*
  * This parameter controls whether or not autonegotation is enabled.
@@ -328,6 +329,7 @@
 	struct ifmedia	media;
 	struct callout	timer;
 	struct callout	tx_fifo_timer;
+	struct callout	watchdog;
 	int		io_rid;
 	int		if_flags;
 	struct mtx	mtx;

Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?2a41acea0611031816n1af99819rdc6b99e9dd2deb7c>