Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 28 Sep 2009 23:48:16 +0000 (UTC)
From:      Andrew Gallatin <gallatin@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r197607 - in stable/8/sys: . amd64/include/xen cddl/contrib/opensolaris contrib/dev/acpica contrib/pf dev/mxge dev/xen/xenpci
Message-ID:  <200909282348.n8SNmGd7069858@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gallatin
Date: Mon Sep 28 23:48:16 2009
New Revision: 197607
URL: http://svn.freebsd.org/changeset/base/197607

Log:
  MFC 197395: Improve mxge watchdog routine's ability to reliably reset a failed NIC
  
  Approved by: re (kib)

Modified:
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/mxge/if_mxge.c
  stable/8/sys/dev/xen/xenpci/   (props changed)

Modified: stable/8/sys/dev/mxge/if_mxge.c
==============================================================================
--- stable/8/sys/dev/mxge/if_mxge.c	Mon Sep 28 22:41:28 2009	(r197606)
+++ stable/8/sys/dev/mxge/if_mxge.c	Mon Sep 28 23:48:16 2009	(r197607)
@@ -143,7 +143,7 @@ MODULE_DEPEND(mxge, zlib, 1, 1, 1);
 
 static int mxge_load_firmware(mxge_softc_t *sc, int adopt);
 static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data);
-static int mxge_close(mxge_softc_t *sc);
+static int mxge_close(mxge_softc_t *sc, int down);
 static int mxge_open(mxge_softc_t *sc);
 static void mxge_tick(void *arg);
 
@@ -1305,8 +1305,7 @@ mxge_reset(mxge_softc_t *sc, int interru
 		ss->lro_queued = 0;
 		ss->lro_flushed = 0;
 		if (ss->fw_stats != NULL) {
-			ss->fw_stats->valid = 0;
-			ss->fw_stats->send_done_count = 0;
+			bzero(ss->fw_stats, sizeof *ss->fw_stats);
 		}
 	}
 	sc->rdma_tags_available = 15;
@@ -1379,7 +1378,7 @@ mxge_change_lro_locked(mxge_softc_t *sc,
 		ifp->if_capenable |= IFCAP_LRO;
 	sc->lro_cnt = lro_cnt;
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-		mxge_close(sc);
+		mxge_close(sc, 0);
 		err = mxge_open(sc);
 	}
 	return err;
@@ -1495,6 +1494,10 @@ mxge_add_sysctls(mxge_softc_t *sc)
 		       "read_write_dma_MBs",
 		       CTLFLAG_RD, &sc->read_write_dma,
 		       0, "DMA concurrent Read/Write speed in MB/s");
+	SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
+		       "watchdog_resets",
+		       CTLFLAG_RD, &sc->watchdog_resets,
+		       0, "Number of times NIC was reset");
 
 
 	/* performance related tunables */
@@ -3600,7 +3603,7 @@ abort:
 }
 
 static int
-mxge_close(mxge_softc_t *sc)
+mxge_close(mxge_softc_t *sc, int down)
 {
 	mxge_cmd_t cmd;
 	int err, old_down_cnt;
@@ -3617,21 +3620,23 @@ mxge_close(mxge_softc_t *sc)
 	}
 #endif
 	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
-	old_down_cnt = sc->down_cnt;
-	wmb();
-	err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
-	if (err) {
-		device_printf(sc->dev, "Couldn't bring down link\n");
-	}
-	if (old_down_cnt == sc->down_cnt) {
-		/* wait for down irq */
-		DELAY(10 * sc->intr_coal_delay);
-	}
-	wmb();
-	if (old_down_cnt == sc->down_cnt) {
-		device_printf(sc->dev, "never got down irq\n");
+	if (!down) {
+		old_down_cnt = sc->down_cnt;
+		wmb();
+		err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd);
+		if (err) {
+			device_printf(sc->dev,
+				      "Couldn't bring down link\n");
+		}
+		if (old_down_cnt == sc->down_cnt) {
+			/* wait for down irq */
+			DELAY(10 * sc->intr_coal_delay);
+		}
+		wmb();
+		if (old_down_cnt == sc->down_cnt) {
+			device_printf(sc->dev, "never got down irq\n");
+		}
 	}
-
 	mxge_free_mbufs(sc);
 
 	return 0;
@@ -3684,8 +3689,9 @@ static int
 mxge_watchdog_reset(mxge_softc_t *sc, int slice)
 {
 	struct pci_devinfo *dinfo;
+	struct mxge_slice_state *ss;
 	mxge_tx_ring_t *tx;
-	int err;
+	int err, running, s, num_tx_slices = 1;
 	uint32_t reboot;
 	uint16_t cmd;
 
@@ -3719,6 +3725,30 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
 		reboot = mxge_read_reboot(sc);
 		device_printf(sc->dev, "NIC rebooted, status = 0x%x\n",
 			      reboot);
+		running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING;
+		if (running) {
+
+			/* 
+			 * quiesce NIC so that TX routines will not try to
+			 * xmit after restoration of BAR
+			 */
+
+			/* Mark the link as down */
+			if (sc->link_state) {
+				sc->link_state = 0;
+				if_link_state_change(sc->ifp,
+						     LINK_STATE_DOWN);
+			}
+#ifdef IFNET_BUF_RING
+			num_tx_slices = sc->num_slices;
+#endif
+			/* grab all TX locks to ensure no tx  */
+			for (s = 0; s < num_tx_slices; s++) {
+				ss = &sc->ss[s];
+				mtx_lock(&ss->tx.mtx);
+			}
+			mxge_close(sc, 1);
+		}
 		/* restore PCI configuration space */
 		dinfo = device_get_ivars(sc->dev);
 		pci_cfg_restore(sc->dev, dinfo);
@@ -3726,10 +3756,22 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
 		/* and redo any changes we made to our config space */
 		mxge_setup_cfg_space(sc);
 
-		if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) {
-			mxge_close(sc);
-			err = mxge_open(sc);
+		/* reload f/w */
+		err = mxge_load_firmware(sc, 0);
+		if (err) {
+			device_printf(sc->dev,
+				      "Unable to re-load f/w\n");
 		}
+		if (running) {
+			if (!err)
+				err = mxge_open(sc);
+			/* release all TX locks */
+			for (s = 0; s < num_tx_slices; s++) {
+				ss = &sc->ss[s];
+				mtx_unlock(&ss->tx.mtx);
+			}
+		}
+		sc->watchdog_resets++;
 	} else {
 		tx = &sc->ss[slice].tx;
 		device_printf(sc->dev,
@@ -3745,6 +3787,9 @@ mxge_watchdog_reset(mxge_softc_t *sc, in
 			      be32toh(sc->ss->fw_stats->send_done_count));
 		device_printf(sc->dev, "not resetting\n");
 	}
+	if (err)
+		device_printf(sc->dev, "watchdog reset failed\n");
+
 	return (err);
 }
 
@@ -3860,11 +3905,11 @@ mxge_change_mtu(mxge_softc_t *sc, int mt
 	old_mtu = ifp->if_mtu;
 	ifp->if_mtu = mtu;
 	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-		mxge_close(sc);
+		mxge_close(sc, 0);
 		err = mxge_open(sc);
 		if (err != 0) {
 			ifp->if_mtu = old_mtu;
-			mxge_close(sc);
+			mxge_close(sc, 0);
 			(void) mxge_open(sc);
 		}
 	}
@@ -3922,7 +3967,7 @@ mxge_ioctl(struct ifnet *ifp, u_long com
 			}
 		} else {
 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
-				mxge_close(sc);
+				mxge_close(sc, 0);
 			}
 		}
 		mtx_unlock(&sc->driver_mtx);
@@ -4645,7 +4690,7 @@ mxge_detach(device_t dev)
 	mtx_lock(&sc->driver_mtx);
 	sc->dying = 1;
 	if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING)
-		mxge_close(sc);
+		mxge_close(sc, 0);
 	mtx_unlock(&sc->driver_mtx);
 	ether_ifdetach(sc->ifp);
 	callout_drain(&sc->co_hdl);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200909282348.n8SNmGd7069858>