From owner-svn-src-all@FreeBSD.ORG Mon Sep 28 15:11:49 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id AAD4C106566C; Mon, 28 Sep 2009 15:11:49 +0000 (UTC) (envelope-from gallatin@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 982EE8FC13; Mon, 28 Sep 2009 15:11:49 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n8SFBn0i058215; Mon, 28 Sep 2009 15:11:49 GMT (envelope-from gallatin@svn.freebsd.org) Received: (from gallatin@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n8SFBn7B058213; Mon, 28 Sep 2009 15:11:49 GMT (envelope-from gallatin@svn.freebsd.org) Message-Id: <200909281511.n8SFBn7B058213@svn.freebsd.org> From: Andrew Gallatin Date: Mon, 28 Sep 2009 15:11:49 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-6@freebsd.org X-SVN-Group: stable-6 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r197578 - in stable/6/sys: . conf contrib/pf dev/cxgb dev/mxge X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 28 Sep 2009 15:11:49 -0000 Author: gallatin Date: Mon Sep 28 15:11:49 2009 New Revision: 197578 URL: http://svn.freebsd.org/changeset/base/197578 Log: MFC 197395: Improve mxge watchdog routine's ability to reliably reset a failed NIC Modified: stable/6/sys/ (props changed) stable/6/sys/conf/ (props changed) stable/6/sys/contrib/pf/ (props changed) stable/6/sys/dev/cxgb/ (props changed) stable/6/sys/dev/mxge/if_mxge.c Modified: stable/6/sys/dev/mxge/if_mxge.c ============================================================================== --- stable/6/sys/dev/mxge/if_mxge.c Mon Sep 28 15:10:08 2009 (r197577) +++ stable/6/sys/dev/mxge/if_mxge.c Mon Sep 28 15:11:49 2009 (r197578) @@ -135,7 +135,7 @@ MODULE_DEPEND(mxge, zlib, 1, 1, 1); static int mxge_load_firmware(mxge_softc_t *sc, int adopt); static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); -static int mxge_close(mxge_softc_t *sc); +static int mxge_close(mxge_softc_t *sc, int down); static int mxge_open(mxge_softc_t *sc); static void mxge_tick(void *arg); @@ -1291,8 +1291,7 @@ mxge_reset(mxge_softc_t *sc, int interru ss->lro_queued = 0; ss->lro_flushed = 0; if (ss->fw_stats != NULL) { - ss->fw_stats->valid = 0; - ss->fw_stats->send_done_count = 0; + bzero(ss->fw_stats, sizeof *ss->fw_stats); } } sc->rdma_tags_available = 15; @@ -1365,7 +1364,7 @@ mxge_change_lro_locked(mxge_softc_t *sc, ifp->if_capenable |= IFCAP_LRO; sc->lro_cnt = lro_cnt; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); + mxge_close(sc, 0); err = mxge_open(sc); } return err; @@ -1481,6 +1480,10 @@ mxge_add_sysctls(mxge_softc_t *sc) "read_write_dma_MBs", CTLFLAG_RD, &sc->read_write_dma, 0, "DMA concurrent Read/Write speed in MB/s"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "watchdog_resets", + CTLFLAG_RD, &sc->watchdog_resets, + 0, "Number of times NIC was reset"); /* performance related tunables */ @@ -3377,28 +3380,30 @@ abort: } static int -mxge_close(mxge_softc_t *sc) +mxge_close(mxge_softc_t *sc, int down) { mxge_cmd_t cmd; int err, old_down_cnt; callout_stop(&sc->co_hdl); sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - old_down_cnt = sc->down_cnt; - mb(); - err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); - if (err) { - device_printf(sc->dev, "Couldn't bring down link\n"); - } - if (old_down_cnt == sc->down_cnt) { - /* wait for down irq */ - DELAY(10 * sc->intr_coal_delay); - } - mb(); - if (old_down_cnt == sc->down_cnt) { - device_printf(sc->dev, "never got down irq\n"); + if (!down) { + old_down_cnt = sc->down_cnt; + mb(); + err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); + if (err) { + device_printf(sc->dev, + "Couldn't bring down link\n"); + } + if (old_down_cnt == sc->down_cnt) { + /* wait for down irq */ + DELAY(10 * sc->intr_coal_delay); + } + mb(); + if (old_down_cnt == sc->down_cnt) { + device_printf(sc->dev, "never got down irq\n"); + } } - mxge_free_mbufs(sc); return 0; @@ -3451,7 +3456,8 @@ static int mxge_watchdog_reset(mxge_softc_t *sc) { struct pci_devinfo *dinfo; - int err; + struct mxge_slice_state *ss; + int err, running, s, num_tx_slices = 1; uint32_t reboot; uint16_t cmd; @@ -3485,6 +3491,30 @@ mxge_watchdog_reset(mxge_softc_t *sc) reboot = mxge_read_reboot(sc); device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", reboot); + running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; + if (running) { + + /* + * quiesce NIC so that TX routines will not try to + * xmit after restoration of BAR + */ + + /* Mark the link as down */ + if (sc->link_state) { + sc->link_state = 0; + if_link_state_change(sc->ifp, + LINK_STATE_DOWN); + } +#ifdef IFNET_BUF_RING + num_tx_slices = sc->num_slices; +#endif + /* grab all TX locks to ensure no tx */ + for (s = 0; s < num_tx_slices; s++) { + ss = &sc->ss[s]; + mtx_lock(&ss->tx.mtx); + } + mxge_close(sc, 1); + } /* restore PCI configuration space */ dinfo = device_get_ivars(sc->dev); pci_cfg_restore(sc->dev, dinfo); @@ -3492,10 +3522,22 @@ mxge_watchdog_reset(mxge_softc_t *sc) /* and redo any changes we made to our config space */ mxge_setup_cfg_space(sc); - if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); - err = mxge_open(sc); + /* reload f/w */ + err = mxge_load_firmware(sc, 0); + if (err) { + device_printf(sc->dev, + "Unable to re-load f/w\n"); } + if (running) { + if (!err) + err = mxge_open(sc); + /* release all TX locks */ + for (s = 0; s < num_tx_slices; s++) { + ss = &sc->ss[s]; + mtx_unlock(&ss->tx.mtx); + } + } + sc->watchdog_resets++; } else { device_printf(sc->dev, "NIC did not reboot, ring state:\n"); device_printf(sc->dev, "tx.req=%d tx.done=%d\n", @@ -3505,6 +3547,9 @@ mxge_watchdog_reset(mxge_softc_t *sc) be32toh(sc->ss->fw_stats->send_done_count)); device_printf(sc->dev, "not resetting\n"); } + if (err) + device_printf(sc->dev, "watchdog reset failed\n"); + return (err); } @@ -3590,11 +3635,11 @@ mxge_change_mtu(mxge_softc_t *sc, int mt old_mtu = ifp->if_mtu; ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); + mxge_close(sc, 0); err = mxge_open(sc); if (err != 0) { ifp->if_mtu = old_mtu; - mxge_close(sc); + mxge_close(sc, 0); (void) mxge_open(sc); } } @@ -3648,7 +3693,7 @@ mxge_ioctl(struct ifnet *ifp, u_long com } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); + mxge_close(sc, 0); } } mtx_unlock(&sc->driver_mtx); @@ -4345,7 +4390,7 @@ mxge_detach(device_t dev) } mtx_lock(&sc->driver_mtx); if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) - mxge_close(sc); + mxge_close(sc, 0); mtx_unlock(&sc->driver_mtx); ether_ifdetach(sc->ifp); callout_drain(&sc->co_hdl);