From owner-svn-src-all@FreeBSD.ORG Mon Sep 28 23:48:16 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id AF9D4106566C; Mon, 28 Sep 2009 23:48:16 +0000 (UTC) (envelope-from gallatin@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 9C6018FC1E; Mon, 28 Sep 2009 23:48:16 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n8SNmGD8069860; Mon, 28 Sep 2009 23:48:16 GMT (envelope-from gallatin@svn.freebsd.org) Received: (from gallatin@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n8SNmGd7069858; Mon, 28 Sep 2009 23:48:16 GMT (envelope-from gallatin@svn.freebsd.org) Message-Id: <200909282348.n8SNmGd7069858@svn.freebsd.org> From: Andrew Gallatin Date: Mon, 28 Sep 2009 23:48:16 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org X-SVN-Group: stable-8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r197607 - in stable/8/sys: . amd64/include/xen cddl/contrib/opensolaris contrib/dev/acpica contrib/pf dev/mxge dev/xen/xenpci X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 28 Sep 2009 23:48:16 -0000 Author: gallatin Date: Mon Sep 28 23:48:16 2009 New Revision: 197607 URL: http://svn.freebsd.org/changeset/base/197607 Log: MFC 197395: Improve mxge watchdog routine's ability to reliably reset a failed NIC Approved by: re (kib) Modified: stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/mxge/if_mxge.c stable/8/sys/dev/xen/xenpci/ (props changed) Modified: stable/8/sys/dev/mxge/if_mxge.c ============================================================================== --- stable/8/sys/dev/mxge/if_mxge.c Mon Sep 28 22:41:28 2009 (r197606) +++ stable/8/sys/dev/mxge/if_mxge.c Mon Sep 28 23:48:16 2009 (r197607) @@ -143,7 +143,7 @@ MODULE_DEPEND(mxge, zlib, 1, 1, 1); static int mxge_load_firmware(mxge_softc_t *sc, int adopt); static int mxge_send_cmd(mxge_softc_t *sc, uint32_t cmd, mxge_cmd_t *data); -static int mxge_close(mxge_softc_t *sc); +static int mxge_close(mxge_softc_t *sc, int down); static int mxge_open(mxge_softc_t *sc); static void mxge_tick(void *arg); @@ -1305,8 +1305,7 @@ mxge_reset(mxge_softc_t *sc, int interru ss->lro_queued = 0; ss->lro_flushed = 0; if (ss->fw_stats != NULL) { - ss->fw_stats->valid = 0; - ss->fw_stats->send_done_count = 0; + bzero(ss->fw_stats, sizeof *ss->fw_stats); } } sc->rdma_tags_available = 15; @@ -1379,7 +1378,7 @@ mxge_change_lro_locked(mxge_softc_t *sc, ifp->if_capenable |= IFCAP_LRO; sc->lro_cnt = lro_cnt; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); + mxge_close(sc, 0); err = mxge_open(sc); } return err; @@ -1495,6 +1494,10 @@ mxge_add_sysctls(mxge_softc_t *sc) "read_write_dma_MBs", CTLFLAG_RD, &sc->read_write_dma, 0, "DMA concurrent Read/Write speed in MB/s"); + SYSCTL_ADD_INT(ctx, children, OID_AUTO, + "watchdog_resets", + CTLFLAG_RD, &sc->watchdog_resets, + 0, "Number of times NIC was reset"); /* performance related tunables */ @@ -3600,7 +3603,7 @@ abort: } static int -mxge_close(mxge_softc_t *sc) +mxge_close(mxge_softc_t *sc, int down) { mxge_cmd_t cmd; int err, old_down_cnt; @@ -3617,21 +3620,23 @@ mxge_close(mxge_softc_t *sc) } #endif sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - old_down_cnt = sc->down_cnt; - wmb(); - err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); - if (err) { - device_printf(sc->dev, "Couldn't bring down link\n"); - } - if (old_down_cnt == sc->down_cnt) { - /* wait for down irq */ - DELAY(10 * sc->intr_coal_delay); - } - wmb(); - if (old_down_cnt == sc->down_cnt) { - device_printf(sc->dev, "never got down irq\n"); + if (!down) { + old_down_cnt = sc->down_cnt; + wmb(); + err = mxge_send_cmd(sc, MXGEFW_CMD_ETHERNET_DOWN, &cmd); + if (err) { + device_printf(sc->dev, + "Couldn't bring down link\n"); + } + if (old_down_cnt == sc->down_cnt) { + /* wait for down irq */ + DELAY(10 * sc->intr_coal_delay); + } + wmb(); + if (old_down_cnt == sc->down_cnt) { + device_printf(sc->dev, "never got down irq\n"); + } } - mxge_free_mbufs(sc); return 0; @@ -3684,8 +3689,9 @@ static int mxge_watchdog_reset(mxge_softc_t *sc, int slice) { struct pci_devinfo *dinfo; + struct mxge_slice_state *ss; mxge_tx_ring_t *tx; - int err; + int err, running, s, num_tx_slices = 1; uint32_t reboot; uint16_t cmd; @@ -3719,6 +3725,30 @@ mxge_watchdog_reset(mxge_softc_t *sc, in reboot = mxge_read_reboot(sc); device_printf(sc->dev, "NIC rebooted, status = 0x%x\n", reboot); + running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; + if (running) { + + /* + * quiesce NIC so that TX routines will not try to + * xmit after restoration of BAR + */ + + /* Mark the link as down */ + if (sc->link_state) { + sc->link_state = 0; + if_link_state_change(sc->ifp, + LINK_STATE_DOWN); + } +#ifdef IFNET_BUF_RING + num_tx_slices = sc->num_slices; +#endif + /* grab all TX locks to ensure no tx */ + for (s = 0; s < num_tx_slices; s++) { + ss = &sc->ss[s]; + mtx_lock(&ss->tx.mtx); + } + mxge_close(sc, 1); + } /* restore PCI configuration space */ dinfo = device_get_ivars(sc->dev); pci_cfg_restore(sc->dev, dinfo); @@ -3726,10 +3756,22 @@ mxge_watchdog_reset(mxge_softc_t *sc, in /* and redo any changes we made to our config space */ mxge_setup_cfg_space(sc); - if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); - err = mxge_open(sc); + /* reload f/w */ + err = mxge_load_firmware(sc, 0); + if (err) { + device_printf(sc->dev, + "Unable to re-load f/w\n"); } + if (running) { + if (!err) + err = mxge_open(sc); + /* release all TX locks */ + for (s = 0; s < num_tx_slices; s++) { + ss = &sc->ss[s]; + mtx_unlock(&ss->tx.mtx); + } + } + sc->watchdog_resets++; } else { tx = &sc->ss[slice].tx; device_printf(sc->dev, @@ -3745,6 +3787,9 @@ mxge_watchdog_reset(mxge_softc_t *sc, in be32toh(sc->ss->fw_stats->send_done_count)); device_printf(sc->dev, "not resetting\n"); } + if (err) + device_printf(sc->dev, "watchdog reset failed\n"); + return (err); } @@ -3860,11 +3905,11 @@ mxge_change_mtu(mxge_softc_t *sc, int mt old_mtu = ifp->if_mtu; ifp->if_mtu = mtu; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); + mxge_close(sc, 0); err = mxge_open(sc); if (err != 0) { ifp->if_mtu = old_mtu; - mxge_close(sc); + mxge_close(sc, 0); (void) mxge_open(sc); } } @@ -3922,7 +3967,7 @@ mxge_ioctl(struct ifnet *ifp, u_long com } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - mxge_close(sc); + mxge_close(sc, 0); } } mtx_unlock(&sc->driver_mtx); @@ -4645,7 +4690,7 @@ mxge_detach(device_t dev) mtx_lock(&sc->driver_mtx); sc->dying = 1; if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) - mxge_close(sc); + mxge_close(sc, 0); mtx_unlock(&sc->driver_mtx); ether_ifdetach(sc->ifp); callout_drain(&sc->co_hdl);