From owner-svn-src-all@FreeBSD.ORG Tue Oct 20 18:58:29 2009 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34]) by hub.freebsd.org (Postfix) with ESMTP id 2DF0E1065679; Tue, 20 Oct 2009 18:58:29 +0000 (UTC) (envelope-from gallatin@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:4f8:fff6::2c]) by mx1.freebsd.org (Postfix) with ESMTP id 1CBF88FC08; Tue, 20 Oct 2009 18:58:29 +0000 (UTC) Received: from svn.freebsd.org (localhost [127.0.0.1]) by svn.freebsd.org (8.14.3/8.14.3) with ESMTP id n9KIwSKB075458; Tue, 20 Oct 2009 18:58:28 GMT (envelope-from gallatin@svn.freebsd.org) Received: (from gallatin@localhost) by svn.freebsd.org (8.14.3/8.14.3/Submit) id n9KIwSDS075456; Tue, 20 Oct 2009 18:58:28 GMT (envelope-from gallatin@svn.freebsd.org) Message-Id: <200910201858.n9KIwSDS075456@svn.freebsd.org> From: Andrew Gallatin Date: Tue, 20 Oct 2009 18:58:28 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Subject: svn commit: r198303 - head/sys/dev/mxge X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Tue, 20 Oct 2009 18:58:29 -0000 Author: gallatin Date: Tue Oct 20 18:58:28 2009 New Revision: 198303 URL: http://svn.freebsd.org/changeset/base/198303 Log: Make mxge do a better job recovering from NIC h/w faults by checking PCI config space when the NIC is not transmitting. Previously, a h/w fault would not have been detected if the NIC was down or handling an RX-only workload. 
Modified: head/sys/dev/mxge/if_mxge.c Modified: head/sys/dev/mxge/if_mxge.c ============================================================================== --- head/sys/dev/mxge/if_mxge.c Tue Oct 20 18:54:51 2009 (r198302) +++ head/sys/dev/mxge/if_mxge.c Tue Oct 20 18:58:28 2009 (r198303) @@ -3640,7 +3640,6 @@ mxge_open(mxge_softc_t *sc) #endif sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); return 0; @@ -3661,7 +3660,6 @@ mxge_close(mxge_softc_t *sc, int down) int slice; #endif - callout_stop(&sc->co_hdl); #ifdef IFNET_BUF_RING for (slice = 0; slice < sc->num_slices; slice++) { ss = &sc->ss[slice]; @@ -3836,9 +3834,9 @@ mxge_watchdog_reset(mxge_softc_t *sc) if (err) { device_printf(sc->dev, "watchdog reset failed\n"); } else { - if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) - callout_reset(&sc->co_hdl, mxge_ticks, - mxge_tick, sc); + if (sc->dying == 2) + sc->dying = 0; + callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); } } @@ -3909,10 +3907,11 @@ mxge_watchdog(mxge_softc_t *sc) return (err); } -static void +static u_long mxge_update_stats(mxge_softc_t *sc) { struct mxge_slice_state *ss; + u_long pkts = 0; u_long ipackets = 0; u_long opackets = 0; #ifdef IFNET_BUF_RING @@ -3934,6 +3933,8 @@ mxge_update_stats(mxge_softc_t *sc) #endif oerrors += ss->oerrors; } + pkts = (ipackets - sc->ifp->if_ipackets); + pkts += (opackets - sc->ifp->if_opackets); sc->ifp->if_ipackets = ipackets; sc->ifp->if_opackets = opackets; #ifdef IFNET_BUF_RING @@ -3942,23 +3943,45 @@ mxge_update_stats(mxge_softc_t *sc) sc->ifp->if_snd.ifq_drops = odrops; #endif sc->ifp->if_oerrors = oerrors; + return pkts; } static void mxge_tick(void *arg) { mxge_softc_t *sc = arg; + u_long pkts = 0; int err = 0; + int running, ticks; + uint16_t cmd; - /* aggregate stats from different slices */ - mxge_update_stats(sc); - if (!sc->watchdog_countdown) { - err = mxge_watchdog(sc); - sc->watchdog_countdown = 4; + 
ticks = mxge_ticks; + mtx_lock(&sc->driver_mtx); + running = sc->ifp->if_drv_flags & IFF_DRV_RUNNING; + mtx_unlock(&sc->driver_mtx); + if (running) { + /* aggregate stats from different slices */ + pkts = mxge_update_stats(sc); + if (!sc->watchdog_countdown) { + err = mxge_watchdog(sc); + sc->watchdog_countdown = 4; + } + sc->watchdog_countdown--; + } + if (pkts == 0) { + /* ensure NIC did not suffer h/w fault while idle */ + cmd = pci_read_config(sc->dev, PCIR_COMMAND, 2); + if ((cmd & PCIM_CMD_BUSMASTEREN) == 0) { + sc->dying = 2; + taskqueue_enqueue(sc->tq, &sc->watchdog_task); + err = ENXIO; + } + /* look less often if NIC is idle */ + ticks *= 4; } - sc->watchdog_countdown--; + if (err == 0) - callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); + callout_reset(&sc->co_hdl, ticks, mxge_tick, sc); } @@ -4747,6 +4770,7 @@ mxge_attach(device_t dev) ifp->if_transmit = mxge_transmit; ifp->if_qflush = mxge_qflush; #endif + callout_reset(&sc->co_hdl, mxge_ticks, mxge_tick, sc); return 0; abort_with_rings: