Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 14 Nov 2011 16:07:36 -0800
From:      Adrian Chadd <adrian.chadd@gmail.com>
To:        freebsd-wireless@freebsd.org
Subject:   [patch] teach ath(4) to not flush frames during reset or run reset/tx/rx concurrently
Message-ID:  <CAJ-VmokEYWi=CWsbYN-RewU6noEmtjr0RxTGMzJJ_-CA3S=uFQ@mail.gmail.com>

index | next in thread | raw e-mail

[-- Attachment #1 --]
Hi,

One of my major outstanding issues with the atheros 11n tx/rx support
is to not flush frames out of the tx and rx queue during a queue reset
(eg during fatal errors, stuck beacon, etc.)
I can't (yet) do much when operational changes occur. I'll work on
that in subsequent commits.

I'd appreciate testing and feedback before I merge this into -HEAD.

Thanks,


Adrian

[-- Attachment #2 --]
diff --git a/sys/dev/ath/if_ath.c b/sys/dev/ath/if_ath.c
index c3485e4..39ba4ef 100644
--- a/sys/dev/ath/if_ath.c
+++ b/sys/dev/ath/if_ath.c
@@ -1819,6 +1819,42 @@ ath_stop_locked(struct ifnet *ifp)
 }
 
 static void
+ath_txrx_stop(struct ath_softc *sc)
+{
+	int i = 1000;
+
+	ATH_UNLOCK_ASSERT(sc);
+
+	/* Stop any new TX/RX from occuring */
+	taskqueue_block(sc->sc_tq);
+
+	ATH_LOCK(sc);
+
+	/*
+	 * Sleep until all the pending operations have completed.
+	 *
+	 * The caller must ensure that reset has been incremented
+	 * or this won't ever occur.
+	 */
+	while (sc->sc_rxproc_cnt || sc->sc_txproc_cnt ||
+	    sc->sc_txstart_cnt) {
+	    	if (i <= 0)
+			break;
+		msleep(sc, &sc->sc_mtx, 0, "ath_txrx_stop", 1);
+		i--;
+	}
+
+	ATH_UNLOCK(sc);
+}
+
+static void
+ath_txrx_start(struct ath_softc *sc)
+{
+
+	taskqueue_unblock(sc->sc_tq);
+}
+
+static void
 ath_stop(struct ifnet *ifp)
 {
 	struct ath_softc *sc = ifp->if_softc;
@@ -1842,17 +1878,41 @@ ath_reset(struct ifnet *ifp, ATH_RESET_TYPE reset_type)
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_STATUS status;
+	int i;
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
 
+	/* XXX ensure ATH_LOCK isn't held; ath_rx_proc can't be locked */
+	ATH_UNLOCK_ASSERT(sc);
+	ATH_LOCK(sc);
+	/* XXX if we're already inside a reset, print out a big warning */
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev,
+		    "%s: concurrent ath_reset()! Danger!\n",
+		    __func__);
+	}
+	sc->sc_inreset_cnt++;
+	ATH_UNLOCK(sc);
+
+	/*
+	 * XXX should now wait for pending TX/RX to complete
+	 * and block future ones from occuring.
+	 */
+	ath_txrx_stop(sc);
+
 	ath_hal_intrset(ah, 0);		/* disable interrupts */
 	ath_draintxq(sc, reset_type);	/* stop xmit side */
 	/*
-	 * XXX Don't flush if ATH_RESET_NOLOSS;but we have to first
-	 * XXX need to ensure this doesn't race with an outstanding
-	 * XXX taskqueue call.
+	 * If we're doing a flush-less dequeue, then stop PCU
+	 * receive so the FIFO isn't overrun, then handle
+	 * whatever frames are in the queue.
 	 */
-	ath_stoprecv(sc);		/* stop recv side */
+	if (reset_type == ATH_RESET_NOLOSS) {
+		ath_hal_stoppcurecv(ah);
+		ath_hal_setrxfilter(ah, 0);
+		ath_rx_proc(sc, 0);
+	} else
+		ath_stoprecv(sc);		/* stop recv side */
 	ath_settkipmic(sc);		/* configure TKIP MIC handling */
 	/* NB: indicate channel change so we do a full reset */
 	if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_TRUE, &status))
@@ -1881,6 +1941,26 @@ ath_reset(struct ifnet *ifp, ATH_RESET_TYPE reset_type)
 	}
 	ath_hal_intrset(ah, sc->sc_imask);
 
+	ATH_LOCK(sc);
+	sc->sc_inreset_cnt--;
+	ATH_UNLOCK(sc);
+
+	/* Restart TX/RX as needed */
+	ath_txrx_start(sc);
+
+	/* XXX Restart TX completion and pending TX */
+	if (reset_type == ATH_RESET_NOLOSS) {
+		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
+			if (ATH_TXQ_SETUP(sc, i)) {
+				ATH_TXQ_LOCK(&sc->sc_txq[i]);
+				ath_txq_restart_dma(sc, &sc->sc_txq[i]);
+				ath_txq_sched(sc, &sc->sc_txq[i]);
+				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
+			}
+		}
+	}
+
+	/* Handle any frames in the TX queue */
 	ath_start(ifp);			/* restart xmit */
 	return 0;
 }
@@ -2028,6 +2108,18 @@ ath_start(struct ifnet *ifp)
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid)
 		return;
+
+	/* XXX is it ok to hold the ATH_LOCK here? */
+	ATH_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev,
+		    "%s: sc_inreset_cnt > 0; bailing\n", __func__);
+		ATH_UNLOCK(sc);
+		return;
+	}
+	sc->sc_txstart_cnt++;
+	ATH_UNLOCK(sc);
+
 	for (;;) {
 		/*
 		 * Grab a TX buffer and associated resources.
@@ -2111,6 +2203,10 @@ ath_start(struct ifnet *ifp)
 
 		sc->sc_wd_timer = 5;
 	}
+
+	ATH_LOCK(sc);
+	sc->sc_txstart_cnt--;
+	ATH_UNLOCK(sc);
 }
 
 static int
@@ -3688,6 +3784,14 @@ ath_rx_tasklet(void *arg, int npending)
 
 	CTR1(ATH_KTR_INTR, "ath_rx_proc: pending=%d", npending);
 	DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: pending %u\n", __func__, npending);
+	ATH_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev,
+		    "%s: sc_inreset_cnt > 0; skipping\n", __func__);
+		ATH_UNLOCK(sc);
+		return;
+	}
+	ATH_UNLOCK(sc);
 	ath_rx_proc(sc, 1);
 }
 
@@ -3711,6 +3815,13 @@ ath_rx_proc(struct ath_softc *sc, int resched)
 	u_int64_t tsf;
 	int npkts = 0;
 
+	/* XXX we must not hold the ATH_LOCK here */
+	ATH_UNLOCK_ASSERT(sc);
+
+	ATH_LOCK(sc);
+	sc->sc_rxproc_cnt++;
+	ATH_UNLOCK(sc);
+
 	DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: called\n", __func__);
 	ngood = 0;
 	nf = ath_hal_getchannoise(ah, sc->sc_curchan);
@@ -4082,6 +4193,10 @@ rx_next:
 			ath_start(ifp);
 	}
 #undef PA2DESC
+
+	ATH_LOCK(sc);
+	sc->sc_rxproc_cnt--;
+	ATH_UNLOCK(sc);
 }
 
 static void
@@ -4589,6 +4704,7 @@ ath_tx_proc_q0(void *arg, int npending)
 	uint32_t txqs;
 
 	ATH_LOCK(sc);
+	sc->sc_txproc_cnt++;
 	txqs = sc->sc_txq_active;
 	sc->sc_txq_active &= ~txqs;
 	ATH_UNLOCK(sc);
@@ -4604,6 +4720,10 @@ ath_tx_proc_q0(void *arg, int npending)
 	if (sc->sc_softled)
 		ath_led_event(sc, sc->sc_txrix);
 
+	ATH_LOCK(sc);
+	sc->sc_txproc_cnt--;
+	ATH_UNLOCK(sc);
+
 	ath_start(ifp);
 }
 
@@ -4620,6 +4740,7 @@ ath_tx_proc_q0123(void *arg, int npending)
 	uint32_t txqs;
 
 	ATH_LOCK(sc);
+	sc->sc_txproc_cnt++;
 	txqs = sc->sc_txq_active;
 	sc->sc_txq_active &= ~txqs;
 	ATH_UNLOCK(sc);
@@ -4647,6 +4768,10 @@ ath_tx_proc_q0123(void *arg, int npending)
 	if (sc->sc_softled)
 		ath_led_event(sc, sc->sc_txrix);
 
+	ATH_LOCK(sc);
+	sc->sc_txproc_cnt--;
+	ATH_UNLOCK(sc);
+
 	ath_start(ifp);
 }
 
@@ -4662,6 +4787,7 @@ ath_tx_proc(void *arg, int npending)
 	uint32_t txqs;
 
 	ATH_LOCK(sc);
+	sc->sc_txproc_cnt++;
 	txqs = sc->sc_txq_active;
 	sc->sc_txq_active &= ~txqs;
 	ATH_UNLOCK(sc);
@@ -4682,6 +4808,10 @@ ath_tx_proc(void *arg, int npending)
 	if (sc->sc_softled)
 		ath_led_event(sc, sc->sc_txrix);
 
+	ATH_LOCK(sc);
+	sc->sc_txproc_cnt--;
+	ATH_UNLOCK(sc);
+
 	ath_start(ifp);
 }
 #undef	TXQACTIVE
@@ -4863,9 +4993,14 @@ ath_draintxq(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
 
 	(void) ath_stoptxdma(sc);
 
-	for (i = 0; i < HAL_NUM_TX_QUEUES; i++)
-		if (ATH_TXQ_SETUP(sc, i))
-			ath_tx_draintxq(sc, &sc->sc_txq[i]);
+	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
+		if (ATH_TXQ_SETUP(sc, i)) {
+			if (reset_type == ATH_RESET_NOLOSS)
+				ath_tx_processq(sc, &sc->sc_txq[i], 0);
+			else
+				ath_tx_draintxq(sc, &sc->sc_txq[i]);
+		}
+	}
 #ifdef ATH_DEBUG
 	if (sc->sc_debug & ATH_DEBUG_RESET) {
 		struct ath_buf *bf = TAILQ_FIRST(&sc->sc_bbuf);
@@ -4984,6 +5119,16 @@ ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan)
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
+	int ret = 0;
+
+	/* Treat this as an interface reset */
+	ATH_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0)
+		device_printf(sc->sc_dev, "%s: danger! concurrent reset!\n",
+		    __func__);
+	sc->sc_inreset_cnt++;
+	ATH_UNLOCK(sc);
+	ath_txrx_stop(sc);
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: %u (%u MHz, flags 0x%x)\n",
 	    __func__, ieee80211_chan2ieee(ic, chan),
@@ -5004,7 +5149,8 @@ ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan)
 			    "channel %u (%u MHz, flags 0x%x), hal status %u\n",
 			    __func__, ieee80211_chan2ieee(ic, chan),
 			    chan->ic_freq, chan->ic_flags, status);
-			return EIO;
+			ret = EIO;
+			goto finish;
 		}
 		sc->sc_diversity = ath_hal_getdiversity(ah);
 
@@ -5017,7 +5163,8 @@ ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan)
 		if (ath_startrecv(sc) != 0) {
 			if_printf(ifp, "%s: unable to restart recv logic\n",
 			    __func__);
-			return EIO;
+			ret = EIO;
+			goto finish;
 		}
 
 		/*
@@ -5044,7 +5191,14 @@ ath_chan_set(struct ath_softc *sc, struct ieee80211_channel *chan)
 		 */
 		ath_hal_intrset(ah, sc->sc_imask);
 	}
-	return 0;
+
+finish:
+	ATH_LOCK(sc);
+	sc->sc_inreset_cnt--;
+	ATH_UNLOCK(sc);
+	ath_txrx_start(sc);
+
+	return ret;
 }
 
 /*
@@ -5087,7 +5241,18 @@ ath_calibrate(void *arg)
 			DPRINTF(sc, ATH_DEBUG_CALIBRATE,
 				"%s: rfgain change\n", __func__);
 			sc->sc_stats.ast_per_rfgain++;
+			/*
+			 * Drop lock - we can't hold it across the
+			 * ath_reset() call. Instead, we'll drop
+			 * out here, do a reset, then reschedule
+			 * the callout.
+			 */
+			callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc);
+			sc->sc_resetcal = 0;
+			sc->sc_doresetcal = AH_TRUE;
+			ATH_UNLOCK(sc);
 			ath_reset(ifp, ATH_RESET_NOLOSS);
+			return;
 		}
 		/*
 		 * If this long cal is after an idle period, then
@@ -5742,6 +5907,7 @@ static void
 ath_watchdog(void *arg)
 {
 	struct ath_softc *sc = arg;
+	int do_reset = 0;
 
 	if (sc->sc_wd_timer != 0 && --sc->sc_wd_timer == 0) {
 		struct ifnet *ifp = sc->sc_ifp;
@@ -5753,10 +5919,20 @@ ath_watchdog(void *arg)
 			    hangs & 0xff ? "bb" : "mac", hangs);
 		} else
 			if_printf(ifp, "device timeout\n");
-		ath_reset(ifp, ATH_RESET_NOLOSS);
+		do_reset = 1;
 		ifp->if_oerrors++;
 		sc->sc_stats.ast_watchdog++;
 	}
+
+	/*
+	 * We can't hold the lock across the ath_reset() call.
+	 */
+	if (do_reset) {
+		ATH_UNLOCK(sc);
+		ath_reset(sc->sc_ifp, ATH_RESET_NOLOSS);
+		ATH_LOCK(sc);
+	}
+
 	callout_schedule(&sc->sc_wd_ch, hz);
 }
 
diff --git a/sys/dev/ath/if_ath_tx.c b/sys/dev/ath/if_ath_tx.c
index b8c4c80..1a8397f 100644
--- a/sys/dev/ath/if_ath_tx.c
+++ b/sys/dev/ath/if_ath_tx.c
@@ -477,8 +477,6 @@ ath_tx_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
 	txq->axq_link = &bf->bf_lastds->ds_link;
 }
 
-
-
 /*
  * Hand-off packet to a hardware queue.
  */
@@ -501,6 +499,29 @@ ath_tx_handoff_hw(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf)
 	KASSERT(txq->axq_qnum != ATH_TXQ_SWQ,
 	     ("ath_tx_handoff_hw called for mcast queue"));
 
+	ATH_LOCK(sc);
+	if (sc->sc_inreset_cnt) {
+		ATH_UNLOCK(sc);
+		DPRINTF(sc, ATH_DEBUG_RESET,
+		    "%s: called with sc_in_reset != 0\n",
+		    __func__);
+		DPRINTF(sc, ATH_DEBUG_XMIT,
+		    "%s: queued: TXDP[%u] = %p (%p) depth %d\n",
+		    __func__, txq->axq_qnum,
+		    (caddr_t)bf->bf_daddr, bf->bf_desc,
+		    txq->axq_depth);
+		ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
+		if (bf->bf_state.bfs_aggr)
+			txq->axq_aggr_depth++;
+		/*
+		 * There's no need to update axq_link; the hardware
+		 * is in reset and once the reset is complete, any
+		 * non-empty queues will simply have DMA restarted.
+		 */
+		return;
+	}
+	ATH_UNLOCK(sc);
+
 	/* For now, so not to generate whitespace diffs */
 	if (1) {
 #ifdef IEEE80211_SUPPORT_TDMA
@@ -1688,6 +1709,17 @@ ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
 	struct ath_buf *bf;
 	int error;
 
+	ATH_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; bailing\n",
+		    __func__);
+		error = EIO;
+		ATH_UNLOCK(sc);
+		goto bad0;
+	}
+	sc->sc_txstart_cnt++;
+	ATH_UNLOCK(sc);
+
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) {
 		DPRINTF(sc, ATH_DEBUG_XMIT, "%s: discard frame, %s", __func__,
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ?
@@ -1730,15 +1762,24 @@ ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
 	ifp->if_opackets++;
 	sc->sc_stats.ast_tx_raw++;
 
+	ATH_LOCK(sc);
+	sc->sc_txstart_cnt--;
+	ATH_UNLOCK(sc);
+
 	return 0;
 bad2:
 	ATH_TXBUF_LOCK(sc);
 	TAILQ_INSERT_HEAD(&sc->sc_txbuf, bf, bf_list);
 	ATH_TXBUF_UNLOCK(sc);
 bad:
+	ATH_LOCK(sc);
+	sc->sc_txstart_cnt--;
+	ATH_UNLOCK(sc);
+bad0:
 	ifp->if_oerrors++;
 	sc->sc_stats.ast_tx_raw_fail++;
 	ieee80211_free_node(ni);
+
 	return error;
 }
 
diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h
index 21829a62..44762a0 100644
--- a/sys/dev/ath/if_athvar.h
+++ b/sys/dev/ath/if_athvar.h
@@ -424,6 +424,7 @@ struct ath_softc {
 	u_int			sc_txantenna;	/* tx antenna (fixed or auto) */
 
 	HAL_INT			sc_imask;	/* interrupt mask copy */
+
 	/*
 	 * These are modified in the interrupt handler as well as
 	 * the task queues and other contexts. Thus these must be
@@ -434,6 +435,11 @@ struct ath_softc {
 	 */
 	uint32_t		sc_txq_active;	/* bitmap of active TXQs */
 	uint32_t		sc_kickpcu;	/* whether to kick the PCU */
+	uint32_t		sc_rxproc_cnt;	/* In RX processing */
+	uint32_t		sc_txproc_cnt;	/* In TX processing */
+	uint32_t		sc_txstart_cnt;	/* In TX output (raw/start) */
+	uint32_t		sc_inreset_cnt;	/* In active reset/chanchange */
+	uint32_t		sc_txrx_cnt;	/* refcount on stop/start'ing TX */
 
 	u_int			sc_keymax;	/* size of key cache */
 	u_int8_t		sc_keymap[ATH_KEYBYTES];/* key use bit map */
@@ -553,6 +559,7 @@ struct ath_softc {
 #define	ATH_LOCK(_sc)		mtx_lock(&(_sc)->sc_mtx)
 #define	ATH_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_mtx)
 #define	ATH_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
+#define	ATH_UNLOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED)
 
 /*
  * The PCU lock is non-recursive and should be treated as a spinlock.
help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?CAJ-VmokEYWi=CWsbYN-RewU6noEmtjr0RxTGMzJJ_-CA3S=uFQ>