Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 18 Nov 2011 05:06:30 +0000 (UTC)
From:      Adrian Chadd <adrian@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r227651 - head/sys/dev/ath
Message-ID:  <201111180506.pAI56UYD051422@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: adrian
Date: Fri Nov 18 05:06:30 2011
New Revision: 227651
URL: http://svn.freebsd.org/changeset/base/227651

Log:
  Flesh out some slightly dirty reset/channel change serialisation code
  for the ath(4) driver.
  
  Currently, there's nothing stopping reset, channel change and general
  TX/RX from overlapping with each other. This wasn't a big deal with
  pre-11n traffic as it just results in some dropped frames.
  It's possible this may have also caused some inconsistencies and
  badly-setup hardware.
  
  Since locks can't be held across all of this (the Linux solution)
  due to LORs with the network stack locks, some state counter
  variables are used to track what parts of the code the driver is
  currently in.
  
  When the hardware is being reset, it disables the taskqueue and
  waits for pending interrupts, tx, rx and tx completion before
  it begins the reset or channel change.
  
  TX and RX both abort if called during an active reset or channel
  change.
  
  Finally, the reset path now doesn't flush frames if ATH_RESET_NOLOSS
  is set. Instead, completed TX and RX frames are passed back up to
  net80211 before the reset occurs.
  
  This is not without problems:
  
  * Raw frame xmit are just dropped, rather than placed on a queue.
    The net80211 stack should be the one which queues these frames
    rather than the driver.
  
  * It's all very messy. It'd be better if these hardware operations
    were serialised on some kind of work queue, rather than hoping
    they can be run in parallel.
  
  * The taskqueue block/unblock may occur in parallel with the
    newstate() function - which shuts down the taskqueue and restarts
    it once the new state is known. It's likely these operations should
    be refcounted so the taskqueue is restored once no other areas
    in the code wish to suspend operations.
  
  * .. interrupt disable/enable should likely be refcounted as well.
  
  With this work, the driver does not drop frames during stuck beacon
  or fatal errors and thus 11n traffic continues to run correctly.
  Default and full resets however do still drop frames and it's possible
  this may occur, causing traffic loss and session stalls.
  
  Sponsored by:	Hobnob, Inc.

Modified:
  head/sys/dev/ath/if_ath.c
  head/sys/dev/ath/if_ath_tx.c
  head/sys/dev/ath/if_athvar.h

Modified: head/sys/dev/ath/if_ath.c
==============================================================================
--- head/sys/dev/ath/if_ath.c	Fri Nov 18 03:05:20 2011	(r227650)
+++ head/sys/dev/ath/if_ath.c	Fri Nov 18 05:06:30 2011	(r227651)
@@ -1359,16 +1359,36 @@ ath_intr(void *arg)
 	HAL_INT status = 0;
 	uint32_t txqs;
 
+	/*
+	 * If we're inside a reset path, just print a warning and
+	 * clear the ISR. The reset routine will finish it for us.
+	 */
+	ATH_PCU_LOCK(sc);
+	if (sc->sc_inreset_cnt) {
+		HAL_INT status;
+		ath_hal_getisr(ah, &status);	/* clear ISR */
+		ath_hal_intrset(ah, 0);		/* disable further intr's */
+		DPRINTF(sc, ATH_DEBUG_ANY,
+		    "%s: in reset, ignoring: status=0x%x\n",
+		    __func__, status);
+		ATH_PCU_UNLOCK(sc);
+		return;
+	}
+
 	if (sc->sc_invalid) {
 		/*
 		 * The hardware is not ready/present, don't touch anything.
 		 * Note this can happen early on if the IRQ is shared.
 		 */
 		DPRINTF(sc, ATH_DEBUG_ANY, "%s: invalid; ignored\n", __func__);
+		ATH_PCU_UNLOCK(sc);
 		return;
 	}
-	if (!ath_hal_intrpend(ah))		/* shared irq, not for us */
+	if (!ath_hal_intrpend(ah)) {		/* shared irq, not for us */
+		ATH_PCU_UNLOCK(sc);
 		return;
+	}
+
 	if ((ifp->if_flags & IFF_UP) == 0 ||
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
 		HAL_INT status;
@@ -1377,8 +1397,10 @@ ath_intr(void *arg)
 			__func__, ifp->if_flags);
 		ath_hal_getisr(ah, &status);	/* clear ISR */
 		ath_hal_intrset(ah, 0);		/* disable further intr's */
+		ATH_PCU_UNLOCK(sc);
 		return;
 	}
+
 	/*
 	 * Figure out the reason(s) for the interrupt.  Note
 	 * that the hal returns a pseudo-ISR that may include
@@ -1400,9 +1422,23 @@ ath_intr(void *arg)
 	status &= sc->sc_imask;			/* discard unasked for bits */
 
 	/* Short-circuit un-handled interrupts */
-	if (status == 0x0)
+	if (status == 0x0) {
+		ATH_PCU_UNLOCK(sc);
 		return;
+	}
+
+	/*
+	 * Take a note that we're inside the interrupt handler, so
+	 * the reset routines know to wait.
+	 */
+	sc->sc_intr_cnt++;
+	ATH_PCU_UNLOCK(sc);
 
+	/*
+	 * Handle the interrupt. We won't run concurrent with the reset
+	 * or channel change routines as they'll wait for sc_intr_cnt
+	 * to be 0 before continuing.
+	 */
 	if (status & HAL_INT_FATAL) {
 		sc->sc_stats.ast_hardware++;
 		ath_hal_intrset(ah, 0);		/* disable intr's until reset */
@@ -1443,6 +1479,7 @@ ath_intr(void *arg)
 		if (status & HAL_INT_RXEOL) {
 			int imask;
 			CTR0(ATH_KTR_ERR, "ath_intr: RXEOL");
+			ATH_PCU_LOCK(sc);
 			/*
 			 * NB: the hardware should re-read the link when
 			 *     RXE bit is written, but it doesn't work at
@@ -1458,7 +1495,6 @@ ath_intr(void *arg)
 			 * by a call to ath_reset() somehow, the
 			 * interrupt mask will be correctly reprogrammed.
 			 */
-			ATH_LOCK(sc);
 			imask = sc->sc_imask;
 			imask &= ~(HAL_INT_RXEOL | HAL_INT_RXORN);
 			ath_hal_intrset(ah, imask);
@@ -1476,13 +1512,13 @@ ath_intr(void *arg)
 			if (! sc->sc_kickpcu)
 				sc->sc_rxlink = NULL;
 			sc->sc_kickpcu = 1;
-			ATH_UNLOCK(sc);
 			/*
 			 * Enqueue an RX proc, to handled whatever
 			 * is in the RX queue.
 			 * This will then kick the PCU.
 			 */
 			taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask);
+			ATH_PCU_UNLOCK(sc);
 		}
 		if (status & HAL_INT_TXURN) {
 			sc->sc_stats.ast_txurn++;
@@ -1500,12 +1536,12 @@ ath_intr(void *arg)
 			 * and blank them. This is the only place we should be
 			 * doing this.
 			 */
-			ATH_LOCK(sc);
+			ATH_PCU_LOCK(sc);
 			txqs = 0xffffffff;
 			ath_hal_gettxintrtxqs(sc->sc_ah, &txqs);
 			sc->sc_txq_active |= txqs;
-			ATH_UNLOCK(sc);
 			taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask);
+			ATH_PCU_UNLOCK(sc);
 		}
 		if (status & HAL_INT_BMISS) {
 			sc->sc_stats.ast_bmiss++;
@@ -1517,6 +1553,7 @@ ath_intr(void *arg)
 			sc->sc_stats.ast_tx_cst++;
 		if (status & HAL_INT_MIB) {
 			sc->sc_stats.ast_mib++;
+			ATH_PCU_LOCK(sc);
 			/*
 			 * Disable interrupts until we service the MIB
 			 * interrupt; otherwise it will continue to fire.
@@ -1533,10 +1570,9 @@ ath_intr(void *arg)
 			 * RXEOL before the rxproc has had a chance
 			 * to run.
 			 */
-			ATH_LOCK(sc);
 			if (sc->sc_kickpcu == 0)
 				ath_hal_intrset(ah, sc->sc_imask);
-			ATH_UNLOCK(sc);
+			ATH_PCU_UNLOCK(sc);
 		}
 		if (status & HAL_INT_RXORN) {
 			/* NB: hal marks HAL_INT_FATAL when RXORN is fatal */
@@ -1544,6 +1580,9 @@ ath_intr(void *arg)
 			sc->sc_stats.ast_rxorn++;
 		}
 	}
+	ATH_PCU_LOCK(sc);
+	sc->sc_intr_cnt--;
+	ATH_PCU_UNLOCK(sc);
 }
 
 static void
@@ -1818,6 +1857,48 @@ ath_stop_locked(struct ifnet *ifp)
 	}
 }
 
+#define	MAX_TXRX_ITERATIONS	1000
+static void
+ath_txrx_stop(struct ath_softc *sc)
+{
+	int i = MAX_TXRX_ITERATIONS;
+
+	ATH_UNLOCK_ASSERT(sc);
+
+	/* Stop any new TX/RX from occuring */
+	taskqueue_block(sc->sc_tq);
+
+	ATH_PCU_LOCK(sc);
+
+	/*
+	 * Sleep until all the pending operations have completed.
+	 *
+	 * The caller must ensure that reset has been incremented
+	 * or the pending operations may continue being queued.
+	 */
+	while (sc->sc_rxproc_cnt || sc->sc_txproc_cnt ||
+	    sc->sc_txstart_cnt || sc->sc_intr_cnt) {
+	    	if (i <= 0)
+			break;
+		msleep(sc, &sc->sc_mtx, 0, "ath_txrx_stop", 1);
+		i--;
+	}
+	ATH_PCU_UNLOCK(sc);
+
+	if (i <= 0)
+		device_printf(sc->sc_dev,
+		    "%s: didn't finish after %d iterations\n",
+		    __func__, MAX_TXRX_ITERATIONS);
+}
+#undef	MAX_TXRX_ITERATIONS
+
+static void
+ath_txrx_start(struct ath_softc *sc)
+{
+
+	taskqueue_unblock(sc->sc_tq);
+}
+
 static void
 ath_stop(struct ifnet *ifp)
 {
@@ -1842,17 +1923,51 @@ ath_reset(struct ifnet *ifp, ATH_RESET_T
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
 	HAL_STATUS status;
+	int i;
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
 
+	/* XXX ensure ATH_LOCK isn't held; ath_rx_proc can't be locked */
+	ATH_PCU_UNLOCK_ASSERT(sc);
+	ATH_UNLOCK_ASSERT(sc);
+
+	ATH_PCU_LOCK(sc);
+	/* XXX if we're already inside a reset, print out a big warning */
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev,
+		    "%s: concurrent ath_reset()! Danger!\n",
+		    __func__);
+	}
+	sc->sc_inreset_cnt++;
 	ath_hal_intrset(ah, 0);		/* disable interrupts */
+	ATH_PCU_UNLOCK(sc);
+
+	/*
+	 * XXX should now wait for pending TX/RX to complete
+	 * and block future ones from occuring.
+	 */
+	ath_txrx_stop(sc);
+
 	ath_draintxq(sc, reset_type);	/* stop xmit side */
+
 	/*
-	 * XXX Don't flush if ATH_RESET_NOLOSS;but we have to first
-	 * XXX need to ensure this doesn't race with an outstanding
-	 * XXX taskqueue call.
+	 * Regardless of whether we're doing a no-loss flush or
+	 * not, stop the PCU and handle what's in the RX queue.
+	 * That way frames aren't dropped which shouldn't be.
+	 */
+	ath_hal_stoppcurecv(ah);
+	ath_hal_setrxfilter(ah, 0);
+	ath_rx_proc(sc, 0);
+
+	/*
+	 * If we're not doing a noloss reset, now call ath_stoprecv().
+	 * This fully stops all of the RX machinery and flushes whatever
+	 * frames are in the RX ring buffer. Hopefully all completed
+	 * frames have been handled at this point.
 	 */
-	ath_stoprecv(sc);		/* stop recv side */
+	if (reset_type != ATH_RESET_NOLOSS)
+		ath_stoprecv(sc);		/* stop recv side */
+
 	ath_settkipmic(sc);		/* configure TKIP MIC handling */
 	/* NB: indicate channel change so we do a full reset */
 	if (!ath_hal_reset(ah, sc->sc_opmode, ic->ic_curchan, AH_TRUE, &status))
@@ -1879,8 +1994,59 @@ ath_reset(struct ifnet *ifp, ATH_RESET_T
 #endif
 			ath_beacon_config(sc, NULL);
 	}
+
+	/*
+	 * Release the reset lock and re-enable interrupts here.
+	 * If an interrupt was being processed in ath_intr(),
+	 * it would disable interrupts at this point. So we have
+	 * to atomically enable interrupts and decrement the
+	 * reset counter - this way ath_intr() doesn't end up
+	 * disabling interrupts without a corresponding enable
+	 * in the rest or channel change path.
+	 */
+	ATH_PCU_LOCK(sc);
+	sc->sc_inreset_cnt--;
+	/* XXX only do this if sc_inreset_cnt == 0? */
 	ath_hal_intrset(ah, sc->sc_imask);
+	ATH_PCU_UNLOCK(sc);
+
+	/*
+	 * TX and RX can be started here. If it were started with
+	 * sc_inreset_cnt > 0, the TX and RX path would abort.
+	 * Thus if this is a nested call through the reset or
+	 * channel change code, TX completion will occur but
+	 * RX completion and ath_start / ath_tx_start will not
+	 * run.
+	 */
+
+	/* Restart TX/RX as needed */
+	ath_txrx_start(sc);
+
+	/* XXX Restart TX completion and pending TX */
+	if (reset_type == ATH_RESET_NOLOSS) {
+		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
+			if (ATH_TXQ_SETUP(sc, i)) {
+				ATH_TXQ_LOCK(&sc->sc_txq[i]);
+				ath_txq_restart_dma(sc, &sc->sc_txq[i]);
+				ath_txq_sched(sc, &sc->sc_txq[i]);
+				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
+			}
+		}
+	}
+
+	/*
+	 * This may have been set during an ath_start() call which
+	 * set this once it detected a concurrent TX was going on.
+	 * So, clear it.
+	 */
+	/* XXX do this inside of IF_LOCK? */
+	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 
+	/* Handle any frames in the TX queue */
+	/*
+	 * XXX should this be done by the caller, rather than
+	 * ath_reset() ?
+	 */
 	ath_start(ifp);			/* restart xmit */
 	return 0;
 }
@@ -2011,6 +2177,7 @@ ath_getbuf(struct ath_softc *sc)
 
 		DPRINTF(sc, ATH_DEBUG_XMIT, "%s: stop queue\n", __func__);
 		sc->sc_stats.ast_tx_qstop++;
+		/* XXX do this inside of IF_LOCK? */
 		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
 	}
 	ATH_TXBUF_UNLOCK(sc);
@@ -2028,6 +2195,20 @@ ath_start(struct ifnet *ifp)
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid)
 		return;
+
+	/* XXX is it ok to hold the ATH_LOCK here? */
+	ATH_PCU_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev,
+		    "%s: sc_inreset_cnt > 0; bailing\n", __func__);
+		/* XXX do this inside of IF_LOCK? */
+		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+		ATH_PCU_UNLOCK(sc);
+		return;
+	}
+	sc->sc_txstart_cnt++;
+	ATH_PCU_UNLOCK(sc);
+
 	for (;;) {
 		/*
 		 * Grab a TX buffer and associated resources.
@@ -2111,6 +2292,10 @@ ath_start(struct ifnet *ifp)
 
 		sc->sc_wd_timer = 5;
 	}
+
+	ATH_PCU_LOCK(sc);
+	sc->sc_txstart_cnt--;
+	ATH_PCU_UNLOCK(sc);
 }
 
 static int
@@ -3688,6 +3873,14 @@ ath_rx_tasklet(void *arg, int npending)
 
 	CTR1(ATH_KTR_INTR, "ath_rx_proc: pending=%d", npending);
 	DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: pending %u\n", __func__, npending);
+	ATH_PCU_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev,
+		    "%s: sc_inreset_cnt > 0; skipping\n", __func__);
+		ATH_PCU_UNLOCK(sc);
+		return;
+	}
+	ATH_PCU_UNLOCK(sc);
 	ath_rx_proc(sc, 1);
 }
 
@@ -3711,6 +3904,14 @@ ath_rx_proc(struct ath_softc *sc, int re
 	u_int64_t tsf;
 	int npkts = 0;
 
+	/* XXX we must not hold the ATH_LOCK here */
+	ATH_UNLOCK_ASSERT(sc);
+	ATH_PCU_UNLOCK_ASSERT(sc);
+
+	ATH_PCU_LOCK(sc);
+	sc->sc_rxproc_cnt++;
+	ATH_PCU_UNLOCK(sc);
+
 	DPRINTF(sc, ATH_DEBUG_RX_PROC, "%s: called\n", __func__);
 	ngood = 0;
 	nf = ath_hal_getchannoise(ah, sc->sc_curchan);
@@ -4056,24 +4257,29 @@ rx_next:
 	 * need to be handled, kick the PCU if there's
 	 * been an RXEOL condition.
 	 */
+	ATH_PCU_LOCK(sc);
 	if (resched && sc->sc_kickpcu) {
 		CTR0(ATH_KTR_ERR, "ath_rx_proc: kickpcu");
 		device_printf(sc->sc_dev, "%s: kickpcu; handled %d packets\n",
 		    __func__, npkts);
 
 		/* XXX rxslink? */
+		/*
+		 * XXX can we hold the PCU lock here?
+		 * Are there any net80211 buffer calls involved?
+		 */
 		bf = TAILQ_FIRST(&sc->sc_rxbuf);
 		ath_hal_putrxbuf(ah, bf->bf_daddr);
 		ath_hal_rxena(ah);		/* enable recv descriptors */
 		ath_mode_init(sc);		/* set filters, etc. */
 		ath_hal_startpcurecv(ah);	/* re-enable PCU/DMA engine */
 
-		ATH_LOCK(sc);
 		ath_hal_intrset(ah, sc->sc_imask);
 		sc->sc_kickpcu = 0;
-		ATH_UNLOCK(sc);
 	}
+	ATH_PCU_UNLOCK(sc);
 
+	/* XXX check this inside of IF_LOCK? */
 	if (resched && (ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
 #ifdef IEEE80211_SUPPORT_SUPERG
 		ieee80211_ff_age_all(ic, 100);
@@ -4082,6 +4288,10 @@ rx_next:
 			ath_start(ifp);
 	}
 #undef PA2DESC
+
+	ATH_PCU_LOCK(sc);
+	sc->sc_rxproc_cnt--;
+	ATH_PCU_UNLOCK(sc);
 }
 
 static void
@@ -4588,22 +4798,28 @@ ath_tx_proc_q0(void *arg, int npending)
 	struct ifnet *ifp = sc->sc_ifp;
 	uint32_t txqs;
 
-	ATH_LOCK(sc);
+	ATH_PCU_LOCK(sc);
+	sc->sc_txproc_cnt++;
 	txqs = sc->sc_txq_active;
 	sc->sc_txq_active &= ~txqs;
-	ATH_UNLOCK(sc);
+	ATH_PCU_UNLOCK(sc);
 
 	if (TXQACTIVE(txqs, 0) && ath_tx_processq(sc, &sc->sc_txq[0], 1))
 		/* XXX why is lastrx updated in tx code? */
 		sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah);
 	if (TXQACTIVE(txqs, sc->sc_cabq->axq_qnum))
 		ath_tx_processq(sc, sc->sc_cabq, 1);
+	/* XXX check this inside of IF_LOCK? */
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	sc->sc_wd_timer = 0;
 
 	if (sc->sc_softled)
 		ath_led_event(sc, sc->sc_txrix);
 
+	ATH_PCU_LOCK(sc);
+	sc->sc_txproc_cnt--;
+	ATH_PCU_UNLOCK(sc);
+
 	ath_start(ifp);
 }
 
@@ -4619,10 +4835,11 @@ ath_tx_proc_q0123(void *arg, int npendin
 	int nacked;
 	uint32_t txqs;
 
-	ATH_LOCK(sc);
+	ATH_PCU_LOCK(sc);
+	sc->sc_txproc_cnt++;
 	txqs = sc->sc_txq_active;
 	sc->sc_txq_active &= ~txqs;
-	ATH_UNLOCK(sc);
+	ATH_PCU_UNLOCK(sc);
 
 	/*
 	 * Process each active queue.
@@ -4641,12 +4858,17 @@ ath_tx_proc_q0123(void *arg, int npendin
 	if (nacked)
 		sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah);
 
+	/* XXX check this inside of IF_LOCK? */
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	sc->sc_wd_timer = 0;
 
 	if (sc->sc_softled)
 		ath_led_event(sc, sc->sc_txrix);
 
+	ATH_PCU_LOCK(sc);
+	sc->sc_txproc_cnt--;
+	ATH_PCU_UNLOCK(sc);
+
 	ath_start(ifp);
 }
 
@@ -4661,10 +4883,11 @@ ath_tx_proc(void *arg, int npending)
 	int i, nacked;
 	uint32_t txqs;
 
-	ATH_LOCK(sc);
+	ATH_PCU_LOCK(sc);
+	sc->sc_txproc_cnt++;
 	txqs = sc->sc_txq_active;
 	sc->sc_txq_active &= ~txqs;
-	ATH_UNLOCK(sc);
+	ATH_PCU_UNLOCK(sc);
 
 	/*
 	 * Process each active queue.
@@ -4676,12 +4899,17 @@ ath_tx_proc(void *arg, int npending)
 	if (nacked)
 		sc->sc_lastrx = ath_hal_gettsf64(sc->sc_ah);
 
+	/* XXX check this inside of IF_LOCK? */
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	sc->sc_wd_timer = 0;
 
 	if (sc->sc_softled)
 		ath_led_event(sc, sc->sc_txrix);
 
+	ATH_PCU_LOCK(sc);
+	sc->sc_txproc_cnt--;
+	ATH_PCU_UNLOCK(sc);
+
 	ath_start(ifp);
 }
 #undef	TXQACTIVE
@@ -4863,9 +5091,18 @@ ath_draintxq(struct ath_softc *sc, ATH_R
 
 	(void) ath_stoptxdma(sc);
 
-	for (i = 0; i < HAL_NUM_TX_QUEUES; i++)
-		if (ATH_TXQ_SETUP(sc, i))
-			ath_tx_draintxq(sc, &sc->sc_txq[i]);
+	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
+		/*
+		 * XXX TODO: should we just handle the completed TX frames
+		 * here, whether or not the reset is a full one or not?
+		 */
+		if (ATH_TXQ_SETUP(sc, i)) {
+			if (reset_type == ATH_RESET_NOLOSS)
+				ath_tx_processq(sc, &sc->sc_txq[i], 0);
+			else
+				ath_tx_draintxq(sc, &sc->sc_txq[i]);
+		}
+	}
 #ifdef ATH_DEBUG
 	if (sc->sc_debug & ATH_DEBUG_RESET) {
 		struct ath_buf *bf = TAILQ_FIRST(&sc->sc_bbuf);
@@ -4879,6 +5116,7 @@ ath_draintxq(struct ath_softc *sc, ATH_R
 		}
 	}
 #endif /* ATH_DEBUG */
+	/* XXX check this inside of IF_LOCK? */
 	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
 	sc->sc_wd_timer = 0;
 }
@@ -4984,6 +5222,22 @@ ath_chan_set(struct ath_softc *sc, struc
 	struct ifnet *ifp = sc->sc_ifp;
 	struct ieee80211com *ic = ifp->if_l2com;
 	struct ath_hal *ah = sc->sc_ah;
+	int ret = 0;
+	int dointr = 0;
+
+	/* Treat this as an interface reset */
+	ATH_PCU_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0)
+		device_printf(sc->sc_dev, "%s: danger! concurrent reset!\n",
+		    __func__);
+	sc->sc_inreset_cnt++;
+	if (chan != sc->sc_curchan) {
+		dointr = 1;
+		/* XXX only do this if inreset_cnt is 1? */
+		ath_hal_intrset(ah, 0);
+	}
+	ATH_PCU_UNLOCK(sc);
+	ath_txrx_stop(sc);
 
 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: %u (%u MHz, flags 0x%x)\n",
 	    __func__, ieee80211_chan2ieee(ic, chan),
@@ -4996,7 +5250,9 @@ ath_chan_set(struct ath_softc *sc, struc
 		 * hardware at the new frequency, and then re-enable
 		 * the relevant bits of the h/w.
 		 */
+#if 0
 		ath_hal_intrset(ah, 0);		/* disable interrupts */
+#endif
 		ath_draintxq(sc, ATH_RESET_FULL);	/* clear pending tx frames */
 		ath_stoprecv(sc);		/* turn off frame recv */
 		if (!ath_hal_reset(ah, sc->sc_opmode, chan, AH_TRUE, &status)) {
@@ -5004,7 +5260,8 @@ ath_chan_set(struct ath_softc *sc, struc
 			    "channel %u (%u MHz, flags 0x%x), hal status %u\n",
 			    __func__, ieee80211_chan2ieee(ic, chan),
 			    chan->ic_freq, chan->ic_flags, status);
-			return EIO;
+			ret = EIO;
+			goto finish;
 		}
 		sc->sc_diversity = ath_hal_getdiversity(ah);
 
@@ -5017,7 +5274,8 @@ ath_chan_set(struct ath_softc *sc, struc
 		if (ath_startrecv(sc) != 0) {
 			if_printf(ifp, "%s: unable to restart recv logic\n",
 			    __func__);
-			return EIO;
+			ret = EIO;
+			goto finish;
 		}
 
 		/*
@@ -5039,12 +5297,28 @@ ath_chan_set(struct ath_softc *sc, struc
 			ath_beacon_config(sc, NULL);
 		}
 
+#if 0
 		/*
 		 * Re-enable interrupts.
 		 */
 		ath_hal_intrset(ah, sc->sc_imask);
+#endif
 	}
-	return 0;
+
+finish:
+	ATH_PCU_LOCK(sc);
+	sc->sc_inreset_cnt--;
+	/* XXX only do this if sc_inreset_cnt == 0? */
+	if (dointr)
+		ath_hal_intrset(ah, sc->sc_imask);
+	ATH_PCU_UNLOCK(sc);
+
+	/* XXX do this inside of IF_LOCK? */
+	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+	ath_txrx_start(sc);
+	/* XXX ath_start? */
+
+	return ret;
 }
 
 /*
@@ -5087,7 +5361,18 @@ ath_calibrate(void *arg)
 			DPRINTF(sc, ATH_DEBUG_CALIBRATE,
 				"%s: rfgain change\n", __func__);
 			sc->sc_stats.ast_per_rfgain++;
+			/*
+			 * Drop lock - we can't hold it across the
+			 * ath_reset() call. Instead, we'll drop
+			 * out here, do a reset, then reschedule
+			 * the callout.
+			 */
+			callout_reset(&sc->sc_cal_ch, 1, ath_calibrate, sc);
+			sc->sc_resetcal = 0;
+			sc->sc_doresetcal = AH_TRUE;
+			ATH_UNLOCK(sc);
 			ath_reset(ifp, ATH_RESET_NOLOSS);
+			return;
 		}
 		/*
 		 * If this long cal is after an idle period, then
@@ -5742,6 +6027,7 @@ static void
 ath_watchdog(void *arg)
 {
 	struct ath_softc *sc = arg;
+	int do_reset = 0;
 
 	if (sc->sc_wd_timer != 0 && --sc->sc_wd_timer == 0) {
 		struct ifnet *ifp = sc->sc_ifp;
@@ -5753,10 +6039,20 @@ ath_watchdog(void *arg)
 			    hangs & 0xff ? "bb" : "mac", hangs);
 		} else
 			if_printf(ifp, "device timeout\n");
-		ath_reset(ifp, ATH_RESET_NOLOSS);
+		do_reset = 1;
 		ifp->if_oerrors++;
 		sc->sc_stats.ast_watchdog++;
 	}
+
+	/*
+	 * We can't hold the lock across the ath_reset() call.
+	 */
+	if (do_reset) {
+		ATH_UNLOCK(sc);
+		ath_reset(sc->sc_ifp, ATH_RESET_NOLOSS);
+		ATH_LOCK(sc);
+	}
+
 	callout_schedule(&sc->sc_wd_ch, hz);
 }
 

Modified: head/sys/dev/ath/if_ath_tx.c
==============================================================================
--- head/sys/dev/ath/if_ath_tx.c	Fri Nov 18 03:05:20 2011	(r227650)
+++ head/sys/dev/ath/if_ath_tx.c	Fri Nov 18 05:06:30 2011	(r227651)
@@ -477,8 +477,6 @@ ath_tx_handoff_mcast(struct ath_softc *s
 	txq->axq_link = &bf->bf_lastds->ds_link;
 }
 
-
-
 /*
  * Hand-off packet to a hardware queue.
  */
@@ -501,6 +499,36 @@ ath_tx_handoff_hw(struct ath_softc *sc, 
 	KASSERT(txq->axq_qnum != ATH_TXQ_SWQ,
 	     ("ath_tx_handoff_hw called for mcast queue"));
 
+#if 0
+	/*
+	 * This causes a LOR. Find out where the PCU lock is being
+	 * held whilst the TXQ lock is grabbed - that shouldn't
+	 * be occuring.
+	 */
+	ATH_PCU_LOCK(sc);
+	if (sc->sc_inreset_cnt) {
+		ATH_PCU_UNLOCK(sc);
+		DPRINTF(sc, ATH_DEBUG_RESET,
+		    "%s: called with sc_in_reset != 0\n",
+		    __func__);
+		DPRINTF(sc, ATH_DEBUG_XMIT,
+		    "%s: queued: TXDP[%u] = %p (%p) depth %d\n",
+		    __func__, txq->axq_qnum,
+		    (caddr_t)bf->bf_daddr, bf->bf_desc,
+		    txq->axq_depth);
+		ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
+		if (bf->bf_state.bfs_aggr)
+			txq->axq_aggr_depth++;
+		/*
+		 * There's no need to update axq_link; the hardware
+		 * is in reset and once the reset is complete, any
+		 * non-empty queues will simply have DMA restarted.
+		 */
+		return;
+		}
+	ATH_PCU_UNLOCK(sc);
+#endif
+
 	/* For now, so not to generate whitespace diffs */
 	if (1) {
 #ifdef IEEE80211_SUPPORT_TDMA
@@ -1688,6 +1716,17 @@ ath_raw_xmit(struct ieee80211_node *ni, 
 	struct ath_buf *bf;
 	int error;
 
+	ATH_PCU_LOCK(sc);
+	if (sc->sc_inreset_cnt > 0) {
+		device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; bailing\n",
+		    __func__);
+		error = EIO;
+		ATH_PCU_UNLOCK(sc);
+		goto bad0;
+	}
+	sc->sc_txstart_cnt++;
+	ATH_PCU_UNLOCK(sc);
+
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) {
 		DPRINTF(sc, ATH_DEBUG_XMIT, "%s: discard frame, %s", __func__,
 		    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ?
@@ -1730,15 +1769,24 @@ ath_raw_xmit(struct ieee80211_node *ni, 
 	ifp->if_opackets++;
 	sc->sc_stats.ast_tx_raw++;
 
+	ATH_PCU_LOCK(sc);
+	sc->sc_txstart_cnt--;
+	ATH_PCU_UNLOCK(sc);
+
 	return 0;
 bad2:
 	ATH_TXBUF_LOCK(sc);
 	TAILQ_INSERT_HEAD(&sc->sc_txbuf, bf, bf_list);
 	ATH_TXBUF_UNLOCK(sc);
 bad:
+	ATH_PCU_LOCK(sc);
+	sc->sc_txstart_cnt--;
+	ATH_PCU_UNLOCK(sc);
+bad0:
 	ifp->if_oerrors++;
 	sc->sc_stats.ast_tx_raw_fail++;
 	ieee80211_free_node(ni);
+
 	return error;
 }
 

Modified: head/sys/dev/ath/if_athvar.h
==============================================================================
--- head/sys/dev/ath/if_athvar.h	Fri Nov 18 03:05:20 2011	(r227650)
+++ head/sys/dev/ath/if_athvar.h	Fri Nov 18 05:06:30 2011	(r227651)
@@ -424,6 +424,7 @@ struct ath_softc {
 	u_int			sc_txantenna;	/* tx antenna (fixed or auto) */
 
 	HAL_INT			sc_imask;	/* interrupt mask copy */
+
 	/*
 	 * These are modified in the interrupt handler as well as
 	 * the task queues and other contexts. Thus these must be
@@ -434,6 +435,12 @@ struct ath_softc {
 	 */
 	uint32_t		sc_txq_active;	/* bitmap of active TXQs */
 	uint32_t		sc_kickpcu;	/* whether to kick the PCU */
+	uint32_t		sc_rxproc_cnt;	/* In RX processing */
+	uint32_t		sc_txproc_cnt;	/* In TX processing */
+	uint32_t		sc_txstart_cnt;	/* In TX output (raw/start) */
+	uint32_t		sc_inreset_cnt;	/* In active reset/chanchange */
+	uint32_t		sc_txrx_cnt;	/* refcount on stop/start'ing TX */
+	uint32_t		sc_intr_cnt;	/* refcount on interrupt handling */
 
 	u_int			sc_keymax;	/* size of key cache */
 	u_int8_t		sc_keymap[ATH_KEYBYTES];/* key use bit map */
@@ -553,6 +560,7 @@ struct ath_softc {
 #define	ATH_LOCK(_sc)		mtx_lock(&(_sc)->sc_mtx)
 #define	ATH_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_mtx)
 #define	ATH_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
+#define	ATH_UNLOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED)
 
 /*
  * The PCU lock is non-recursive and should be treated as a spinlock.
@@ -584,6 +592,8 @@ struct ath_softc {
 #define	ATH_PCU_UNLOCK(_sc)		mtx_unlock(&(_sc)->sc_pcu_mtx)
 #define	ATH_PCU_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_pcu_mtx,	\
 		MA_OWNED)
+#define	ATH_PCU_UNLOCK_ASSERT(_sc)	mtx_assert(&(_sc)->sc_pcu_mtx,	\
+		MA_NOTOWNED)
 
 #define	ATH_TXQ_SETUP(sc, i)	((sc)->sc_txqsetup & (1<<i))
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201111180506.pAI56UYD051422>