Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 19 Jun 2016 07:31:02 +0000 (UTC)
From:      Adrian Chadd <adrian@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r302018 - head/sys/net80211
Message-ID:  <201606190731.u5J7V22Q063663@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: adrian
Date: Sun Jun 19 07:31:02 2016
New Revision: 302018
URL: https://svnweb.freebsd.org/changeset/base/302018

Log:
  [net80211] remove node scan lock / generation number + fix few LORs
  
  Drop scan generation number and node table scan lock - the only place
  where ni_scangen is checked is in ieee80211_timeout_stations() (and it
  is used to prevent duplicate checking of the same node); node scan lock
  protects only this variable + node table scan generation number.
  
  This will fix (at least) next LOR (hostap mode):
  
  lock order reversal:
  1st 0xc175f84c urtwm0_scan_loc (urtwm0_scan_loc) @ /usr/src/sys/modules/wlan/../../net80211/ieee80211_node.c:2019
  2nd 0xc175e018 urtwm0_com_lock (urtwm0_com_lock) @ /usr/src/sys/modules/wlan/../../net80211/ieee80211_node.c:2693
  stack backtrace:
  #0 0xa070d1c5 at witness_debugger+0x75
  #1 0xa070d0f6 at witness_checkorder+0xd46
  #2 0xa0694cce at __mtx_lock_flags+0x9e
  #3 0xb03ad9ef at ieee80211_node_leave+0x12f
  #4 0xb03afd13 at ieee80211_timeout_stations+0x483
  #5 0xb03aa1c2 at ieee80211_node_timeout+0x42
  #6 0xa06c6fa1 at softclock_call_cc+0x1e1
  #7 0xa06c7518 at softclock+0xc8
  #8 0xa06789ae at intr_event_execute_handlers+0x8e
  #9 0xa0678fa0 at ithread_loop+0x90
  #10 0xa0675fbe at fork_exit+0x7e
  #11 0xa08af910 at fork_trampoline+0x8
  
  In addition to the above:
  
  * switch to ieee80211_iterate_nodes();
  * do not assert that node table lock is held, while calling node_age();
    that's not really needed (there are no resources, which can be protected
    by this lock) + this fixes LOR/deadlock between ieee80211_timeout_stations()
    and ieee80211_set_tim() (easy to reproduce in HOSTAP mode while
    sending something to an STA with enabled power management).
  
  Tested:
  
  * (avos) urtwn0, hostap mode
  * (adrian) AR9380, STA mode
  * (adrian) AR9380, AR9331, AR9580, hostap mode
  
  Notes:
  
  * This changes the net80211 internals, so you have to recompile all of it
    and the wifi drivers.
  
  Submitted by:	avos
  Approved by:	re (delphij)
  Differential Revision:	https://reviews.freebsd.org/D6833

Modified:
  head/sys/net80211/ieee80211_ddb.c
  head/sys/net80211/ieee80211_freebsd.h
  head/sys/net80211/ieee80211_node.c
  head/sys/net80211/ieee80211_node.h
  head/sys/net80211/ieee80211_superg.c

Modified: head/sys/net80211/ieee80211_ddb.c
==============================================================================
--- head/sys/net80211/ieee80211_ddb.c	Sun Jun 19 03:45:32 2016	(r302017)
+++ head/sys/net80211/ieee80211_ddb.c	Sun Jun 19 07:31:02 2016	(r302018)
@@ -233,9 +233,8 @@ _db_show_sta(const struct ieee80211_node
 	db_printf("\tvap %p wdsvap %p ic %p table %p\n",
 		ni->ni_vap, ni->ni_wdsvap, ni->ni_ic, ni->ni_table);
 	db_printf("\tflags=%b\n", ni->ni_flags, IEEE80211_NODE_BITS);
-	db_printf("\tscangen %u authmode %u ath_flags 0x%x ath_defkeyix %u\n",
-		ni->ni_scangen, ni->ni_authmode,
-		ni->ni_ath_flags, ni->ni_ath_defkeyix);
+	db_printf("\tauthmode %u ath_flags 0x%x ath_defkeyix %u\n",
+		ni->ni_authmode, ni->ni_ath_flags, ni->ni_ath_defkeyix);
 	db_printf("\tassocid 0x%x txpower %u vlan %u\n",
 		ni->ni_associd, ni->ni_txpower, ni->ni_vlan);
 	db_printf("\tjointime %d (%lu secs) challenge %p\n",
@@ -688,8 +687,6 @@ _db_show_node_table(const char *tag, con
 	db_printf("%s%s@%p:\n", tag, nt->nt_name, nt);
 	db_printf("%s nodelock %p", tag, &nt->nt_nodelock);
 	db_printf(" inact_init %d", nt->nt_inact_init);
-	db_printf(" scanlock %p", &nt->nt_scanlock);
-	db_printf(" scangen %u\n", nt->nt_scangen);
 	db_printf("%s keyixmax %d keyixmap %p\n",
 	    tag, nt->nt_keyixmax, nt->nt_keyixmap);
 	for (i = 0; i < nt->nt_keyixmax; i++) {

Modified: head/sys/net80211/ieee80211_freebsd.h
==============================================================================
--- head/sys/net80211/ieee80211_freebsd.h	Sun Jun 19 03:45:32 2016	(r302017)
+++ head/sys/net80211/ieee80211_freebsd.h	Sun Jun 19 07:31:02 2016	(r302018)
@@ -107,28 +107,6 @@ typedef struct {
 	mtx_assert(IEEE80211_NODE_LOCK_OBJ(_nt), MA_OWNED)
 
 /*
- * Node table iteration locking definitions; this protects the
- * scan generation # used to iterate over the station table
- * while grabbing+releasing the node lock.
- */
-typedef struct {
-	char		name[16];		/* e.g. "ath0_scan_lock" */
-	struct mtx	mtx;
-} ieee80211_scan_lock_t;
-#define	IEEE80211_NODE_ITERATE_LOCK_INIT(_nt, _name) do {		\
-	ieee80211_scan_lock_t *sl = &(_nt)->nt_scanlock;		\
-	snprintf(sl->name, sizeof(sl->name), "%s_scan_lock", _name);	\
-	mtx_init(&sl->mtx, sl->name, NULL, MTX_DEF);			\
-} while (0)
-#define	IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt)	(&(_nt)->nt_scanlock.mtx)
-#define	IEEE80211_NODE_ITERATE_LOCK_DESTROY(_nt) \
-	mtx_destroy(IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt))
-#define	IEEE80211_NODE_ITERATE_LOCK(_nt) \
-	mtx_lock(IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt))
-#define	IEEE80211_NODE_ITERATE_UNLOCK(_nt) \
-	mtx_unlock(IEEE80211_NODE_ITERATE_LOCK_OBJ(_nt))
-
-/*
  * Power-save queue definitions. 
  */
 typedef struct mtx ieee80211_psq_lock_t;

Modified: head/sys/net80211/ieee80211_node.c
==============================================================================
--- head/sys/net80211/ieee80211_node.c	Sun Jun 19 03:45:32 2016	(r302017)
+++ head/sys/net80211/ieee80211_node.c	Sun Jun 19 07:31:02 2016	(r302018)
@@ -1099,8 +1099,6 @@ node_age(struct ieee80211_node *ni)
 {
 	struct ieee80211vap *vap = ni->ni_vap;
 
-	IEEE80211_NODE_LOCK_ASSERT(&vap->iv_ic->ic_sta);
-
 	/*
 	 * Age frames on the power save queue.
 	 */
@@ -1922,10 +1920,8 @@ ieee80211_node_table_init(struct ieee802
 
 	nt->nt_ic = ic;
 	IEEE80211_NODE_LOCK_INIT(nt, ic->ic_name);
-	IEEE80211_NODE_ITERATE_LOCK_INIT(nt, ic->ic_name);
 	TAILQ_INIT(&nt->nt_node);
 	nt->nt_name = name;
-	nt->nt_scangen = 1;
 	nt->nt_inact_init = inact;
 	nt->nt_keyixmax = keyixmax;
 	if (nt->nt_keyixmax > 0) {
@@ -1994,159 +1990,137 @@ ieee80211_node_table_cleanup(struct ieee
 		IEEE80211_FREE(nt->nt_keyixmap, M_80211_NODE);
 		nt->nt_keyixmap = NULL;
 	}
-	IEEE80211_NODE_ITERATE_LOCK_DESTROY(nt);
 	IEEE80211_NODE_LOCK_DESTROY(nt);
 }
 
-/*
- * Timeout inactive stations and do related housekeeping.
- * Note that we cannot hold the node lock while sending a
- * frame as this would lead to a LOR.  Instead we use a
- * generation number to mark nodes that we've scanned and
- * drop the lock and restart a scan if we have to time out
- * a node.  Since we are single-threaded by virtue of
- * controlling the inactivity timer we can be sure this will
- * process each node only once.
- */
 static void
-ieee80211_timeout_stations(struct ieee80211com *ic)
+timeout_stations(void *arg __unused, struct ieee80211_node *ni)
 {
-	struct ieee80211_node_table *nt = &ic->ic_sta;
-	struct ieee80211vap *vap;
-	struct ieee80211_node *ni;
-	int gen = 0;
+	struct ieee80211com *ic = ni->ni_ic;
+	struct ieee80211vap *vap = ni->ni_vap;
 
-	IEEE80211_NODE_ITERATE_LOCK(nt);
-	gen = ++nt->nt_scangen;
-restart:
-	IEEE80211_NODE_LOCK(nt);
-	TAILQ_FOREACH(ni, &nt->nt_node, ni_list) {
-		if (ni->ni_scangen == gen)	/* previously handled */
-			continue;
-		ni->ni_scangen = gen;
-		/*
-		 * Ignore entries for which have yet to receive an
-		 * authentication frame.  These are transient and
-		 * will be reclaimed when the last reference to them
-		 * goes away (when frame xmits complete).
-		 */
-		vap = ni->ni_vap;
-		/*
-		 * Only process stations when in RUN state.  This
-		 * insures, for example, that we don't timeout an
-		 * inactive station during CAC.  Note that CSA state
-		 * is actually handled in ieee80211_node_timeout as
-		 * it applies to more than timeout processing.
-		 */
-		if (vap->iv_state != IEEE80211_S_RUN)
-			continue;
-		/* XXX can vap be NULL? */
-		if ((vap->iv_opmode == IEEE80211_M_HOSTAP ||
-		     vap->iv_opmode == IEEE80211_M_STA) &&
-		    (ni->ni_flags & IEEE80211_NODE_AREF) == 0)
-			continue;
+	/*
+	 * Only process stations when in RUN state.  This
+	 * insures, for example, that we don't timeout an
+	 * inactive station during CAC.  Note that CSA state
+	 * is actually handled in ieee80211_node_timeout as
+	 * it applies to more than timeout processing.
+	 */
+	if (vap->iv_state != IEEE80211_S_RUN)
+		return;
+	/*
+	 * Ignore entries for which have yet to receive an
+	 * authentication frame.  These are transient and
+	 * will be reclaimed when the last reference to them
+	 * goes away (when frame xmits complete).
+	 */
+	if ((vap->iv_opmode == IEEE80211_M_HOSTAP ||
+	     vap->iv_opmode == IEEE80211_M_STA) &&
+	    (ni->ni_flags & IEEE80211_NODE_AREF) == 0)
+		return;
+	/*
+	 * Free fragment if not needed anymore
+	 * (last fragment older than 1s).
+	 * XXX doesn't belong here, move to node_age
+	 */
+	if (ni->ni_rxfrag[0] != NULL &&
+	    ticks > ni->ni_rxfragstamp + hz) {
+		m_freem(ni->ni_rxfrag[0]);
+		ni->ni_rxfrag[0] = NULL;
+	}
+	if (ni->ni_inact > 0) {
+		ni->ni_inact--;
+		IEEE80211_NOTE(vap, IEEE80211_MSG_INACT, ni,
+		    "%s: inact %u inact_reload %u nrates %u",
+		    __func__, ni->ni_inact, ni->ni_inact_reload,
+		    ni->ni_rates.rs_nrates);
+	}
+	/*
+	 * Special case ourself; we may be idle for extended periods
+	 * of time and regardless reclaiming our state is wrong.
+	 * XXX run ic_node_age
+	 */
+	/* XXX before inact decrement? */
+	if (ni == vap->iv_bss)
+		return;
+	if (ni->ni_associd != 0 || 
+	    (vap->iv_opmode == IEEE80211_M_IBSS ||
+	     vap->iv_opmode == IEEE80211_M_AHDEMO)) {
 		/*
-		 * Free fragment if not needed anymore
-		 * (last fragment older than 1s).
-		 * XXX doesn't belong here, move to node_age
+		 * Age/drain resources held by the station.
 		 */
-		if (ni->ni_rxfrag[0] != NULL &&
-		    ticks > ni->ni_rxfragstamp + hz) {
-			m_freem(ni->ni_rxfrag[0]);
-			ni->ni_rxfrag[0] = NULL;
-		}
-		if (ni->ni_inact > 0) {
-			ni->ni_inact--;
-			IEEE80211_NOTE(vap, IEEE80211_MSG_INACT, ni,
-			    "%s: inact %u inact_reload %u nrates %u",
-			    __func__, ni->ni_inact, ni->ni_inact_reload,
-			    ni->ni_rates.rs_nrates);
-		}
+		ic->ic_node_age(ni);
 		/*
-		 * Special case ourself; we may be idle for extended periods
-		 * of time and regardless reclaiming our state is wrong.
-		 * XXX run ic_node_age
+		 * Probe the station before time it out.  We
+		 * send a null data frame which may not be
+		 * universally supported by drivers (need it
+		 * for ps-poll support so it should be...).
+		 *
+		 * XXX don't probe the station unless we've
+		 *     received a frame from them (and have
+		 *     some idea of the rates they are capable
+		 *     of); this will get fixed more properly
+		 *     soon with better handling of the rate set.
 		 */
-		if (ni == vap->iv_bss)
-			continue;
-		if (ni->ni_associd != 0 || 
-		    (vap->iv_opmode == IEEE80211_M_IBSS ||
-		     vap->iv_opmode == IEEE80211_M_AHDEMO)) {
-			/*
-			 * Age/drain resources held by the station.
-			 */
-			ic->ic_node_age(ni);
-			/*
-			 * Probe the station before time it out.  We
-			 * send a null data frame which may not be
-			 * universally supported by drivers (need it
-			 * for ps-poll support so it should be...).
-			 *
-			 * XXX don't probe the station unless we've
-			 *     received a frame from them (and have
-			 *     some idea of the rates they are capable
-			 *     of); this will get fixed more properly
-			 *     soon with better handling of the rate set.
-			 */
-			if ((vap->iv_flags_ext & IEEE80211_FEXT_INACT) &&
-			    (0 < ni->ni_inact &&
-			     ni->ni_inact <= vap->iv_inact_probe) &&
-			    ni->ni_rates.rs_nrates != 0) {
-				IEEE80211_NOTE(vap,
-				    IEEE80211_MSG_INACT | IEEE80211_MSG_NODE,
-				    ni, "%s",
-				    "probe station due to inactivity");
-				/*
-				 * Grab a reference before unlocking the table
-				 * so the node cannot be reclaimed before we
-				 * send the frame. ieee80211_send_nulldata
-				 * understands we've done this and reclaims the
-				 * ref for us as needed.
-				 */
-				ieee80211_ref_node(ni);
-				IEEE80211_NODE_UNLOCK(nt);
-				ieee80211_send_nulldata(ni);
-				/* XXX stat? */
-				goto restart;
-			}
-		}
 		if ((vap->iv_flags_ext & IEEE80211_FEXT_INACT) &&
-		    ni->ni_inact <= 0) {
+		    (0 < ni->ni_inact &&
+		     ni->ni_inact <= vap->iv_inact_probe) &&
+		    ni->ni_rates.rs_nrates != 0) {
 			IEEE80211_NOTE(vap,
-			    IEEE80211_MSG_INACT | IEEE80211_MSG_NODE, ni,
-			    "station timed out due to inactivity "
-			    "(refcnt %u)", ieee80211_node_refcnt(ni));
+			    IEEE80211_MSG_INACT | IEEE80211_MSG_NODE,
+			    ni, "%s",
+			    "probe station due to inactivity");
 			/*
-			 * Send a deauthenticate frame and drop the station.
-			 * This is somewhat complicated due to reference counts
-			 * and locking.  At this point a station will typically
-			 * have a reference count of 1.  ieee80211_node_leave
-			 * will do a "free" of the node which will drop the
-			 * reference count.  But in the meantime a reference
-			 * wil be held by the deauth frame.  The actual reclaim
-			 * of the node will happen either after the tx is
-			 * completed or by ieee80211_node_leave.
-			 *
-			 * Separately we must drop the node lock before sending
-			 * in case the driver takes a lock, as this can result
-			 * in a LOR between the node lock and the driver lock.
+			 * Grab a reference so the node cannot
+			 * be reclaimed before we send the frame.
+			 * ieee80211_send_nulldata understands
+			 * we've done this and reclaims the
+			 * ref for us as needed.
 			 */
+			/* XXX fix this (not required anymore). */
 			ieee80211_ref_node(ni);
-			IEEE80211_NODE_UNLOCK(nt);
-			if (ni->ni_associd != 0) {
-				IEEE80211_SEND_MGMT(ni,
-				    IEEE80211_FC0_SUBTYPE_DEAUTH,
-				    IEEE80211_REASON_AUTH_EXPIRE);
-			}
-			ieee80211_node_leave(ni);
-			ieee80211_free_node(ni);
-			vap->iv_stats.is_node_timeout++;
-			goto restart;
+			/* XXX useless */
+			ieee80211_send_nulldata(ni);
+			/* XXX stat? */
+			return;
 		}
 	}
-	IEEE80211_NODE_UNLOCK(nt);
+	if ((vap->iv_flags_ext & IEEE80211_FEXT_INACT) &&
+	    ni->ni_inact <= 0) {
+		IEEE80211_NOTE(vap,
+		    IEEE80211_MSG_INACT | IEEE80211_MSG_NODE, ni,
+		    "station timed out due to inactivity "
+		    "(refcnt %u)", ieee80211_node_refcnt(ni));
+		/*
+		 * Send a deauthenticate frame and drop the station.
+		 * This is somewhat complicated due to reference counts
+		 * and locking.  At this point a station will typically
+		 * have a reference count of 2.  ieee80211_node_leave
+		 * will do a "free" of the node which will drop the
+		 * reference count.  But in the meantime a reference
+		 * wil be held by the deauth frame.  The actual reclaim
+		 * of the node will happen either after the tx is
+		 * completed or by ieee80211_node_leave.
+		 */
+		if (ni->ni_associd != 0) {
+			IEEE80211_SEND_MGMT(ni,
+			    IEEE80211_FC0_SUBTYPE_DEAUTH,
+			    IEEE80211_REASON_AUTH_EXPIRE);
+		}
+		ieee80211_node_leave(ni);
+		vap->iv_stats.is_node_timeout++;
+	}
+}
 
-	IEEE80211_NODE_ITERATE_UNLOCK(nt);
+/*
+ * Timeout inactive stations and do related housekeeping.
+ */
+static void
+ieee80211_timeout_stations(struct ieee80211com *ic)
+{
+	struct ieee80211_node_table *nt = &ic->ic_sta;
+
+	ieee80211_iterate_nodes(nt, timeout_stations, NULL);
 }
 
 /*
@@ -2247,23 +2221,12 @@ int
 ieee80211_iterate_nt(struct ieee80211_node_table *nt,
     struct ieee80211_node **ni_arr, uint16_t max_aid)
 {
-	u_int gen;
 	int i, j, ret;
 	struct ieee80211_node *ni;
 
-	IEEE80211_NODE_ITERATE_LOCK(nt);
 	IEEE80211_NODE_LOCK(nt);
 
-	gen = ++nt->nt_scangen;
 	i = ret = 0;
-
-	/*
-	 * We simply assume here that since the node
-	 * scan generation doesn't change (as
-	 * we are holding both the node table and
-	 * node table iteration locks), we can simply
-	 * assign it to the node here.
-	 */
 	TAILQ_FOREACH(ni, &nt->nt_node, ni_list) {
 		if (i >= max_aid) {
 			ret = E2BIG;
@@ -2272,7 +2235,6 @@ ieee80211_iterate_nt(struct ieee80211_no
 			break;
 		}
 		ni_arr[i] = ieee80211_ref_node(ni);
-		ni_arr[i]->ni_scangen = gen;
 		i++;
 	}
 
@@ -2287,7 +2249,6 @@ ieee80211_iterate_nt(struct ieee80211_no
 	 * ieee80211_free_node().
 	 */
 	IEEE80211_NODE_UNLOCK(nt);
-	IEEE80211_NODE_ITERATE_UNLOCK(nt);
 
 	/*
 	 * If ret is non-zero, we hit some kind of error.
@@ -2362,8 +2323,8 @@ ieee80211_dump_node(struct ieee80211_nod
 {
 	printf("0x%p: mac %s refcnt %d\n", ni,
 		ether_sprintf(ni->ni_macaddr), ieee80211_node_refcnt(ni));
-	printf("\tscangen %u authmode %u flags 0x%x\n",
-		ni->ni_scangen, ni->ni_authmode, ni->ni_flags);
+	printf("\tauthmode %u flags 0x%x\n",
+		ni->ni_authmode, ni->ni_flags);
 	printf("\tassocid 0x%x txpower %u vlan %u\n",
 		ni->ni_associd, ni->ni_txpower, ni->ni_vlan);
 	printf("\ttxseq %u rxseq %u fragno %u rxfragstamp %u\n",

Modified: head/sys/net80211/ieee80211_node.h
==============================================================================
--- head/sys/net80211/ieee80211_node.h	Sun Jun 19 03:45:32 2016	(r302017)
+++ head/sys/net80211/ieee80211_node.h	Sun Jun 19 07:31:02 2016	(r302018)
@@ -115,7 +115,6 @@ struct ieee80211_node {
 	TAILQ_ENTRY(ieee80211_node) ni_list;	/* list of all nodes */
 	LIST_ENTRY(ieee80211_node) ni_hash;	/* hash collision list */
 	u_int			ni_refcnt;	/* count of held references */
-	u_int			ni_scangen;	/* gen# for timeout scan */
 	u_int			ni_flags;
 #define	IEEE80211_NODE_AUTH	0x000001	/* authorized for data */
 #define	IEEE80211_NODE_QOS	0x000002	/* QoS enabled */
@@ -360,8 +359,6 @@ struct ieee80211_node_table {
 	struct ieee80211_node	**nt_keyixmap;	/* key ix -> node map */
 	int			nt_keyixmax;	/* keyixmap size */
 	const char		*nt_name;	/* table name for debug msgs */
-	ieee80211_scan_lock_t	nt_scanlock;	/* on nt_scangen */
-	u_int			nt_scangen;	/* gen# for iterators */
 	int			nt_inact_init;	/* initial node inact setting */
 };
 

Modified: head/sys/net80211/ieee80211_superg.c
==============================================================================
--- head/sys/net80211/ieee80211_superg.c	Sun Jun 19 03:45:32 2016	(r302017)
+++ head/sys/net80211/ieee80211_superg.c	Sun Jun 19 07:31:02 2016	(r302018)
@@ -912,6 +912,12 @@ ieee80211_ff_node_init(struct ieee80211_
 	ieee80211_ff_node_cleanup(ni);
 }
 
+/*
+ * Note: this comlock acquisition LORs with the node lock:
+ *
+ * 1: sta_join1 -> NODE_LOCK -> node_free -> node_cleanup -> ff_node_cleanup -> COM_LOCK
+ * 2: TBD
+ */
 void
 ieee80211_ff_node_cleanup(struct ieee80211_node *ni)
 {



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201606190731.u5J7V22Q063663>