Date: Sun, 18 Jul 2010 21:21:25 -0700 (PDT) From: PseudoCylon <moonlightakkiy@yahoo.ca> To: Hans Petter Selasky <hselasky@c2i.net> Cc: Sam Leffler <sam@freebsd.org>, freebsd-current@freebsd.org, freebsd-usb@freebsd.org Subject: Re: [panic] Race in IEEE802.11 layer towards device drivers Message-ID: <964771.49833.qm@web51808.mail.re2.yahoo.com> In-Reply-To: <201007141511.46190.hselasky@c2i.net> References: <201007141511.46190.hselasky@c2i.net>
next in thread | previous in thread | raw e-mail | index | archive | help
[NB] Obviously, I didn't click "reply ALL" last time, so here are missing part > > > > -if(vap->iv_opmode == IEEE80211_M_HOSTAP){ > > > > > > > > -RUN_LOCK(sc); > > > > +if (vap->iv_opmode == IEEE80211_M_HOSTAP) > > > > > > > > sc->cmdq_key_set = RUN_CMDQ_GO; > > > > > > > > -RUN_UNLOCK(sc); > > > > -} > > > > > > > > > > > > Why are you removing these locks? > > > > It is simple assignment, it must be atomic. > > Not necessarily. If you don't put lock statements around it or use the > volatile keyword, the compiler can re-organize the execution order. I will > have a look at it. > > > > > > Another question: > > > i = RUN_CMDQ_GET(&sc->cmdq_store); > > > DPRINTF("cmdq_store=%d\n", i); > > > > > > sc->cmdq[i].func = run_update_beacon_cb; > > > sc->cmdq[i].arg0 = vap; > > > > > > Why is this code and similar places not enclosed with mutexes? > > > > First, I couldn't use a lock in key_delete() because of LoR. So, I use > > atomic instead. RUN_CMDQ_GET is atomic_fetch_add(). Whatever executes that > > code gets unique place (cmdq[i]) to write, so there shouldn't be any race. > > > > Then out of order execution happened. Specially, when key_set() overtakes > > key_delete(), encryption fails. So, all deferred processes are called back > > via run_cmdq_cb() to maintain the order. Because cmdq functions are first > > written for key_delete() where lock causes LoR. So, lock isn't needed. > > > > run_cmdq_cb() uses lock. But it is for calling callback functions locked. > > So that, functions just call another function locked, like > > ##_callback() > > { > > LOCK(); > > ##_locked(); > > UNLOCK(); > > } > > won't be needed. > > If the run_cmdq_cb() is running at the same time which you are queuing > elements, then I note that you set .func before .arg0. The ##_callback() code > only checks if .func has been set. Actually the "i" increment should be after > that you filled out the data, and then you see that you cannot use atomic. I > think the most simple solution is to add another mutex, sc->sc_cmdq_mtx, which > protects the queue and it's associated data. Also, what do you do if the queue > wraps around? You should have a mechanism to prevent that, because you then > might start executing commands in random order? > Here is a patch (patch against P4 if_run.c rev 14 and if_runvar.h rev 8) -- begin patch -- diff --git a/dev/usb/wlan/if_run.c b/dev/usb/wlan/if_run.c index 8c96534..c988ad4 100644 --- a/dev/usb/wlan/if_run.c +++ b/dev/usb/wlan/if_run.c @@ -90,12 +90,6 @@ SYSCTL_INT(_hw_usb_run, OID_AUTO, debug, CTLFLAG_RW, &run_debug, 0, #define IEEE80211_HAS_ADDR4(wh) \ (((wh)->i_fc[1] & IEEE80211_FC1_DIR_MASK) == IEEE80211_FC1_DIR_DSTODS) -/* - * Because of LOR in run_key_delete(), use atomic instead. - * '& RUN_CMDQ_MASQ' is to loop cmdq[]. - */ -#define RUN_CMDQ_GET(c)(atomic_fetchadd_32((c), 1) & RUN_CMDQ_MASQ) - static const struct usb_device_id run_devs[] = { { USB_VP(USB_VENDOR_ABOCOM,USB_PRODUCT_ABOCOM_RT2770) }, { USB_VP(USB_VENDOR_ABOCOM,USB_PRODUCT_ABOCOM_RT2870) }, @@ -554,6 +548,8 @@ run_attach(device_t self) mtx_init(&sc->sc_mtx, device_get_nameunit(sc->sc_dev), MTX_NETWORK_LOCK, MTX_DEF); +mtx_init(&sc->sc_cmdq_mtx, device_get_nameunit(sc->sc_dev), + MTX_NETWORK_LOCK, MTX_DEF); iface_index = RT2860_IFACE_INDEX; @@ -737,6 +733,7 @@ run_detach(device_t self) } mtx_destroy(&sc->sc_mtx); +mtx_destroy(&sc->sc_cmdq_mtx); return (0); } @@ -830,9 +827,6 @@ run_vap_create(struct ieee80211com *ic, if(sc->rvp_cnt++ == 0) ic->ic_opmode = opmode; -if(opmode == IEEE80211_M_HOSTAP) -sc->cmdq_run = RUN_CMDQ_GO; - DPRINTF("rvp_id=%d bmap=%x rvp_cnt=%d\n", rvp->rvp_id, sc->rvp_bmap, sc->rvp_cnt); @@ -889,27 +883,31 @@ run_cmdq_cb(void *arg, int pending) struct run_softc *sc = arg; uint8_t i; -/* call cmdq[].func locked */ -RUN_LOCK(sc); -for(i = sc->cmdq_exec; sc->cmdq[i].func && pending; - i = sc->cmdq_exec, pending--){ +RUN_CMDQ_LOCK(sc); +for (i = sc->cmdq_exec; sc->cmdq[i].func; i = sc->cmdq_exec) { DPRINTFN(6, "cmdq_exec=%d pending=%d\n", i, pending); -if(sc->cmdq_run == RUN_CMDQ_GO){ +if (sc->cmdq_run == RUN_CMDQ_GO || + (sc->cmdq_key_set == RUN_CMDQ_GO && + sc->cmdq[i].func == run_key_set_cb)) { +RUN_CMDQ_UNLOCK(sc); +RUN_LOCK(sc); /* * If arg0 is NULL, callback func needs more * than one arg. So, pass ptr to cmdq struct. */ -if(sc->cmdq[i].arg0) +if (sc->cmdq[i].arg0) sc->cmdq[i].func(sc->cmdq[i].arg0); else sc->cmdq[i].func(&sc->cmdq[i]); +RUN_UNLOCK(sc); +RUN_CMDQ_LOCK(sc); } sc->cmdq[i].arg0 = NULL; sc->cmdq[i].func = NULL; sc->cmdq_exec++; sc->cmdq_exec &= RUN_CMDQ_MASQ; } -RUN_UNLOCK(sc); +RUN_CMDQ_UNLOCK(sc); } static void @@ -1771,6 +1769,19 @@ run_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) case IEEE80211_S_INIT: restart_ratectl = 1; +/* + * When hostapd has set a key, don't clear it. + * But, when the device is being brought down, clear it. + */ +if (sc->cmdq_key_set != RUN_CMDQ_GO || + ostate == IEEE80211_S_RUN) { +/* clear shared key table */ +run_set_region_4(sc, + RT2860_SKEY(rvp->rvp_id, 0), 0, 4 * 32); +/* clear shared key mode */ +run_set_region_4(sc, RT2860_SKEY_MODE_0_7, 0, 4); +} + if (ostate != IEEE80211_S_RUN) break; @@ -1922,12 +1933,24 @@ run_wme_update(struct ieee80211com *ic) struct run_softc *sc = ic->ic_ifp->if_softc; /* sometime called wothout lock */ -if(mtx_owned(&ic->ic_comlock.mtx)){ -uint32_t i = RUN_CMDQ_GET(&sc->cmdq_store); +if (mtx_owned(&ic->ic_comlock.mtx)) { +RUN_CMDQ_LOCK(sc); +uint8_t i = sc->cmdq_store; DPRINTF("cmdq_store=%d\n", i); + +if (sc->cmdq[i].func != NULL) { +DPRINTF("cmdq is full\n"); +RUN_CMDQ_UNLOCK(sc); +return (-1); +} + sc->cmdq[i].func = run_wme_update_cb; sc->cmdq[i].arg0 = ic; +sc->cmdq_store++; +sc->cmdq_store &= RUN_CMDQ_MASQ; ieee80211_runtask(ic, &sc->cmdq_task); + +RUN_CMDQ_UNLOCK(sc); return (0); } @@ -2085,28 +2108,38 @@ run_key_set(struct ieee80211vap *vap, struct ieee80211_key *k, { struct ieee80211com *ic = vap->iv_ic; struct run_softc *sc = ic->ic_ifp->if_softc; -uint32_t i; +uint8_t i; -i = RUN_CMDQ_GET(&sc->cmdq_store); +RUN_CMDQ_LOCK(sc); + +i = sc->cmdq_store; DPRINTF("cmdq_store=%d\n", i); + +if (sc->cmdq[i].func != NULL) { +DPRINTF("cmdq is full\n"); +RUN_CMDQ_UNLOCK(sc); +return (0); +} + sc->cmdq[i].func = run_key_set_cb; sc->cmdq[i].arg0 = NULL; sc->cmdq[i].arg1 = vap; sc->cmdq[i].k = k; IEEE80211_ADDR_COPY(sc->cmdq[i].mac, mac); +sc->cmdq_store++; +sc->cmdq_store &= RUN_CMDQ_MASQ; ieee80211_runtask(ic, &sc->cmdq_task); /* * To make sure key will be set when hostapd * calls iv_key_set() before if_init(). */ -if(vap->iv_opmode == IEEE80211_M_HOSTAP){ -RUN_LOCK(sc); +if (vap->iv_opmode == IEEE80211_M_HOSTAP) sc->cmdq_key_set = RUN_CMDQ_GO; -RUN_UNLOCK(sc); -} -return(1); +RUN_CMDQ_UNLOCK(sc); + +return (1); } /* @@ -2154,16 +2187,23 @@ run_key_delete(struct ieee80211vap *vap, struct ieee80211_key *k) struct ieee80211com *ic = vap->iv_ic; struct run_softc *sc = ic->ic_ifp->if_softc; struct ieee80211_key *k0; -uint32_t i; +uint8_t i; /* * When called back, key might be gone. So, make a copy * of some values need to delete keys before deferring. - * But, because of LOR with node lock, cannot use lock here. - * So, use atomic instead. */ -i = RUN_CMDQ_GET(&sc->cmdq_store); +RUN_CMDQ_LOCK(sc); + +i = sc->cmdq_store; DPRINTF("cmdq_store=%d\n", i); + +if (sc->cmdq[i].func != NULL) { +DPRINTF("cmdq is full\n"); +RUN_CMDQ_UNLOCK(sc); +return (0); +} + sc->cmdq[i].func = run_key_delete_cb; sc->cmdq[i].arg0 = NULL; sc->cmdq[i].arg1 = sc; @@ -2172,9 +2212,13 @@ run_key_delete(struct ieee80211vap *vap, struct ieee80211_key *k) k0->wk_keyix = k->wk_keyix; /* matching wcid was written to wk_pad in run_key_set() */ k0->wk_pad = k->wk_pad; +sc->cmdq_store++; +sc->cmdq_store &= RUN_CMDQ_MASQ; ieee80211_runtask(ic, &sc->cmdq_task); -return (1);/* return fake success */ +RUN_CMDQ_UNLOCK(sc); + +return (1); } static void @@ -2362,19 +2406,31 @@ run_newassoc(struct ieee80211_node *ni, int isnew) } /* only interested in true associations */ -if (isnew && ni->ni_associd != 0){ - +if (isnew && ni->ni_associd != 0) { /* - * This function could is called though timeout function. + * This function could be called though timeout function. * Need to defer. */ -uint32_t cnt = RUN_CMDQ_GET(&sc->cmdq_store); +RUN_CMDQ_LOCK(sc); + +uint8_t cnt = sc->cmdq_store; DPRINTF("cmdq_store=%d\n", cnt); + +if (sc->cmdq[cnt].func != NULL) { +DPRINTF("cmdq is full\n"); +RUN_CMDQ_UNLOCK(sc); +return; +} + sc->cmdq[cnt].func = run_newassoc_cb; sc->cmdq[cnt].arg0 = NULL; sc->cmdq[cnt].arg1 = ni; sc->cmdq[cnt].wcid = wcid; +sc->cmdq_store++; +sc->cmdq_store &= RUN_CMDQ_MASQ; ieee80211_runtask(ic, &sc->cmdq_task); + +RUN_CMDQ_UNLOCK(sc); } DPRINTF("new assoc isnew=%d associd=%x addr=%s\n", @@ -2805,11 +2861,20 @@ tr_setup: if (error != USB_ERR_CANCELLED) { if (error == USB_ERR_TIMEOUT) { device_printf(sc->sc_dev, "device timeout\n"); -uint32_t i = RUN_CMDQ_GET(&sc->cmdq_store); + +RUN_CMDQ_LOCK(sc); +uint8_t i = sc->cmdq_store; DPRINTF("cmdq_store=%d\n", i); -sc->cmdq[i].func = run_usb_timeout_cb; -sc->cmdq[i].arg0 = vap; -ieee80211_runtask(ic, &sc->cmdq_task); +if (sc->cmdq[i].func != NULL) +DPRINTF("cmdq is full\n"); +else { +sc->cmdq[i].func = run_usb_timeout_cb; +sc->cmdq[i].arg0 = vap; +sc->cmdq_store++; +sc->cmdq_store &= RUN_CMDQ_MASQ; +ieee80211_runtask(ic, &sc->cmdq_task); +} +RUN_CMDQ_UNLOCK(sc); } /* @@ -3067,11 +3132,24 @@ run_tx(struct run_softc *sc, struct mbuf *m, struct ieee80211_node *ni) * With multiple vaps or if_bridge, if_start() is called * with a non-sleepable lock, tcpinp. So, need to defer. */ -uint32_t i = RUN_CMDQ_GET(&sc->cmdq_store); +RUN_CMDQ_LOCK(sc); +uint8_t i = sc->cmdq_store; DPRINTFN(6, "cmdq_store=%d\n", i); -sc->cmdq[i].func = run_drain_fifo; -sc->cmdq[i].arg0 = sc; -ieee80211_runtask(ic, &sc->cmdq_task); +if (sc->cmdq[i].func != NULL) { +DPRINTF("cmdq is full\n"); +/* + * Keep going. + * no need to stop Tx + * just because reading Tx stats failed + */ +} else { +sc->cmdq[i].func = run_drain_fifo; +sc->cmdq[i].arg0 = sc; +sc->cmdq_store++; +sc->cmdq_store &= RUN_CMDQ_MASQ; +ieee80211_runtask(ic, &sc->cmdq_task); +} +RUN_CMDQ_UNLOCK(sc); } } @@ -3188,7 +3266,7 @@ run_sendprot(struct run_softc *sc, ackrate = ieee80211_ack_rate(ic->ic_rt, rate); isshort = (ic->ic_flags & IEEE80211_F_SHPREAMBLE) != 0; -dur = ieee80211_compute_duration(ic->ic_rt, pktlen, rate, isshort); +dur = ieee80211_compute_duration(ic->ic_rt, pktlen, rate, isshort) + ieee80211_ack_duration(ic->ic_rt, rate, isshort); wflags = RT2860_TX_FRAG; @@ -3906,14 +3984,27 @@ run_update_beacon(struct ieee80211vap *vap, int item) { struct ieee80211com *ic = vap->iv_ic; struct run_softc *sc = ic->ic_ifp->if_softc; -uint32_t i; +uint8_t i; + +RUN_CMDQ_LOCK(sc); -i = RUN_CMDQ_GET(&sc->cmdq_store); +i = sc->cmdq_store; DPRINTF("cmdq_store=%d\n", i); + +if (sc->cmdq[i].func != NULL) { +DPRINTF("cmdq is full\n"); +RUN_CMDQ_UNLOCK(sc); +return; +} + sc->cmdq[i].func = run_update_beacon_cb; sc->cmdq[i].arg0 = vap; +sc->cmdq_store++; +sc->cmdq_store &= RUN_CMDQ_MASQ; ieee80211_runtask(ic, &sc->cmdq_task); +RUN_CMDQ_UNLOCK(sc); + return; } @@ -4693,14 +4784,6 @@ run_init_locked(struct run_softc *sc) /* clear WCID attribute table */ run_set_region_4(sc, RT2860_WCID_ATTR(0), 0, 8 * 32); -/* hostapd sets a key before init. So, don't clear it. */ -if(sc->cmdq_key_set != RUN_CMDQ_GO){ -/* clear shared key table */ -run_set_region_4(sc, RT2860_SKEY(0, 0), 0, 8 * 32); -/* clear shared key mode */ -run_set_region_4(sc, RT2860_SKEY_MODE_0_7, 0, 4); -} - run_read(sc, RT2860_US_CYC_CNT, &tmp); tmp = (tmp & ~0xff) | 0x1e; run_write(sc, RT2860_US_CYC_CNT, tmp); @@ -4807,7 +4890,7 @@ run_stop(void *arg) ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->ratectl_run = RUN_RATECTL_OFF; -sc->cmdq_run = sc->cmdq_key_set; +sc->cmdq_run = RUN_CMDQ_ABORT; RUN_UNLOCK(sc); diff --git a/dev/usb/wlan/if_runvar.h b/dev/usb/wlan/if_runvar.h index 39addbf..a6fddaa 100644 --- a/dev/usb/wlan/if_runvar.h +++ b/dev/usb/wlan/if_runvar.h @@ -202,6 +202,7 @@ struct run_softc { uint8_tsc_bssid[6]; struct mtxsc_mtx; +struct mtxsc_cmdq_mtx; struct run_endpoint_queuesc_epq[RUN_EP_QUEUES]; @@ -210,12 +211,12 @@ struct run_softc { uint8_tratectl_run; #define RUN_RATECTL_OFF0 -/* need to be power of 2, otherwise RUN_CMDQ_GET fails */ +/* need to be power of 2, otherwise run_cmdq_cb() fails */ #define RUN_CMDQ_MAX16 #define RUN_CMDQ_MASQ(RUN_CMDQ_MAX - 1) struct run_cmdqcmdq[RUN_CMDQ_MAX]; struct taskcmdq_task; -uint32_tcmdq_store; +uint8_tcmdq_store; uint8_tcmdq_exec; uint8_tcmdq_run; uint8_tcmdq_key_set; @@ -255,4 +256,7 @@ struct run_softc { #define RUN_UNLOCK(sc)mtx_unlock(&(sc)->sc_mtx) #define RUN_LOCK_ASSERT(sc, t)mtx_assert(&(sc)->sc_mtx, t) +#define RUN_CMDQ_LOCK(sc)mtx_lock(&(sc)->sc_cmdq_mtx) +#define RUN_CMDQ_UNLOCK(sc)mtx_unlock(&(sc)->sc_cmdq_mtx) + #endif/* _IF_RUNVAR_H_ */ -- end patch --
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?964771.49833.qm>