Date: Tue, 21 Sep 2021 13:48:30 GMT From: Mark Johnston <markj@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org Subject: git: bcc7e68fcf2c - stable/13 - nd6: Make the DAD callout MPSAFE Message-ID: <202109211348.18LDmUrA035616@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by markj: URL: https://cgit.FreeBSD.org/src/commit/?id=bcc7e68fcf2c34b5f090f55923adc4debc7cc91d commit bcc7e68fcf2c34b5f090f55923adc4debc7cc91d Author: Mark Johnston <markj@FreeBSD.org> AuthorDate: 2021-09-07 13:49:47 +0000 Commit: Mark Johnston <markj@FreeBSD.org> CommitDate: 2021-09-21 13:37:52 +0000 nd6: Make the DAD callout MPSAFE Interface addresses with pending duplicate address detection (DAD) live in a global queue. In this case, a callout is associated with each entry. The callout transmits neighbour solicitations until the system decides the address is no longer tentative, or until a duplicate address is discovered. At this point the entry is dequeued and freed. DAD may be manually stopped as well. The callout currently runs (and potentially transmits packets) with Giant held. Reorganize DAD queue locking to interlock properly with the callout: - Configure the callout to acquire the DAD queue lock before running. The lock is dropped before transmitting any packets. Stop protecting the callout with Giant. - When looking up DAD queue entries for an incoming NS or NA, don't bother fiddling with the DAD queue entry reference count. - Split nd6_dad_starttimer() so that the caller is responsible to transmitting a NS if it so desires. - Remove the DAD entry from the queue before stopping the timer. Use a temporary reference to make sure that the entry doesn't get freed by the callout while we're draining. Reported by: mav Reviewed by: bz, hrs Sponsored by: The FreeBSD Foundation (cherry picked from commit 9a94097cd0f46aacf688973d15b3b462c79f0008) --- sys/netinet6/nd6_nbr.c | 116 +++++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 51 deletions(-) diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c index 30d73f9d71a9..2100700733ad 100644 --- a/sys/netinet6/nd6_nbr.c +++ b/sys/netinet6/nd6_nbr.c @@ -84,9 +84,9 @@ static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *); static void nd6_dad_add(struct dadq *dp); static void nd6_dad_del(struct dadq *dp); static void nd6_dad_rele(struct dadq *); -static void nd6_dad_starttimer(struct dadq *, int, int); +static void nd6_dad_starttimer(struct dadq *, int); static void nd6_dad_stoptimer(struct dadq *); -static void nd6_dad_timer(struct dadq *); +static void nd6_dad_timer(void *); static void nd6_dad_duplicated(struct ifaddr *, struct dadq *); static void nd6_dad_ns_output(struct dadq *); static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *); @@ -1135,26 +1135,31 @@ VNET_DEFINE_STATIC(struct rwlock, dad_rwlock); #define V_dadq VNET(dadq) #define V_dad_rwlock VNET(dad_rwlock) -#define DADQ_RLOCK() rw_rlock(&V_dad_rwlock) -#define DADQ_RUNLOCK() rw_runlock(&V_dad_rwlock) -#define DADQ_WLOCK() rw_wlock(&V_dad_rwlock) -#define DADQ_WUNLOCK() rw_wunlock(&V_dad_rwlock) +#define DADQ_LOCKPTR() (&V_dad_rwlock) +#define DADQ_LOCK_INIT() rw_init(DADQ_LOCKPTR(), "nd6 DAD queue") +#define DADQ_RLOCK() rw_rlock(DADQ_LOCKPTR()) +#define DADQ_RUNLOCK() rw_runlock(DADQ_LOCKPTR()) +#define DADQ_WLOCK() rw_wlock(DADQ_LOCKPTR()) +#define DADQ_WUNLOCK() rw_wunlock(DADQ_LOCKPTR()) + +#define DADQ_LOCK_ASSERT() rw_assert(DADQ_LOCKPTR(), RA_LOCKED); +#define DADQ_RLOCK_ASSERT() rw_assert(DADQ_LOCKPTR(), RA_RLOCKED); +#define DADQ_WLOCK_ASSERT() rw_assert(DADQ_LOCKPTR(), RA_WLOCKED); static void nd6_dad_add(struct dadq *dp) { + DADQ_WLOCK_ASSERT(); - DADQ_WLOCK(); TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list); dp->dad_ondadq = true; - DADQ_WUNLOCK(); } static void nd6_dad_del(struct dadq *dp) { + DADQ_WLOCK_ASSERT(); - DADQ_WLOCK(); if (dp->dad_ondadq) { /* * Remove dp from the dadq and release the dadq's @@ -1162,10 +1167,8 @@ nd6_dad_del(struct dadq *dp) */ TAILQ_REMOVE(&V_dadq, dp, dad_list); dp->dad_ondadq = false; - DADQ_WUNLOCK(); nd6_dad_rele(dp); - } else - DADQ_WUNLOCK(); + } } static struct dadq * @@ -1173,10 +1176,12 @@ nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n) { struct dadq *dp; - DADQ_RLOCK(); + DADQ_LOCK_ASSERT(); + TAILQ_FOREACH(dp, &V_dadq, dad_list) { if (dp->dad_ifa != ifa) continue; + /* * Skip if the nonce matches the received one. * +2 in the length is required because of type and @@ -1185,42 +1190,35 @@ nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n) if (n != NULL && n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 && memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0], - ND_OPT_NONCE_LEN) == 0) { + ND_OPT_NONCE_LEN) == 0) { dp->dad_ns_lcount++; continue; } - refcount_acquire(&dp->dad_refcnt); break; } - DADQ_RUNLOCK(); return (dp); } static void -nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns) +nd6_dad_starttimer(struct dadq *dp, int ticks) { + DADQ_WLOCK_ASSERT(); - NET_EPOCH_ASSERT(); - - if (send_ns != 0) - nd6_dad_ns_output(dp); - callout_reset(&dp->dad_timer_ch, ticks, - (void (*)(void *))nd6_dad_timer, (void *)dp); + callout_reset(&dp->dad_timer_ch, ticks, nd6_dad_timer, dp); } static void nd6_dad_stoptimer(struct dadq *dp) { - callout_drain(&dp->dad_timer_ch); } static void nd6_dad_rele(struct dadq *dp) { - if (refcount_release(&dp->dad_refcnt)) { + KASSERT(!dp->dad_ondadq, ("dp %p still on DAD queue", dp)); ifa_free(dp->dad_ifa); free(dp, M_IP6NDP); } @@ -1229,8 +1227,7 @@ nd6_dad_rele(struct dadq *dp) void nd6_dad_init(void) { - - rw_init(&V_dad_rwlock, "nd6 DAD queue"); + DADQ_LOCK_INIT(); TAILQ_INIT(&V_dadq); } @@ -1243,7 +1240,6 @@ nd6_dad_start(struct ifaddr *ifa, int delay) struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; struct dadq *dp; char ip6buf[INET6_ADDRSTRLEN]; - struct epoch_tracker et; KASSERT((ia->ia6_flags & IN6_IFF_TENTATIVE) != 0, ("starting DAD on non-tentative address %p", ifa)); @@ -1265,12 +1261,13 @@ nd6_dad_start(struct ifaddr *ifa, int delay) (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED) != 0) return; + DADQ_WLOCK(); if ((dp = nd6_dad_find(ifa, NULL)) != NULL) { /* * DAD is already in progress. Let the existing entry * finish it. */ - nd6_dad_rele(dp); + DADQ_WUNLOCK(); return; } @@ -1282,7 +1279,8 @@ nd6_dad_start(struct ifaddr *ifa, int delay) ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???"); return; } - callout_init(&dp->dad_timer_ch, 0); + callout_init_rw(&dp->dad_timer_ch, DADQ_LOCKPTR(), + CALLOUT_RETURNUNLOCKED); #ifdef VIMAGE dp->dad_vnet = curvnet; #endif @@ -1305,9 +1303,8 @@ nd6_dad_start(struct ifaddr *ifa, int delay) /* Add this to the dadq and add a reference for the dadq. */ refcount_init(&dp->dad_refcnt, 1); nd6_dad_add(dp); - NET_EPOCH_ENTER(et); - nd6_dad_starttimer(dp, delay, 0); - NET_EPOCH_EXIT(et); + nd6_dad_starttimer(dp, delay); + DADQ_WUNLOCK(); } /* @@ -1318,29 +1315,36 @@ nd6_dad_stop(struct ifaddr *ifa) { struct dadq *dp; + DADQ_WLOCK(); dp = nd6_dad_find(ifa, NULL); - if (!dp) { + if (dp == NULL) { + DADQ_WUNLOCK(); /* DAD wasn't started yet */ return; } - nd6_dad_stoptimer(dp); + /* + * Acquire a temporary reference so that we can safely stop the callout. + */ + (void)refcount_acquire(&dp->dad_refcnt); nd6_dad_del(dp); + DADQ_WUNLOCK(); - /* Release this function's reference, acquired by nd6_dad_find(). */ + nd6_dad_stoptimer(dp); nd6_dad_rele(dp); } static void -nd6_dad_timer(struct dadq *dp) +nd6_dad_timer(void *arg) { - CURVNET_SET(dp->dad_vnet); + struct dadq *dp = arg; struct ifaddr *ifa = dp->dad_ifa; struct ifnet *ifp = dp->dad_ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa; char ip6buf[INET6_ADDRSTRLEN]; struct epoch_tracker et; + CURVNET_SET(dp->dad_vnet); KASSERT(ia != NULL, ("DAD entry %p with no address", dp)); NET_EPOCH_ENTER(et); @@ -1381,17 +1385,18 @@ nd6_dad_timer(struct dadq *dp) * We have more NS to go. Send NS packet for DAD. */ nd6_dad_starttimer(dp, - (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1); + (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); + nd6_dad_ns_output(dp); goto done; } else { /* * We have transmitted sufficient number of DAD packets. * See what we've got. */ - if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) + if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) { /* We've seen NS or NA, means DAD has failed. */ nd6_dad_duplicated(ifa, dp); - else if (V_dad_enhanced != 0 && + } else if (V_dad_enhanced != 0 && dp->dad_ns_lcount > 0 && dp->dad_ns_lcount > dp->dad_loopbackprobe) { /* @@ -1412,8 +1417,8 @@ nd6_dad_timer(struct dadq *dp) dp->dad_count = dp->dad_ns_ocount + V_nd6_mmaxtries - 1; nd6_dad_starttimer(dp, - (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, - 1); + (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000); + nd6_dad_ns_output(dp); goto done; } else { /* @@ -1439,6 +1444,7 @@ nd6_dad_timer(struct dadq *dp) } err: nd6_dad_del(dp); + DADQ_WUNLOCK(); done: NET_EPOCH_EXIT(et); CURVNET_RESTORE(); @@ -1498,6 +1504,10 @@ nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp) } } +/* + * Transmit a neighbour solicitation for the purpose of DAD. Returns with the + * DAD queue unlocked. + */ static void nd6_dad_ns_output(struct dadq *dp) { @@ -1505,11 +1515,15 @@ nd6_dad_ns_output(struct dadq *dp) struct ifnet *ifp = dp->dad_ifa->ifa_ifp; int i; + DADQ_WLOCK_ASSERT(); + dp->dad_ns_tcount++; if ((ifp->if_flags & IFF_UP) == 0) { + DADQ_WUNLOCK(); return; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + DADQ_WUNLOCK(); return; } @@ -1526,6 +1540,7 @@ nd6_dad_ns_output(struct dadq *dp) * should work well in almost all cases. */ } + DADQ_WUNLOCK(); nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr, (uint8_t *)&dp->dad_nonce[0]); } @@ -1541,12 +1556,11 @@ nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce) /* Ignore Nonce option when Enhanced DAD is disabled. */ if (V_dad_enhanced == 0) ndopt_nonce = NULL; + DADQ_RLOCK(); dp = nd6_dad_find(ifa, ndopt_nonce); - if (dp == NULL) - return; - - dp->dad_ns_icount++; - nd6_dad_rele(dp); + if (dp != NULL) + dp->dad_ns_icount++; + DADQ_RUNLOCK(); } static void @@ -1557,9 +1571,9 @@ nd6_dad_na_input(struct ifaddr *ifa) if (ifa == NULL) panic("ifa == NULL in nd6_dad_na_input"); + DADQ_RLOCK(); dp = nd6_dad_find(ifa, NULL); - if (dp != NULL) { + if (dp != NULL) dp->dad_na_icount++; - nd6_dad_rele(dp); - } + DADQ_RUNLOCK(); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202109211348.18LDmUrA035616>