From owner-svn-src-head@freebsd.org Fri Oct 7 21:10:55 2016 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 15CE8BEDF22; Fri, 7 Oct 2016 21:10:55 +0000 (UTC) (envelope-from markj@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id CFE70250; Fri, 7 Oct 2016 21:10:54 +0000 (UTC) (envelope-from markj@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u97LAsJN017496; Fri, 7 Oct 2016 21:10:54 GMT (envelope-from markj@FreeBSD.org) Received: (from markj@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u97LAr6G017490; Fri, 7 Oct 2016 21:10:53 GMT (envelope-from markj@FreeBSD.org) Message-Id: <201610072110.u97LAr6G017490@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: markj set sender to markj@FreeBSD.org using -f From: Mark Johnston Date: Fri, 7 Oct 2016 21:10:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r306829 - in head/sys: netinet netinet6 X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 07 Oct 2016 21:10:55 -0000 Author: markj Date: Fri Oct 7 21:10:53 2016 New Revision: 306829 URL: https://svnweb.freebsd.org/changeset/base/306829 Log: Lock the ND prefix list and add refcounting for prefixes. This change extends the nd6 lock to protect the ND prefix list as well as the list of advertising routers associated with each prefix. To handle cases where the nd6 lock must be dropped while iterating over either the prefix or default router lists, a generation counter is used to track modifications to the lists. Additionally, a new mutex is used to serialize prefix on-link/off-link transitions. This mutex must be acquired before the nd6 lock and is held while updating the routing table in nd6_prefix_onlink() and nd6_prefix_offlink(). Reviewed by: ae, tuexen (SCTP bits) Tested by: Jason Wolfe , Larry Rosenman MFC after: 2 months Differential Revision: https://reviews.freebsd.org/D8125 Modified: head/sys/netinet/sctp_output.c head/sys/netinet6/in6.c head/sys/netinet6/in6_ifattach.c head/sys/netinet6/nd6.c head/sys/netinet6/nd6.h head/sys/netinet6/nd6_rtr.c Modified: head/sys/netinet/sctp_output.c ============================================================================== --- head/sys/netinet/sctp_output.c Fri Oct 7 21:03:18 2016 (r306828) +++ head/sys/netinet/sctp_output.c Fri Oct 7 21:10:53 2016 (r306829) @@ -13747,6 +13747,7 @@ sctp_v6src_match_nexthop(struct sockaddr return (0); /* get prefix entry of address */ + ND6_RLOCK(); LIST_FOREACH(pfx, &MODULE_GLOBAL(nd_prefix), ndpr_entry) { if (pfx->ndpr_stateflags & NDPRF_DETACHED) continue; @@ -13756,6 +13757,7 @@ sctp_v6src_match_nexthop(struct sockaddr } /* no prefix entry in the prefix list */ if (pfx == NULL) { + ND6_RUNLOCK(); SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6); return (0); @@ -13777,9 +13779,11 @@ sctp_v6src_match_nexthop(struct sockaddr if (sctp_cmpaddr((struct sockaddr *)&gw6, ro->ro_rt->rt_gateway)) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n"); + ND6_RUNLOCK(); return (1); } } + ND6_RUNLOCK(); SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n"); return (0); } Modified: head/sys/netinet6/in6.c ============================================================================== --- head/sys/netinet6/in6.c Fri Oct 7 21:03:18 2016 (r306828) +++ head/sys/netinet6/in6.c Fri Oct 7 21:10:53 2016 (r306829) @@ -647,6 +647,7 @@ in6_control(struct socket *so, u_long cm } } } + nd6_prefix_rele(pr); /* * this might affect the status of autoconfigured addresses, @@ -694,8 +695,12 @@ aifaddr_out: */ pr = ia->ia6_ndpr; in6_purgeaddr(&ia->ia_ifa); - if (pr && pr->ndpr_addrcnt == 0) - prelist_remove(pr); + if (pr != NULL && pr->ndpr_addrcnt == 0) { + ND6_WLOCK(); + nd6_prefix_unlink(pr, NULL); + ND6_WUNLOCK(); + nd6_prefix_del(pr); + } EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } Modified: head/sys/netinet6/in6_ifattach.c ============================================================================== --- head/sys/netinet6/in6_ifattach.c Fri Oct 7 21:03:18 2016 (r306828) +++ head/sys/netinet6/in6_ifattach.c Fri Oct 7 21:10:53 2016 (r306829) @@ -453,6 +453,7 @@ in6_ifattach_linklocal(struct ifnet *ifp struct in6_ifaddr *ia; struct in6_aliasreq ifra; struct nd_prefixctl pr0; + struct nd_prefix *pr; int error; /* @@ -535,10 +536,11 @@ in6_ifattach_linklocal(struct ifnet *ifp * address, and then reconfigure another one, the prefix is still * valid with referring to the old link-local address. */ - if (nd6_prefix_lookup(&pr0) == NULL) { + if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0) return (error); - } + } else + nd6_prefix_rele(pr); return 0; } @@ -778,15 +780,6 @@ _in6_ifdetach(struct ifnet *ifp, int pur return; /* - * Remove neighbor management table. - * Enabling the nd6_purge will panic on vmove for interfaces on VNET - * teardown as the IPv6 layer is cleaned up already and the locks - * are destroyed. - */ - if (purgeulp) - nd6_purge(ifp); - - /* * nuke any of IPv6 addresses we have * XXX: all addresses should be already removed */ @@ -804,12 +797,10 @@ _in6_ifdetach(struct ifnet *ifp, int pur in6_purgemaddrs(ifp); /* - * remove neighbor management table. we call it twice just to make - * sure we nuke everything. maybe we need just one call. - * XXX: since the first call did not release addresses, some prefixes - * might remain. We should call nd6_purge() again to release the - * prefixes after removing all addresses above. - * (Or can we just delay calling nd6_purge until at this point?) + * Remove neighbor management table. + * Enabling the nd6_purge will panic on vmove for interfaces on VNET + * teardown as the IPv6 layer is cleaned up already and the locks + * are destroyed. */ if (purgeulp) nd6_purge(ifp); Modified: head/sys/netinet6/nd6.c ============================================================================== --- head/sys/netinet6/nd6.c Fri Oct 7 21:03:18 2016 (r306828) +++ head/sys/netinet6/nd6.c Fri Oct 7 21:10:53 2016 (r306829) @@ -38,8 +38,10 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include +#include #include #include #include @@ -47,7 +49,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -118,6 +119,8 @@ static eventhandler_tag lle_event_eh, if VNET_DEFINE(struct nd_drhead, nd_defrouter); VNET_DEFINE(struct nd_prhead, nd_prefix); VNET_DEFINE(struct rwlock, nd6_lock); +VNET_DEFINE(uint64_t, nd6_list_genid); +VNET_DEFINE(struct mtx, nd6_onlink_mtx); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) @@ -209,11 +212,10 @@ void nd6_init(void) { - rw_init(&V_nd6_lock, "nd6"); + mtx_init(&V_nd6_onlink_mtx, "nd6 onlink", NULL, MTX_DEF); + rw_init(&V_nd6_lock, "nd6 list"); LIST_INIT(&V_nd_prefix); - - /* initialization of the default router list */ TAILQ_INIT(&V_nd_defrouter); /* Start timers. */ @@ -245,6 +247,7 @@ nd6_destroy() EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); } rw_destroy(&V_nd6_lock); + mtx_destroy(&V_nd6_onlink_mtx); } #endif @@ -903,13 +906,15 @@ nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); struct nd_drhead drq; + struct nd_prhead prl; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; struct in6_ifaddr *ia6, *nia6; + bool onlink_locked; TAILQ_INIT(&drq); + LIST_INIT(&prl); - /* expire default router list */ ND6_WLOCK(); TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) if (dr->expire && dr->expire < time_uptime) @@ -1016,23 +1021,51 @@ nd6_timer(void *arg) } } - /* expire prefix list */ + ND6_WLOCK(); + onlink_locked = false; +restart: LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { /* - * check prefix lifetime. - * since pltime is just for autoconf, pltime processing for - * prefix is not necessary. + * Expire prefixes. Since the pltime is only used for + * autoconfigured addresses, pltime processing for prefixes is + * not necessary. + * + * Only unlink after all derived addresses have expired. This + * may not occur until two hours after the prefix has expired + * per RFC 4862. If the prefix expires before its derived + * addresses, mark it off-link. This will be done automatically + * after unlinking if no address references remain. */ - if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && - time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) { + if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME || + time_uptime - pr->ndpr_lastupdate <= pr->ndpr_vltime) + continue; - /* - * address expiration and prefix expiration are - * separate. NEVER perform in6_purgeaddr here. - */ - prelist_remove(pr); + if (pr->ndpr_addrcnt == 0) { + nd6_prefix_unlink(pr, &prl); + continue; + } + if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { + if (!onlink_locked) { + onlink_locked = ND6_ONLINK_TRYLOCK(); + if (!onlink_locked) { + ND6_WUNLOCK(); + ND6_ONLINK_LOCK(); + onlink_locked = true; + ND6_WLOCK(); + goto restart; + } + } + (void)nd6_prefix_offlink(pr); } } + ND6_WUNLOCK(); + if (onlink_locked) + ND6_ONLINK_UNLOCK(); + + while ((pr = LIST_FIRST(&prl)) != NULL) { + LIST_REMOVE(pr, ndpr_entry); + nd6_prefix_del(pr); + } callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, nd6_timer, curvnet); @@ -1118,10 +1151,12 @@ void nd6_purge(struct ifnet *ifp) { struct nd_drhead drq; + struct nd_prhead prl; struct nd_defrouter *dr, *ndr; struct nd_prefix *pr, *npr; TAILQ_INIT(&drq); + LIST_INIT(&prl); /* * Nuke default router list entries toward ifp. @@ -1136,33 +1171,31 @@ nd6_purge(struct ifnet *ifp) if (dr->ifp == ifp) defrouter_unlink(dr, &drq); } - TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { if (!dr->installed) continue; if (dr->ifp == ifp) defrouter_unlink(dr, &drq); } + + /* + * Remove prefixes on ifp. We should have already removed addresses on + * this interface, so no addresses should be referencing these prefixes. + */ + LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { + if (pr->ndpr_ifp == ifp) + nd6_prefix_unlink(pr, &prl); + } ND6_WUNLOCK(); + /* Delete the unlinked router and prefix objects. */ while ((dr = TAILQ_FIRST(&drq)) != NULL) { TAILQ_REMOVE(&drq, dr, dr_entry); defrouter_del(dr); } - - /* Nuke prefix list entries toward ifp */ - LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { - if (pr->ndpr_ifp == ifp) { - /* - * Because if_detach() does *not* release prefixes - * while purging addresses the reference count will - * still be above zero. We therefore reset it to - * make sure that the prefix really gets purged. - */ - pr->ndpr_addrcnt = 0; - - prelist_remove(pr); - } + while ((pr = LIST_FIRST(&prl)) != NULL) { + LIST_REMOVE(pr, ndpr_entry); + nd6_prefix_del(pr); } /* cancel default outgoing interface setting */ @@ -1228,7 +1261,8 @@ nd6_is_new_addr_neighbor(const struct so struct rt_addrinfo info; struct sockaddr_in6 rt_key; const struct sockaddr *dst6; - int fibnum; + uint64_t genid; + int error, fibnum; /* * A link-local address is always a neighbor. @@ -1266,19 +1300,29 @@ nd6_is_new_addr_neighbor(const struct so * If the address matches one of our on-link prefixes, it should be a * neighbor. */ + ND6_RLOCK(); +restart: LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (pr->ndpr_ifp != ifp) continue; - if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { - + if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { /* Always use the default FIB here. */ dst6 = (const struct sockaddr *)&pr->ndpr_prefix; + genid = V_nd6_list_genid; + ND6_RUNLOCK(); + /* Restore length field before retrying lookup */ rt_key.sin6_len = sizeof(rt_key); - if (rib_lookup_info(fibnum, dst6, 0, 0, &info) != 0) + error = rib_lookup_info(fibnum, dst6, 0, 0, &info); + + ND6_RLOCK(); + if (genid != V_nd6_list_genid) + goto restart; + if (error != 0) continue; + /* * This is the case where multiple interfaces * have the same prefix, but only one is installed @@ -1290,14 +1334,17 @@ nd6_is_new_addr_neighbor(const struct so * differ. */ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, - &rt_key.sin6_addr)) + &rt_key.sin6_addr)) continue; } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, - &addr->sin6_addr, &pr->ndpr_mask)) + &addr->sin6_addr, &pr->ndpr_mask)) { + ND6_RUNLOCK(); return (1); + } } + ND6_RUNLOCK(); /* * If the address is assigned on the node of the other side of @@ -1728,15 +1775,22 @@ nd6_ioctl(u_long cmd, caddr_t data, stru case SIOCSPFXFLUSH_IN6: { /* flush all the prefix advertised by routers */ + struct in6_ifaddr *ia, *ia_next; struct nd_prefix *pr, *next; + struct nd_prhead prl; - LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { - struct in6_ifaddr *ia, *ia_next; + LIST_INIT(&prl); + ND6_WLOCK(); + LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ + nd6_prefix_unlink(pr, &prl); + } + ND6_WUNLOCK(); - /* do we really have to remove addresses as well? */ + while ((pr = LIST_FIRST(&prl)) != NULL) { + LIST_REMOVE(pr, ndpr_entry); /* XXXRW: in6_ifaddrhead locking. */ TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, ia_next) { @@ -1746,7 +1800,7 @@ nd6_ioctl(u_long cmd, caddr_t data, stru if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } - prelist_remove(pr); + nd6_prefix_del(pr); } break; } @@ -2690,9 +2744,10 @@ nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) ip6_sprintf(ip6buf, &pfr->router->rtaddr)); error = SYSCTL_OUT(req, &s6, sizeof(s6)); if (error != 0) - break; + goto out; } } +out: ND6_RUNLOCK(); return (error); } Modified: head/sys/netinet6/nd6.h ============================================================================== --- head/sys/netinet6/nd6.h Fri Oct 7 21:03:18 2016 (r306828) +++ head/sys/netinet6/nd6.h Fri Oct 7 21:10:53 2016 (r306829) @@ -256,7 +256,7 @@ struct nd_prefixctl { struct prf_ra ndpr_flags; }; - +LIST_HEAD(nd_prhead, nd_prefix); struct nd_prefix { struct ifnet *ndpr_ifp; LIST_ENTRY(nd_prefix) ndpr_entry; @@ -276,6 +276,7 @@ struct nd_prefix { LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs; u_char ndpr_plen; int ndpr_addrcnt; /* count of derived addresses */ + volatile u_int ndpr_refcnt; }; #define ndpr_raf ndpr_flags @@ -314,8 +315,6 @@ struct nd_pfxrouter { struct nd_defrouter *router; }; -LIST_HEAD(nd_prhead, nd_prefix); - #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IP6NDP); #endif @@ -346,17 +345,30 @@ VNET_DECLARE(int, nd6_onlink_ns_rfc4861) /* Lock for the prefix and default router lists. */ VNET_DECLARE(struct rwlock, nd6_lock); +VNET_DECLARE(uint64_t, nd6_list_genid); #define V_nd6_lock VNET(nd6_lock) +#define V_nd6_list_genid VNET(nd6_list_genid) #define ND6_RLOCK() rw_rlock(&V_nd6_lock) #define ND6_RUNLOCK() rw_runlock(&V_nd6_lock) #define ND6_WLOCK() rw_wlock(&V_nd6_lock) #define ND6_WUNLOCK() rw_wunlock(&V_nd6_lock) +#define ND6_TRY_UPGRADE() rw_try_upgrade(&V_nd6_lock) #define ND6_WLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_WLOCKED) #define ND6_RLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_RLOCKED) #define ND6_LOCK_ASSERT() rw_assert(&V_nd6_lock, RA_LOCKED) #define ND6_UNLOCK_ASSERT() rw_assert(&V_nd6_lock, RA_UNLOCKED) +/* Mutex for prefix onlink/offlink transitions. */ +VNET_DECLARE(struct mtx, nd6_onlink_mtx); +#define V_nd6_onlink_mtx VNET(nd6_onlink_mtx) + +#define ND6_ONLINK_LOCK() mtx_lock(&V_nd6_onlink_mtx) +#define ND6_ONLINK_TRYLOCK() mtx_trylock(&V_nd6_onlink_mtx) +#define ND6_ONLINK_UNLOCK() mtx_unlock(&V_nd6_onlink_mtx) +#define ND6_ONLINK_LOCK_ASSERT() mtx_assert(&V_nd6_onlink_mtx, MA_OWNED) +#define ND6_ONLINK_UNLOCK_ASSERT() mtx_assert(&V_nd6_onlink_mtx, MA_NOTOWNED) + #define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0) /* nd6_rtr.c */ @@ -463,9 +475,14 @@ void defrouter_rele(struct nd_defrouter bool defrouter_remove(struct in6_addr *, struct ifnet *); void defrouter_unlink(struct nd_defrouter *, struct nd_drhead *); void defrouter_del(struct nd_defrouter *); -void prelist_remove(struct nd_prefix *); int nd6_prelist_add(struct nd_prefixctl *, struct nd_defrouter *, - struct nd_prefix **); + struct nd_prefix **); +void nd6_prefix_unlink(struct nd_prefix *, struct nd_prhead *); +void nd6_prefix_del(struct nd_prefix *); +void nd6_prefix_ref(struct nd_prefix *); +void nd6_prefix_rele(struct nd_prefix *); +int nd6_prefix_onlink(struct nd_prefix *); +int nd6_prefix_offlink(struct nd_prefix *); void pfxlist_onlink_check(void); struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *); struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *); Modified: head/sys/netinet6/nd6_rtr.c ============================================================================== --- head/sys/netinet6/nd6_rtr.c Fri Oct 7 21:03:18 2016 (r306828) +++ head/sys/netinet6/nd6_rtr.c Fri Oct 7 21:10:53 2016 (r306829) @@ -87,9 +87,6 @@ static int in6_init_prefix_ltimes(struct static void in6_init_address_ltimes(struct nd_prefix *, struct in6_addrlifetime *); -static int nd6_prefix_onlink(struct nd_prefix *); -static int nd6_prefix_offlink(struct nd_prefix *); - static int rt6_deleteroute(const struct rtentry *, void *); VNET_DECLARE(int, nd6_recalc_reachtm_interval); @@ -661,6 +658,7 @@ defrouter_unlink(struct nd_defrouter *dr ND6_WLOCK_ASSERT(); TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry); + V_nd6_list_genid++; if (drq != NULL) TAILQ_INSERT_TAIL(drq, dr, dr_entry); } @@ -670,6 +668,7 @@ defrouter_del(struct nd_defrouter *dr) { struct nd_defrouter *deldr = NULL; struct nd_prefix *pr; + struct nd_pfxrouter *pfxrtr; ND6_UNLOCK_ASSERT(); @@ -688,11 +687,13 @@ defrouter_del(struct nd_defrouter *dr) /* * Also delete all the pointers to the router in each prefix lists. */ + ND6_WLOCK(); LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { - struct nd_pfxrouter *pfxrtr; if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL) pfxrtr_del(pfxrtr); } + ND6_WUNLOCK(); + pfxlist_onlink_check(); /* @@ -852,14 +853,18 @@ static struct nd_defrouter * defrtrlist_update(struct nd_defrouter *new) { struct nd_defrouter *dr, *n; + uint64_t genid; int oldpref; + bool writelocked; if (new->rtlifetime == 0) { defrouter_remove(&new->rtaddr, new->ifp); return (NULL); } - ND6_WLOCK(); + ND6_RLOCK(); + writelocked = false; +restart: dr = defrouter_lookup_locked(&new->rtaddr, new->ifp); if (dr != NULL) { oldpref = rtpref(dr); @@ -875,10 +880,32 @@ defrtrlist_update(struct nd_defrouter *n * router is still installed in the kernel. */ if (dr->installed && rtpref(new) == oldpref) { - ND6_WUNLOCK(); + if (writelocked) + ND6_WUNLOCK(); + else + ND6_RUNLOCK(); return (dr); } + } + /* + * The router needs to be reinserted into the default router + * list, so upgrade to a write lock. If that fails and the list + * has potentially changed while the lock was dropped, we'll + * redo the lookup with the write lock held. + */ + if (!writelocked) { + writelocked = true; + if (!ND6_TRY_UPGRADE()) { + genid = V_nd6_list_genid; + ND6_RUNLOCK(); + ND6_WLOCK(); + if (genid != V_nd6_list_genid) + goto restart; + } + } + + if (dr != NULL) { /* * The preferred router may have changed, so relocate this * router. @@ -912,6 +939,7 @@ defrtrlist_update(struct nd_defrouter *n TAILQ_INSERT_BEFORE(dr, n, dr_entry); else TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry); + V_nd6_list_genid++; ND6_WUNLOCK(); defrouter_select(); @@ -924,11 +952,12 @@ pfxrtr_lookup(struct nd_prefix *pr, stru { struct nd_pfxrouter *search; + ND6_LOCK_ASSERT(); + LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) { if (search->router == dr) break; } - return (search); } @@ -936,55 +965,110 @@ static void pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr) { struct nd_pfxrouter *new; + bool update; + + ND6_UNLOCK_ASSERT(); + + ND6_RLOCK(); + if (pfxrtr_lookup(pr, dr) != NULL) { + ND6_RUNLOCK(); + return; + } + ND6_RUNLOCK(); new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO); if (new == NULL) return; - new->router = dr; defrouter_ref(dr); + new->router = dr; - LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); + ND6_WLOCK(); + if (pfxrtr_lookup(pr, dr) == NULL) { + LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry); + update = true; + } else { + /* We lost a race to add the reference. */ + defrouter_rele(dr); + free(new, M_IP6NDP); + update = false; + } + ND6_WUNLOCK(); - pfxlist_onlink_check(); + if (update) + pfxlist_onlink_check(); } static void pfxrtr_del(struct nd_pfxrouter *pfr) { + ND6_WLOCK_ASSERT(); + LIST_REMOVE(pfr, pfr_entry); defrouter_rele(pfr->router); free(pfr, M_IP6NDP); } -struct nd_prefix * -nd6_prefix_lookup(struct nd_prefixctl *key) +static struct nd_prefix * +nd6_prefix_lookup_locked(struct nd_prefixctl *key) { struct nd_prefix *search; + ND6_LOCK_ASSERT(); + LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) { if (key->ndpr_ifp == search->ndpr_ifp && key->ndpr_plen == search->ndpr_plen && in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr, &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) { + nd6_prefix_ref(search); break; } } + return (search); +} +struct nd_prefix * +nd6_prefix_lookup(struct nd_prefixctl *key) +{ + struct nd_prefix *search; + + ND6_RLOCK(); + search = nd6_prefix_lookup_locked(key); + ND6_RUNLOCK(); return (search); } +void +nd6_prefix_ref(struct nd_prefix *pr) +{ + + refcount_acquire(&pr->ndpr_refcnt); +} + +void +nd6_prefix_rele(struct nd_prefix *pr) +{ + + if (refcount_release(&pr->ndpr_refcnt)) { + KASSERT(LIST_EMPTY(&pr->ndpr_advrtrs), + ("prefix %p has advertising routers", pr)); + free(pr, M_IP6NDP); + } +} + int nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr, struct nd_prefix **newp) { - struct nd_prefix *new = NULL; - int error = 0; + struct nd_prefix *new; char ip6buf[INET6_ADDRSTRLEN]; + int error; new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO); if (new == NULL) return (ENOMEM); + refcount_init(&new->ndpr_refcnt, newp != NULL ? 2 : 1); new->ndpr_ifp = pr->ndpr_ifp; new->ndpr_prefix = pr->ndpr_prefix; new->ndpr_plen = pr->ndpr_plen; @@ -1003,20 +1087,22 @@ nd6_prelist_add(struct nd_prefixctl *pr, /* make prefix in the canonical form */ IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask); - /* link ndpr_entry to nd_prefix list */ + ND6_WLOCK(); LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry); + V_nd6_list_genid++; + ND6_WUNLOCK(); /* ND_OPT_PI_FLAG_ONLINK processing */ if (new->ndpr_raf_onlink) { - int e; - - if ((e = nd6_prefix_onlink(new)) != 0) { + ND6_ONLINK_LOCK(); + if ((error = nd6_prefix_onlink(new)) != 0) { nd6log((LOG_ERR, "nd6_prelist_add: failed to make " "the prefix %s/%d on-link on %s (errno=%d)\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + pr->ndpr_plen, if_name(pr->ndpr_ifp), error)); /* proceed anyway. XXX: is it correct? */ } + ND6_ONLINK_UNLOCK(); } if (dr != NULL) @@ -1026,51 +1112,69 @@ nd6_prelist_add(struct nd_prefixctl *pr, return (0); } +/* + * Remove a prefix from the prefix list and optionally stash it in a + * caller-provided list. + * + * The ND6 lock must be held. + */ void -prelist_remove(struct nd_prefix *pr) +nd6_prefix_unlink(struct nd_prefix *pr, struct nd_prhead *list) +{ + + KASSERT(pr->ndpr_addrcnt == 0, + ("prefix %p has referencing addresses", pr)); + ND6_WLOCK_ASSERT(); + + LIST_REMOVE(pr, ndpr_entry); + V_nd6_list_genid++; + if (list != NULL) + LIST_INSERT_HEAD(list, pr, ndpr_entry); +} + +/* + * Free an unlinked prefix, first marking it off-link if necessary. + */ +void +nd6_prefix_del(struct nd_prefix *pr) { struct nd_pfxrouter *pfr, *next; int e; char ip6buf[INET6_ADDRSTRLEN]; - /* make sure to invalidate the prefix until it is really freed. */ - pr->ndpr_vltime = 0; - pr->ndpr_pltime = 0; + KASSERT(pr->ndpr_addrcnt == 0, + ("prefix %p has referencing addresses", pr)); + ND6_UNLOCK_ASSERT(); /* * Though these flags are now meaningless, we'd rather keep the value * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users * when executing "ndp -p". */ - - if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 && - (e = nd6_prefix_offlink(pr)) != 0) { - nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink " - "on %s, errno=%d\n", - ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); - /* what should we do? */ + if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { + ND6_ONLINK_LOCK(); + if ((e = nd6_prefix_offlink(pr)) != 0) { + nd6log((LOG_ERR, + "nd6_prefix_del: failed to make %s/%d offlink " + "on %s, errno=%d\n", + ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + /* what should we do? */ + } + ND6_ONLINK_UNLOCK(); } - if (pr->ndpr_addrcnt > 0) - return; /* notice here? */ - - /* unlink ndpr_entry from nd_prefix list */ - LIST_REMOVE(pr, ndpr_entry); - - /* free list of routers that advertised the prefix */ - LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) { + /* Release references to routers that have advertised this prefix. */ + ND6_WLOCK(); + LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next) pfxrtr_del(pfr); - } - free(pr, M_IP6NDP); + ND6_WUNLOCK(); + + nd6_prefix_rele(pr); pfxlist_onlink_check(); } -/* - * dr - may be NULL - */ - static int prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr, struct mbuf *m, int mcast) @@ -1120,21 +1224,22 @@ prelist_update(struct nd_prefixctl *new, if (new->ndpr_raf_onlink && (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { - int e; - - if ((e = nd6_prefix_onlink(pr)) != 0) { + ND6_ONLINK_LOCK(); + if ((error = nd6_prefix_onlink(pr)) != 0) { nd6log((LOG_ERR, "prelist_update: failed to make " "the prefix %s/%d on-link on %s " "(errno=%d)\n", ip6_sprintf(ip6buf, - &pr->ndpr_prefix.sin6_addr), - pr->ndpr_plen, if_name(pr->ndpr_ifp), e)); + &pr->ndpr_prefix.sin6_addr), + pr->ndpr_plen, if_name(pr->ndpr_ifp), + error)); /* proceed anyway. XXX: is it correct? */ } + ND6_ONLINK_UNLOCK(); } - if (dr && pfxrtr_lookup(pr, dr) == NULL) + if (dr != NULL) pfxrtr_add(pr, dr); } else { if (new->ndpr_vltime == 0) @@ -1393,8 +1498,10 @@ prelist_update(struct nd_prefixctl *new, } } - end: - return error; +end: + if (pr != NULL) + nd6_prefix_rele(pr); + return (error); } /* @@ -1409,6 +1516,8 @@ find_pfxlist_reachable_router(struct nd_ struct llentry *ln; int canreach; + ND6_LOCK_ASSERT(); + LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) { IF_AFDATA_RLOCK(pfxrtr->router->ifp); ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp); @@ -1444,6 +1553,11 @@ pfxlist_onlink_check(void) struct nd_defrouter *dr; struct nd_pfxrouter *pfxrtr = NULL; struct rm_priotracker in6_ifa_tracker; + uint64_t genid; + uint32_t flags; + + ND6_ONLINK_LOCK(); + ND6_RLOCK(); /* * Check if there is a prefix that has a reachable advertising @@ -1459,7 +1573,6 @@ pfxlist_onlink_check(void) * that does not advertise any prefixes. */ if (pr == NULL) { - ND6_RLOCK(); TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { struct nd_prefix *pr0; @@ -1470,7 +1583,6 @@ pfxlist_onlink_check(void) if (pfxrtr != NULL) break; } - ND6_RUNLOCK(); } if (pr != NULL || (!TAILQ_EMPTY(&V_nd_defrouter) && pfxrtr == NULL)) { /* @@ -1515,6 +1627,7 @@ pfxlist_onlink_check(void) * interfaces. Such cases will be handled in nd6_prefix_onlink, * so we don't have to care about them. */ +restart: LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { char ip6buf[INET6_ADDRSTRLEN]; int e; @@ -1524,21 +1637,20 @@ pfxlist_onlink_check(void) pr->ndpr_raf_auto == 0) continue; - if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 && - (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { - if ((e = nd6_prefix_offlink(pr)) != 0) { + flags = pr->ndpr_stateflags & (NDPRF_DETACHED | NDPRF_ONLINK); + if (flags == 0 || flags == (NDPRF_DETACHED | NDPRF_ONLINK)) { + genid = V_nd6_list_genid; + ND6_RUNLOCK(); + if ((flags & NDPRF_ONLINK) != 0 && + (e = nd6_prefix_offlink(pr)) != 0) { nd6log((LOG_ERR, "pfxlist_onlink_check: failed to " "make %s/%d offlink, errno=%d\n", ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, e)); - } - } - if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 && - (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 && - pr->ndpr_raf_onlink) { - if ((e = nd6_prefix_onlink(pr)) != 0) { + } else if ((flags & NDPRF_ONLINK) == 0 && + (e = nd6_prefix_onlink(pr)) != 0) { nd6log((LOG_ERR, "pfxlist_onlink_check: failed to " "make %s/%d onlink, errno=%d\n", @@ -1546,6 +1658,9 @@ pfxlist_onlink_check(void) &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, e)); } + ND6_RLOCK(); + if (genid != V_nd6_list_genid) + goto restart; } } @@ -1606,6 +1721,8 @@ pfxlist_onlink_check(void) } } IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); + ND6_RUNLOCK(); + ND6_ONLINK_UNLOCK(); } static int @@ -1686,23 +1803,20 @@ nd6_prefix_onlink_rtrequest(struct nd_pr return (a_failure); } -static int +int nd6_prefix_onlink(struct nd_prefix *pr) { struct ifaddr *ifa; struct ifnet *ifp = pr->ndpr_ifp; struct nd_prefix *opr; - int error = 0; char ip6buf[INET6_ADDRSTRLEN]; *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***