Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 21 Sep 2021 13:48:30 GMT
From:      Mark Johnston <markj@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: bcc7e68fcf2c - stable/13 - nd6: Make the DAD callout MPSAFE
Message-ID:  <202109211348.18LDmUrA035616@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by markj:

URL: https://cgit.FreeBSD.org/src/commit/?id=bcc7e68fcf2c34b5f090f55923adc4debc7cc91d

commit bcc7e68fcf2c34b5f090f55923adc4debc7cc91d
Author:     Mark Johnston <markj@FreeBSD.org>
AuthorDate: 2021-09-07 13:49:47 +0000
Commit:     Mark Johnston <markj@FreeBSD.org>
CommitDate: 2021-09-21 13:37:52 +0000

    nd6: Make the DAD callout MPSAFE
    
    Interface addresses with pending duplicate address detection (DAD) live
    in a global queue.  In this case, a callout is associated with each
    entry.  The callout transmits neighbour solicitations until the system
    decides the address is no longer tentative, or until a duplicate address
    is discovered.  At this point the entry is dequeued and freed.  DAD may
    be manually stopped as well.
    
    The callout currently runs (and potentially transmits packets) with
    Giant held.  Reorganize DAD queue locking to interlock properly with the
    callout:
    
    - Configure the callout to acquire the DAD queue lock before running.
      The lock is dropped before transmitting any packets.  Stop protecting
      the callout with Giant.
    - When looking up DAD queue entries for an incoming NS or NA, don't
      bother fiddling with the DAD queue entry reference count.
    - Split nd6_dad_starttimer() so that the caller is responsible to
      transmitting a NS if it so desires.
    - Remove the DAD entry from the queue before stopping the timer.  Use a
      temporary reference to make sure that the entry doesn't get freed by
      the callout while we're draining.
    
    Reported by:    mav
    Reviewed by:    bz, hrs
    Sponsored by:   The FreeBSD Foundation
    
    (cherry picked from commit 9a94097cd0f46aacf688973d15b3b462c79f0008)
---
 sys/netinet6/nd6_nbr.c | 116 +++++++++++++++++++++++++++----------------------
 1 file changed, 65 insertions(+), 51 deletions(-)

diff --git a/sys/netinet6/nd6_nbr.c b/sys/netinet6/nd6_nbr.c
index 30d73f9d71a9..2100700733ad 100644
--- a/sys/netinet6/nd6_nbr.c
+++ b/sys/netinet6/nd6_nbr.c
@@ -84,9 +84,9 @@ static struct dadq *nd6_dad_find(struct ifaddr *, struct nd_opt_nonce *);
 static void nd6_dad_add(struct dadq *dp);
 static void nd6_dad_del(struct dadq *dp);
 static void nd6_dad_rele(struct dadq *);
-static void nd6_dad_starttimer(struct dadq *, int, int);
+static void nd6_dad_starttimer(struct dadq *, int);
 static void nd6_dad_stoptimer(struct dadq *);
-static void nd6_dad_timer(struct dadq *);
+static void nd6_dad_timer(void *);
 static void nd6_dad_duplicated(struct ifaddr *, struct dadq *);
 static void nd6_dad_ns_output(struct dadq *);
 static void nd6_dad_ns_input(struct ifaddr *, struct nd_opt_nonce *);
@@ -1135,26 +1135,31 @@ VNET_DEFINE_STATIC(struct rwlock, dad_rwlock);
 #define	V_dadq			VNET(dadq)
 #define	V_dad_rwlock		VNET(dad_rwlock)
 
-#define	DADQ_RLOCK()		rw_rlock(&V_dad_rwlock)	
-#define	DADQ_RUNLOCK()		rw_runlock(&V_dad_rwlock)	
-#define	DADQ_WLOCK()		rw_wlock(&V_dad_rwlock)	
-#define	DADQ_WUNLOCK()		rw_wunlock(&V_dad_rwlock)	
+#define	DADQ_LOCKPTR()		(&V_dad_rwlock)
+#define	DADQ_LOCK_INIT()	rw_init(DADQ_LOCKPTR(), "nd6 DAD queue")
+#define	DADQ_RLOCK()		rw_rlock(DADQ_LOCKPTR())
+#define	DADQ_RUNLOCK()		rw_runlock(DADQ_LOCKPTR())
+#define	DADQ_WLOCK()		rw_wlock(DADQ_LOCKPTR())
+#define	DADQ_WUNLOCK()		rw_wunlock(DADQ_LOCKPTR())
+
+#define	DADQ_LOCK_ASSERT()	rw_assert(DADQ_LOCKPTR(), RA_LOCKED);
+#define	DADQ_RLOCK_ASSERT()	rw_assert(DADQ_LOCKPTR(), RA_RLOCKED);
+#define	DADQ_WLOCK_ASSERT()	rw_assert(DADQ_LOCKPTR(), RA_WLOCKED);
 
 static void
 nd6_dad_add(struct dadq *dp)
 {
+	DADQ_WLOCK_ASSERT();
 
-	DADQ_WLOCK();
 	TAILQ_INSERT_TAIL(&V_dadq, dp, dad_list);
 	dp->dad_ondadq = true;
-	DADQ_WUNLOCK();
 }
 
 static void
 nd6_dad_del(struct dadq *dp)
 {
+	DADQ_WLOCK_ASSERT();
 
-	DADQ_WLOCK();
 	if (dp->dad_ondadq) {
 		/*
 		 * Remove dp from the dadq and release the dadq's
@@ -1162,10 +1167,8 @@ nd6_dad_del(struct dadq *dp)
 		 */
 		TAILQ_REMOVE(&V_dadq, dp, dad_list);
 		dp->dad_ondadq = false;
-		DADQ_WUNLOCK();
 		nd6_dad_rele(dp);
-	} else
-		DADQ_WUNLOCK();
+	}
 }
 
 static struct dadq *
@@ -1173,10 +1176,12 @@ nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
 {
 	struct dadq *dp;
 
-	DADQ_RLOCK();
+	DADQ_LOCK_ASSERT();
+
 	TAILQ_FOREACH(dp, &V_dadq, dad_list) {
 		if (dp->dad_ifa != ifa)
 			continue;
+
 		/*
 		 * Skip if the nonce matches the received one.
 		 * +2 in the length is required because of type and
@@ -1185,42 +1190,35 @@ nd6_dad_find(struct ifaddr *ifa, struct nd_opt_nonce *n)
 		if (n != NULL &&
 		    n->nd_opt_nonce_len == (ND_OPT_NONCE_LEN + 2) / 8 &&
 		    memcmp(&n->nd_opt_nonce[0], &dp->dad_nonce[0],
-		        ND_OPT_NONCE_LEN) == 0) {
+		    ND_OPT_NONCE_LEN) == 0) {
 			dp->dad_ns_lcount++;
 			continue;
 		}
-		refcount_acquire(&dp->dad_refcnt);
 		break;
 	}
-	DADQ_RUNLOCK();
 
 	return (dp);
 }
 
 static void
-nd6_dad_starttimer(struct dadq *dp, int ticks, int send_ns)
+nd6_dad_starttimer(struct dadq *dp, int ticks)
 {
+	DADQ_WLOCK_ASSERT();
 
-	NET_EPOCH_ASSERT();
-
-	if (send_ns != 0)
-		nd6_dad_ns_output(dp);
-	callout_reset(&dp->dad_timer_ch, ticks,
-	    (void (*)(void *))nd6_dad_timer, (void *)dp);
+	callout_reset(&dp->dad_timer_ch, ticks, nd6_dad_timer, dp);
 }
 
 static void
 nd6_dad_stoptimer(struct dadq *dp)
 {
-
 	callout_drain(&dp->dad_timer_ch);
 }
 
 static void
 nd6_dad_rele(struct dadq *dp)
 {
-
 	if (refcount_release(&dp->dad_refcnt)) {
+		KASSERT(!dp->dad_ondadq, ("dp %p still on DAD queue", dp));
 		ifa_free(dp->dad_ifa);
 		free(dp, M_IP6NDP);
 	}
@@ -1229,8 +1227,7 @@ nd6_dad_rele(struct dadq *dp)
 void
 nd6_dad_init(void)
 {
-
-	rw_init(&V_dad_rwlock, "nd6 DAD queue");
+	DADQ_LOCK_INIT();
 	TAILQ_INIT(&V_dadq);
 }
 
@@ -1243,7 +1240,6 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	struct dadq *dp;
 	char ip6buf[INET6_ADDRSTRLEN];
-	struct epoch_tracker et;
 
 	KASSERT((ia->ia6_flags & IN6_IFF_TENTATIVE) != 0,
 	    ("starting DAD on non-tentative address %p", ifa));
@@ -1265,12 +1261,13 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
 	    (ND_IFINFO(ifa->ifa_ifp)->flags & ND6_IFF_IFDISABLED) != 0)
 		return;
 
+	DADQ_WLOCK();
 	if ((dp = nd6_dad_find(ifa, NULL)) != NULL) {
 		/*
 		 * DAD is already in progress.  Let the existing entry
 		 * finish it.
 		 */
-		nd6_dad_rele(dp);
+		DADQ_WUNLOCK();
 		return;
 	}
 
@@ -1282,7 +1279,8 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
 			ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
 		return;
 	}
-	callout_init(&dp->dad_timer_ch, 0);
+	callout_init_rw(&dp->dad_timer_ch, DADQ_LOCKPTR(),
+	    CALLOUT_RETURNUNLOCKED);
 #ifdef VIMAGE
 	dp->dad_vnet = curvnet;
 #endif
@@ -1305,9 +1303,8 @@ nd6_dad_start(struct ifaddr *ifa, int delay)
 	/* Add this to the dadq and add a reference for the dadq. */
 	refcount_init(&dp->dad_refcnt, 1);
 	nd6_dad_add(dp);
-	NET_EPOCH_ENTER(et);
-	nd6_dad_starttimer(dp, delay, 0);
-	NET_EPOCH_EXIT(et);
+	nd6_dad_starttimer(dp, delay);
+	DADQ_WUNLOCK();
 }
 
 /*
@@ -1318,29 +1315,36 @@ nd6_dad_stop(struct ifaddr *ifa)
 {
 	struct dadq *dp;
 
+	DADQ_WLOCK();
 	dp = nd6_dad_find(ifa, NULL);
-	if (!dp) {
+	if (dp == NULL) {
+		DADQ_WUNLOCK();
 		/* DAD wasn't started yet */
 		return;
 	}
 
-	nd6_dad_stoptimer(dp);
+	/*
+	 * Acquire a temporary reference so that we can safely stop the callout.
+	 */
+	(void)refcount_acquire(&dp->dad_refcnt);
 	nd6_dad_del(dp);
+	DADQ_WUNLOCK();
 
-	/* Release this function's reference, acquired by nd6_dad_find(). */
+	nd6_dad_stoptimer(dp);
 	nd6_dad_rele(dp);
 }
 
 static void
-nd6_dad_timer(struct dadq *dp)
+nd6_dad_timer(void *arg)
 {
-	CURVNET_SET(dp->dad_vnet);
+	struct dadq *dp = arg;
 	struct ifaddr *ifa = dp->dad_ifa;
 	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
 	struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
 	char ip6buf[INET6_ADDRSTRLEN];
 	struct epoch_tracker et;
 
+	CURVNET_SET(dp->dad_vnet);
 	KASSERT(ia != NULL, ("DAD entry %p with no address", dp));
 
 	NET_EPOCH_ENTER(et);
@@ -1381,17 +1385,18 @@ nd6_dad_timer(struct dadq *dp)
 		 * We have more NS to go.  Send NS packet for DAD.
 		 */
 		nd6_dad_starttimer(dp,
-		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000, 1);
+		    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
+		nd6_dad_ns_output(dp);
 		goto done;
 	} else {
 		/*
 		 * We have transmitted sufficient number of DAD packets.
 		 * See what we've got.
 		 */
-		if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0)
+		if (dp->dad_ns_icount > 0 || dp->dad_na_icount > 0) {
 			/* We've seen NS or NA, means DAD has failed. */
 			nd6_dad_duplicated(ifa, dp);
-		else if (V_dad_enhanced != 0 &&
+		} else if (V_dad_enhanced != 0 &&
 		    dp->dad_ns_lcount > 0 &&
 		    dp->dad_ns_lcount > dp->dad_loopbackprobe) {
 			/*
@@ -1412,8 +1417,8 @@ nd6_dad_timer(struct dadq *dp)
 			dp->dad_count =
 			    dp->dad_ns_ocount + V_nd6_mmaxtries - 1;
 			nd6_dad_starttimer(dp,
-			    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000,
-			    1);
+			    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
+			nd6_dad_ns_output(dp);
 			goto done;
 		} else {
 			/*
@@ -1439,6 +1444,7 @@ nd6_dad_timer(struct dadq *dp)
 	}
 err:
 	nd6_dad_del(dp);
+	DADQ_WUNLOCK();
 done:
 	NET_EPOCH_EXIT(et);
 	CURVNET_RESTORE();
@@ -1498,6 +1504,10 @@ nd6_dad_duplicated(struct ifaddr *ifa, struct dadq *dp)
 	}
 }
 
+/*
+ * Transmit a neighbour solicitation for the purpose of DAD.  Returns with the
+ * DAD queue unlocked.
+ */
 static void
 nd6_dad_ns_output(struct dadq *dp)
 {
@@ -1505,11 +1515,15 @@ nd6_dad_ns_output(struct dadq *dp)
 	struct ifnet *ifp = dp->dad_ifa->ifa_ifp;
 	int i;
 
+	DADQ_WLOCK_ASSERT();
+
 	dp->dad_ns_tcount++;
 	if ((ifp->if_flags & IFF_UP) == 0) {
+		DADQ_WUNLOCK();
 		return;
 	}
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
+		DADQ_WUNLOCK();
 		return;
 	}
 
@@ -1526,6 +1540,7 @@ nd6_dad_ns_output(struct dadq *dp)
 		 * should work well in almost all cases.
 		 */
 	}
+	DADQ_WUNLOCK();
 	nd6_ns_output(ifp, NULL, NULL, &ia->ia_addr.sin6_addr,
 	    (uint8_t *)&dp->dad_nonce[0]);
 }
@@ -1541,12 +1556,11 @@ nd6_dad_ns_input(struct ifaddr *ifa, struct nd_opt_nonce *ndopt_nonce)
 	/* Ignore Nonce option when Enhanced DAD is disabled. */
 	if (V_dad_enhanced == 0)
 		ndopt_nonce = NULL;
+	DADQ_RLOCK();
 	dp = nd6_dad_find(ifa, ndopt_nonce);
-	if (dp == NULL)
-		return;
-
-	dp->dad_ns_icount++;
-	nd6_dad_rele(dp);
+	if (dp != NULL)
+		dp->dad_ns_icount++;
+	DADQ_RUNLOCK();
 }
 
 static void
@@ -1557,9 +1571,9 @@ nd6_dad_na_input(struct ifaddr *ifa)
 	if (ifa == NULL)
 		panic("ifa == NULL in nd6_dad_na_input");
 
+	DADQ_RLOCK();
 	dp = nd6_dad_find(ifa, NULL);
-	if (dp != NULL) {
+	if (dp != NULL)
 		dp->dad_na_icount++;
-		nd6_dad_rele(dp);
-	}
+	DADQ_RUNLOCK();
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202109211348.18LDmUrA035616>