Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 14 Sep 2008 00:24:25 -0700
From:      Julian Elischer <julian@elischer.org>
To:        Giorgos Keramidas <keramida@freebsd.org>
Cc:        freebsd-current@freebsd.org, Robert Watson <rwatson@freebsd.org>
Subject:   Re: panic in rt_check_fib()
Message-ID:  <48CCBC29.5070802@elischer.org>
In-Reply-To: <48CCAF23.1010605@elischer.org>
References:  <87prnjh80z.fsf@kobe.laptop>	<alpine.BSF.1.10.0809131105280.55411@fledge.watson.org>	<48CC14AD.4090708@elischer.org>	<874p4ju8t3.fsf@kobe.laptop> <87zlmbstv1.fsf@kobe.laptop> <48CCAF23.1010605@elischer.org>

next in thread | previous in thread | raw e-mail | index | archive | help
This is a multi-part message in MIME format.
--------------060405080301040705060307
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit

Julian Elischer wrote:
> To recap on this, I rewrote this function a couple of week sagobecause I 
> couldn't keep track of what was going on, and I thought it might 
> havesome bad edge cases.  a couple of days later Giorgos contacted me 
> saying hta the had a fairly reproducible situation
> where this was triggered and it appeared to be an edge case in
> this function that allowed it to try lock the same lock twice.
> 
> I immediatly thought "ah=hah!" I may have a solution to this,
> and gave him a copy of my new function and indead it DOES fix that 
> panic. however after deleting and recreating intefaces a few hundred 
> times without crashing in rt_check_fib() it then fails somewhere else,
> (actually it leacks some resources and eventually networking stops).
> 
> I'm not convinced that is a problem with the new or old rt_check() but
> it did stop me from just committing the new code.
> 
> I rereading the way the function (did and still does) work it
> occurred to me that there was a large flaw in teh way it worked..
> 
> It dropped a the lock on one route while it went off an did something 
> else that might block, On returning it blindly re-grabbed that lock,
> completely ignoring the fact that the route might not even be valid any 
> more. (or any of several other things that may have changed while
> it was away (maybe sleeping)).
> 
> the code Giorgos is referring to is a patch I suggested to him to
> fix this oversight and not the one that I originally tested and
> had suggested to fix the edge case.
> 
> I do however ask that some other people look at this patch!
> 
> Julian
> 

here's the version of the patch I ended up with...

--------------060405080301040705060307
Content-Type: text/plain; x-mac-type="0"; x-mac-creator="0";
	name="rt_check.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="rt_check.diff"

Index: route.c
===================================================================
--- route.c	(revision 183012)
+++ route.c	(working copy)
@@ -1676,18 +1676,39 @@
  *   *lrt0 points to the cached route to the final destination;
  *   *lrt is not meaningful;
  *    fibnum is the index to the correct network fib for this packet
+ *	XXX maybe we should/could get it from (*lrt0)->rt_fibnum
+ * 	instead of bringing it in here? I guess it depends on the
+ *	call graph of how we get here.
+ *	(*lrt0 has no ref held on it by us so REMREF is not needed.
+ *	Refs only account for major structural references and not usages,
+ * 	which is actually a bit of a problem.)
  *
  * === Operation ===
  * If the route is marked down try to find a new route.  If the route
  * to the gateway is gone, try to setup a new route.  Otherwise,
  * if the route is marked for packets to be rejected, enforce that.
+ * Note that rtalloc returns an rtentry with an extra REF that we may
+ * need to lose.
  *
  * === On return ===
  *   *dst is unchanged;
  *   *lrt0 points to the (possibly new) route to the final destination
- *   *lrt points to the route to the next hop
+ *   *lrt points to the route to the next hop   [LOCKED]
  *
  * Their values are meaningful ONLY if no error is returned.
+ *
+ * To follow this you have to remember that:
+ * RT_REMREF reduces the reference count by 1 but doesn't check it for 0 (!)
+ * RTFREE_LOCKED includes an RT_REMREF (or an rtfree if refs == 1)
+ *    and an RT_UNLOCK
+ * RTFREE does an RT_LOCK and an RTFREE_LOCKED
+ * The gwroute pointer counts as a reference on the rtentry to which it points.
+ * so when we add it we use the ref that rtalloc gives us and when we lose it
+ * we need to remove the reference.
+ * RT_TEMP_UNLOCK does an RT_ADDREF before freeing the lock, and
+ * RT_RELOCK locks it (it can't have gone away due to the ref) and
+ * drops the ref, possibly freeing it  (unocking in the process) if the
+ * ref goes to 0. (and zeroing the pointer if we do).
  */
 int
 rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
@@ -1701,58 +1722,82 @@
 {
 	struct rtentry *rt;
 	struct rtentry *rt0;
-	int error;
 
 	KASSERT(*lrt0 != NULL, ("rt_check"));
-	rt = rt0 = *lrt0;
+	rt0 = *lrt0;
+	rt = NULL;
 
 	/* NB: the locking here is tortuous... */
-	RT_LOCK(rt);
-	if ((rt->rt_flags & RTF_UP) == 0) {
-		RT_UNLOCK(rt);
-		rt = rtalloc1_fib(dst, 1, 0UL, fibnum);
-		if (rt != NULL) {
-			RT_REMREF(rt);
-			/* XXX what about if change? */
-		} else
+	RT_LOCK(rt0);
+retry:
+	if (rt0 && (rt0->rt_flags & RTF_UP) == 0) {
+		/* Current rt0 is useless, try get a replacement. */
+		RT_UNLOCK(rt0);
+		rt0 = NULL;
+	}
+	if (rt0 == NULL) {
+		rt0 = rtalloc1_fib(dst, 1, 0UL, fibnum);
+		if (rt0 == NULL) {
 			return (EHOSTUNREACH);
-		rt0 = rt;
+		}
+		RT_REMREF(rt0); /* don't need the reference. */
 	}
-	/* XXX BSD/OS checks dst->sa_family != AF_NS */
-	if (rt->rt_flags & RTF_GATEWAY) {
-		if (rt->rt_gwroute == NULL)
-			goto lookup;
-		rt = rt->rt_gwroute;
-		RT_LOCK(rt);		/* NB: gwroute */
-		if ((rt->rt_flags & RTF_UP) == 0) {
-			RTFREE_LOCKED(rt);	/* unlock gwroute */
-			rt = rt0;
-			rt0->rt_gwroute = NULL;
-		lookup:
-			RT_UNLOCK(rt0);
-/* XXX MRT link level looked up in table 0 */
-			rt = rtalloc1_fib(rt->rt_gateway, 1, 0UL, 0);
-			if (rt == rt0) {
-				RT_REMREF(rt0);
-				RT_UNLOCK(rt0);
+
+	if (rt0->rt_flags & RTF_GATEWAY) {
+		if ((rt = rt0->rt_gwroute) != NULL) {
+			RT_LOCK(rt);		/* NB: gwroute */
+			if ((rt->rt_flags & RTF_UP) == 0) {
+				/* gw route is dud. ignore/lose it */
+				RTFREE_LOCKED(rt); /* unref (&unlock) gwroute */
+				rt = rt0->rt_gwroute = NULL;
+			}
+		}
+		
+		if (rt == NULL) {  /* NOT AN ELSE CLAUSE */
+			RT_TEMP_UNLOCK(rt0); /* MUST return to undo this */
+			rt = rtalloc1_fib(rt0->rt_gateway, 1, 0UL, fibnum);
+			if ((rt == rt0) || (rt == NULL)) {
+				/* the best we can do is not good enough */
+				if (rt) {
+					RT_REMREF(rt); /* assumes ref > 0 */
+					RT_UNLOCK(rt);
+				}
+				RTFREE(rt0); /* lock, unref, (unlock) */
 				return (ENETUNREACH);
 			}
-			RT_LOCK(rt0);
-			if (rt0->rt_gwroute != NULL)
-				RTFREE(rt0->rt_gwroute);
-			rt0->rt_gwroute = rt;
-			if (rt == NULL) {
-				RT_UNLOCK(rt0);
-				return (EHOSTUNREACH);
+			/*
+			 * Relock it and lose the added reference.
+			 * All sorts of things could have happenned while we
+			 * had no lock on it, so check for them.
+			 */
+			RT_RELOCK(rt0);
+			if (rt0 == NULL || ((rt0->rt_flags & RTF_UP) == 0))
+				/* Ru-roh.. what we had is no longer any good */
+				goto retry;
+			/* 
+			 * While we were away, someone replaced the gateway.
+			 * Since a reference count is involved we can't just
+			 * overwrite it.
+			 */
+			if (rt0->rt_gwroute) {
+				if (rt0->rt_gwroute != rt) {
+					RTFREE_LOCKED(rt);
+					goto retry;
+				}
+			} else {
+				rt0->rt_gwroute = rt;
 			}
 		}
+		RT_LOCK_ASSERT(rt);
 		RT_UNLOCK(rt0);
+	} else {
+		/* think of rt as having the lock from now on.. */
+		rt = rt0;
 	}
 	/* XXX why are we inspecting rmx_expire? */
-	error = (rt->rt_flags & RTF_REJECT) &&
-		(rt->rt_rmx.rmx_expire == 0 ||
-			time_uptime < rt->rt_rmx.rmx_expire);
-	if (error) {
+	if ((rt->rt_flags & RTF_REJECT) &&
+	    (rt->rt_rmx.rmx_expire == 0 ||
+	    time_uptime < rt->rt_rmx.rmx_expire)) {
 		RT_UNLOCK(rt);
 		return (rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
 	}
Index: route.h
===================================================================
--- route.h	(revision 183012)
+++ route.h	(working copy)
@@ -316,21 +316,37 @@
 } while (0)
 
 #define	RTFREE_LOCKED(_rt) do {					\
-		if ((_rt)->rt_refcnt <= 1)			\
-			rtfree(_rt);				\
-		else {						\
-			RT_REMREF(_rt);				\
-			RT_UNLOCK(_rt);				\
-		}						\
-		/* guard against invalid refs */		\
-		_rt = 0;					\
-	} while (0)
+	if ((_rt)->rt_refcnt <= 1)				\
+		rtfree(_rt);					\
+	else {							\
+		RT_REMREF(_rt);					\
+		RT_UNLOCK(_rt);					\
+	}							\
+	/* guard against invalid refs */			\
+	_rt = 0;						\
+} while (0)
 
 #define	RTFREE(_rt) do {					\
-		RT_LOCK(_rt);					\
-		RTFREE_LOCKED(_rt);				\
-	} while (0)
+	RT_LOCK(_rt);						\
+	RTFREE_LOCKED(_rt);					\
+} while (0)
 
+#define RT_TEMP_UNLOCK(_rt) do {				\
+	RT_ADDREF(_rt);						\
+	RT_UNLOCK(_rt);						\
+} while (0)
+
+#define RT_RELOCK(_rt) do {					\
+	RT_LOCK(_rt);						\
+	if ((_rt)->rt_refcnt <= 1) {				\
+		rtfree(_rt);					\
+		_rt = 0; /*  signal that it went away */	\
+	} else {						\
+		RT_REMREF(_rt);					\
+		/* note that _rt is still valid */		\
+	}							\
+} while (0)
+
 extern struct radix_node_head *rt_tables[][AF_MAX+1];
 
 struct ifmultiaddr;

--------------060405080301040705060307--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?48CCBC29.5070802>