Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 5 Aug 2003 16:33:49 -0700 (PDT)
From:      Sam Leffler <sam@FreeBSD.org>
To:        Perforce Change Reviews <perforce@freebsd.org>
Subject:   PERFORCE change 35578 for review
Message-ID:  <200308052333.h75NXnGf074571@repoman.freebsd.org>

next in thread | raw e-mail | index | archive | help
http://perforce.freebsd.org/chv.cgi?CH=35578

Change 35578 by sam@sam_ebb on 2003/08/05 16:33:45

	Checkpoint routing table locking: each rtentry has a lock
	that is used to synchronize updates.  This happens independently
	of locking the radix tree.  Fallout is everywhere as rtalloc1 now
	must return a locked entry to insure consistency.
	
	This work is incomplete; there's still work to do in the multicast
	code and probable elsewhere.  Also some areas such as ATM are hard
	for me to test (and grok in some cases).
	
	Also need to revisit some of the macros to decide, for example, if
	RTFREE and RTFREE_LOCKED should be reversed.
	
	Needs another pass over all code that touches rtentry's to make sure
	the locking is consistent.
	
	Mostly untested; checkpoint now before some of this stuff gets lost.
	
	Note these changes also introduce LOR's with ifnet locking (e.g.
	in the bridge code).  To be resolved.

Affected files ...

.. //depot/projects/netperf/sys/net/if.c#2 edit
.. //depot/projects/netperf/sys/net/if_disc.c#2 edit
.. //depot/projects/netperf/sys/net/if_faith.c#2 edit
.. //depot/projects/netperf/sys/net/if_loop.c#2 edit
.. //depot/projects/netperf/sys/net/route.c#4 edit
.. //depot/projects/netperf/sys/net/route.h#2 edit
.. //depot/projects/netperf/sys/net/rtsock.c#2 edit
.. //depot/projects/netperf/sys/netinet/if_atm.c#2 edit
.. //depot/projects/netperf/sys/netinet/if_ether.c#2 edit
.. //depot/projects/netperf/sys/netinet/in_pcb.c#3 edit
.. //depot/projects/netperf/sys/netinet/in_rmx.c#2 edit
.. //depot/projects/netperf/sys/netinet/ip_icmp.c#2 edit
.. //depot/projects/netperf/sys/netinet6/icmp6.c#2 edit
.. //depot/projects/netperf/sys/netinet6/in6.c#2 edit
.. //depot/projects/netperf/sys/netinet6/in6_pcb.c#2 edit
.. //depot/projects/netperf/sys/netinet6/in6_rmx.c#2 edit
.. //depot/projects/netperf/sys/netinet6/ip6_output.c#2 edit
.. //depot/projects/netperf/sys/netinet6/nd6.c#2 edit
.. //depot/projects/netperf/sys/netinet6/nd6_rtr.c#2 edit

Differences ...

==== //depot/projects/netperf/sys/net/if.c#2 (text+ko) ====

@@ -831,8 +831,7 @@
 	return (error);
 }
 
-#define	equal(a1, a2) \
-  (bcmp((caddr_t)(a1), (caddr_t)(a2), ((struct sockaddr *)(a1))->sa_len) == 0)
+#define	equal(a1, a2)	(bcmp((a1), (a2), ((a1))->sa_len) == 0)
 
 /*
  * Locate an interface based on a complete address.
@@ -912,7 +911,7 @@
 	 * so do that if we can.
 	 */
 	if (af == AF_LINK) {
-	    register struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
+	    struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
 	    if (sdl->sdl_index && sdl->sdl_index <= if_index)
 		return (ifaddr_byindex(sdl->sdl_index));
 	}
@@ -1049,18 +1048,21 @@
 	register struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
-	register struct ifaddr *ifa;
+	register struct ifaddr *ifa, *oifa;
 	struct sockaddr *dst;
 	struct ifnet *ifp;
 
+	RT_LOCK_ASSERT(rt, MA_OWNED);
+
 	if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == 0) ||
 	    ((ifp = ifa->ifa_ifp) == 0) || ((dst = rt_key(rt)) == 0))
 		return;
 	ifa = ifaof_ifpforaddr(dst, ifp);
 	if (ifa) {
-		IFAFREE(rt->rt_ifa);
 		IFAREF(ifa);		/* XXX */
+		oifa = rt->rt_ifa;
 		rt->rt_ifa = ifa;
+		IFAFREE(oifa);
 		if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
 			ifa->ifa_rtrequest(cmd, rt, info);
 	}

==== //depot/projects/netperf/sys/net/if_disc.c#2 (text+ko) ====

@@ -194,6 +194,8 @@
 static void
 discrtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
 {
+	RT_LOCK_ASSERT(rt, MA_OWNED);
+
 	if (rt)
 		rt->rt_rmx.rmx_mtu = DSMTU;
 }

==== //depot/projects/netperf/sys/net/if_faith.c#2 (text+ko) ====

@@ -270,6 +270,8 @@
 	struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
+	RT_LOCK_ASSERT(rt, MA_OWNED);
+
 	if (rt) {
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
 		/*
@@ -371,7 +373,7 @@
 	else
 		ret = 0;
 	if (rt)
-		RTFREE(rt);
+		RTFREE_LOCKED(rt);
 	return ret;
 }
 #endif

==== //depot/projects/netperf/sys/net/if_loop.c#2 (text+ko) ====

@@ -351,6 +351,8 @@
 	struct rtentry *rt;
 	struct rt_addrinfo *info;
 {
+	RT_LOCK_ASSERT(rt, MA_OWNED);
+
 	if (rt) {
 		rt->rt_rmx.rmx_mtu = rt->rt_ifp->if_mtu; /* for ISO */
 		/*

==== //depot/projects/netperf/sys/net/route.c#4 (text+ko) ====

@@ -95,35 +95,36 @@
 rtalloc_ign(struct route *ro, u_long ignore)
 {
 	struct rtentry *rt;
-	int s;
 
 	if ((rt = ro->ro_rt) != NULL) {
 		if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
 			return;
-		/* XXX - We are probably always at splnet here already. */
-		s = splnet();
 		RTFREE(rt);
 		ro->ro_rt = NULL;
-		splx(s);
 	}
 	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, ignore);
+	if (ro->ro_rt)
+		RT_UNLOCK(ro->ro_rt);
 }
 
 /*
  * Look up the route that matches the address given
  * Or, at least try.. Create a cloned route if needed.
+ *
+ * The returned route, if any, is locked.
  */
 struct rtentry *
 rtalloc1(struct sockaddr *dst, int report, u_long ignflags)
 {
-	register struct radix_node_head *rnh = rt_tables[dst->sa_family];
-	register struct rtentry *rt;
-	register struct radix_node *rn;
-	struct rtentry *newrt = 0;
+	struct radix_node_head *rnh = rt_tables[dst->sa_family];
+	struct rtentry *rt;
+	struct radix_node *rn;
+	struct rtentry *newrt;
 	struct rt_addrinfo info;
 	u_long nflags;
-	int  s = splnet(), err = 0, msgtype = RTM_MISS;
+	int err = 0, msgtype = RTM_MISS;
 
+	newrt = 0;
 	/*
 	 * Look up the address in the table for that Address Family
 	 */
@@ -131,9 +132,10 @@
 		rtstat.rts_unreach++;
 		goto miss2;
 	}
+	bzero(&info, sizeof(info));
 	RADIX_NODE_HEAD_LOCK(rnh);
-	if ((rn = rnh->rnh_matchaddr((caddr_t)dst, rnh)) &&
-	    ((rn->rn_flags & RNF_ROOT) == 0)) {
+	if ((rn = rnh->rnh_matchaddr(dst, rnh)) &&
+	    (rn->rn_flags & RNF_ROOT) == 0) {
 		/*
 		 * If we find it and it's not the root node, then
 		 * get a refernce on the rtentry associated.
@@ -153,11 +155,14 @@
 				 * If the cloning didn't succeed, maybe
 				 * what we have will do. Return that.
 				 */
-				newrt = rt;
-				rt->rt_refcnt++;
+				newrt = rt;		/* existing route */
+				RT_LOCK(newrt);
+				newrt->rt_refcnt++;
 				goto miss;
 			}
-			if ((rt = newrt) && (rt->rt_flags & RTF_XRESOLVE)) {
+			KASSERT(newrt, ("no route and no error"));
+			RT_LOCK(newrt);
+			if (newrt->rt_flags & RTF_XRESOLVE) {
 				/*
 				 * If the new route specifies it be
 				 * externally resolved, then go do that.
@@ -166,18 +171,20 @@
 				goto miss;
 			}
 			/* Inform listeners of the new route. */
-			bzero(&info, sizeof(info));
-			info.rti_info[RTAX_DST] = rt_key(rt);
-			info.rti_info[RTAX_NETMASK] = rt_mask(rt);
-			info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
-			if (rt->rt_ifp != NULL) {
+			info.rti_info[RTAX_DST] = rt_key(newrt);
+			info.rti_info[RTAX_NETMASK] = rt_mask(newrt);
+			info.rti_info[RTAX_GATEWAY] = newrt->rt_gateway;
+			if (newrt->rt_ifp != NULL) {
 				info.rti_info[RTAX_IFP] =
-				    TAILQ_FIRST(&rt->rt_ifp->if_addrhead)->ifa_addr;
-				info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
+				    TAILQ_FIRST(&newrt->rt_ifp->if_addrhead)->ifa_addr;
+				info.rti_info[RTAX_IFA] = newrt->rt_ifa->ifa_addr;
 			}
-			rt_missmsg(RTM_ADD, &info, rt->rt_flags, 0);
-		} else
-			rt->rt_refcnt++;
+			rt_missmsg(RTM_ADD, &info, newrt->rt_flags, 0);
+		} else {
+			KASSERT(rt == newrt, ("locking wrong route"));
+			RT_LOCK(newrt);
+			newrt->rt_refcnt++;
+		}
 		RADIX_NODE_HEAD_UNLOCK(rnh);
 	} else {
 		/*
@@ -194,12 +201,12 @@
 			 * Authorities.
 			 * For a delete, this is not an error. (report == 0)
 			 */
-			bzero((caddr_t)&info, sizeof(info));
 			info.rti_info[RTAX_DST] = dst;
 			rt_missmsg(msgtype, &info, 0, err);
 		}
 	}
-	splx(s);
+	if (newrt)
+		RT_LOCK_ASSERT(newrt, MA_OWNED);
 	return (newrt);
 }
 
@@ -218,21 +225,24 @@
 	if (rt == 0 || rnh == 0)
 		panic("rtfree");
 
+	RT_LOCK_ASSERT(rt, MA_OWNED);
+
 	/*
 	 * decrement the reference count by one and if it reaches 0,
 	 * and there is a close function defined, call the close function
 	 */
-	rt->rt_refcnt--;
-	if (rnh->rnh_close && rt->rt_refcnt == 0) {
+	if (--rt->rt_refcnt > 0)
+		goto done;
+	/* XXX refcount==0? */
+	if (rt->rt_refcnt == 0 && rnh->rnh_close)
 		rnh->rnh_close((struct radix_node *)rt, rnh);
-	}
 
 	/*
 	 * If we are no longer "up" (and ref == 0)
 	 * then we can free the resources associated
 	 * with the route.
 	 */
-	if (rt->rt_refcnt <= 0 && (rt->rt_flags & RTF_UP) == 0) {
+	if ((rt->rt_flags & RTF_UP) == 0) {
 		if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic ("rtfree 2");
 		/*
@@ -240,14 +250,12 @@
 		 * so it is represented in rttrash.. remove that now.
 		 */
 		rttrash--;
-
 #ifdef	DIAGNOSTIC
 		if (rt->rt_refcnt < 0) {
 			printf("rtfree: %p not freed (neg refs)\n", rt);
-			return;
+			goto done;
 		}
 #endif
-
 		/*
 		 * release references on items we hold them on..
 		 * e.g other routes and ifaddrs.
@@ -267,8 +275,12 @@
 		/*
 		 * and the rtentry itself of course
 		 */
+		RT_LOCK_DESTROY(rt);
 		Free(rt);
+		return;
 	}
+done:
+	RT_UNLOCK(rt);
 }
 
 /* compare two sockaddr structures */
@@ -279,17 +291,13 @@
  * destination to go through the given gateway.
  * Normally called as a result of a routing redirect
  * message from the network layer.
- *
- * N.B.: must be called at splnet
- *
  */
 void
 rtredirect(struct sockaddr *dst,
 	struct sockaddr *gateway,
 	struct sockaddr *netmask,
 	int flags,
-	struct sockaddr *src,
-	struct rtentry **rtp)
+	struct sockaddr *src)
 {
 	struct rtentry *rt;
 	int error = 0;
@@ -302,7 +310,7 @@
 		error = ENETUNREACH;
 		goto out;
 	}
-	rt = rtalloc1(dst, 0, 0UL);
+	rt = rtalloc1(dst, 0, 0UL);	/* NB: rt is locked */
 	/*
 	 * If the redirect isn't from our current router for this dst,
 	 * it's either old or wrong.  If it redirects us to ourselves,
@@ -322,7 +330,7 @@
 	 * which use routing redirects generated by smart gateways
 	 * to dynamically build the routing tables.
 	 */
-	if ((rt == 0) || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
+	if (rt == 0 || (rt_mask(rt) && rt_mask(rt)->sa_len < 2))
 		goto create;
 	/*
 	 * Don't listen to the redirect if it's
@@ -346,8 +354,10 @@
 			info.rti_flags = flags;
 			rt = NULL;
 			error = rtrequest1(RTM_ADD, &info, &rt);
-			if (rt != NULL)
+			if (rt != NULL) {
+				RT_UNLOCK(rt);
 				flags = rt->rt_flags;
+			}
 			stat = &rtstat.rts_dynamic;
 		} else {
 			/*
@@ -365,12 +375,8 @@
 	} else
 		error = EHOSTUNREACH;
 done:
-	if (rt) {
-		if (rtp && !error)
-			*rtp = rt;
-		else
-			rtfree(rt);
-	}
+	if (rt)
+		rtfree(rt);
 out:
 	if (error)
 		rtstat.rts_badredirect++;
@@ -402,6 +408,7 @@
 ifa_ifwithroute(int flags, struct sockaddr *dst, struct sockaddr *gateway)
 {
 	register struct ifaddr *ifa;
+
 	if ((flags & RTF_GATEWAY) == 0) {
 		/*
 		 * If we are adding a route to an interface,
@@ -431,6 +438,7 @@
 		if (rt == 0)
 			return (0);
 		--rt->rt_refcnt;
+		RT_UNLOCK(rt);
 		if ((ifa = rt->rt_ifa) == 0)
 			return (0);
 	}
@@ -523,7 +531,7 @@
 int
 rtrequest1(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt)
 {
-	int s = splnet(); int error = 0;
+	int error = 0;
 	register struct rtentry *rt;
 	register struct radix_node *rn;
 	register struct radix_node_head *rnh;
@@ -534,10 +542,9 @@
 	/*
 	 * Find the correct routing tree to use for this Address Family
 	 */
-	if ((rnh = rt_tables[dst->sa_family]) == 0) {
-		splx(s);
+	rnh = rt_tables[dst->sa_family];
+	if (rnh == 0)
 		return (EAFNOSUPPORT);
-	}
 	RADIX_NODE_HEAD_LOCK(rnh);
 	/*
 	 * If we are adding a host route then we don't want to put
@@ -553,11 +560,13 @@
 		 * Remove the item from the tree and return it.
 		 * Complain if it is not there and do no more processing.
 		 */
-		if ((rn = rnh->rnh_deladdr(dst, netmask, rnh)) == 0)
+		rn = rnh->rnh_deladdr(dst, netmask, rnh);
+		if (rn == 0)
 			senderr(ESRCH);
 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
 			panic ("rtrequest delete");
 		rt = (struct rtentry *)rn;
+		RT_LOCK(rt);
 		rt->rt_refcnt++;
 		rt->rt_flags &= ~RTF_UP;
 
@@ -577,9 +586,9 @@
 		 * we held its last reference.
 		 */
 		if (rt->rt_gwroute) {
-			rt = rt->rt_gwroute;
-			RTFREE(rt);
-			(rt = (struct rtentry *)rn)->rt_gwroute = 0;
+			struct rtentry *gwrt = rt->rt_gwroute;
+			RTFREE(gwrt);
+			rt->rt_gwroute = 0;
 		}
 
 		/*
@@ -599,16 +608,18 @@
 		 * but it's up to it to free the rtentry as we won't be
 		 * doing it.
 		 */
-		if (ret_nrt)
+		if (ret_nrt) {
 			*ret_nrt = rt;
-		else
-			RTFREE(rt);
+			RT_UNLOCK(rt);
+		} else
+			RTFREE_LOCKED(rt);
 		break;
 
 	case RTM_RESOLVE:
 		if (ret_nrt == 0 || (rt = *ret_nrt) == 0)
 			senderr(EINVAL);
 		ifa = rt->rt_ifa;
+		/* XXX locking? */
 		flags = rt->rt_flags &
 		    ~(RTF_CLONING | RTF_PRCLONING | RTF_STATIC);
 		flags |= RTF_WASCLONED;
@@ -626,16 +637,18 @@
 		ifa = info->rti_ifa;
 
 	makeroute:
-		R_Malloc(rt, struct rtentry *, sizeof(*rt));
+		R_Zalloc(rt, struct rtentry *, sizeof(*rt));
 		if (rt == 0)
 			senderr(ENOBUFS);
-		Bzero(rt, sizeof(*rt));
+		RT_LOCK_INIT(rt);
 		rt->rt_flags = RTF_UP | flags;
 		/*
 		 * Add the gateway. Possibly re-malloc-ing the storage for it
 		 * also add the rt_gwroute if possible.
 		 */
+		RT_LOCK(rt);
 		if ((error = rt_setgate(rt, dst, gateway)) != 0) {
+			RT_LOCK_DESTROY(rt);
 			Free(rt);
 			senderr(error);
 		}
@@ -643,7 +656,7 @@
 		/*
 		 * point to the (possibly newly malloc'd) dest address.
 		 */
-		ndst = rt_key(rt);
+		ndst = (struct sockaddr *)rt_key(rt);
 
 		/*
 		 * make sure it contains the value we want (masked if needed).
@@ -661,10 +674,9 @@
 		IFAREF(ifa);
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
+
 		/* XXX mtu manipulation will be done in rnh_addaddr -- itojun */
-
-		rn = rnh->rnh_addaddr((caddr_t)ndst, (caddr_t)netmask,
-					rnh, rt->rt_nodes);
+		rn = rnh->rnh_addaddr(ndst, netmask, rnh, rt->rt_nodes);
 		if (rn == 0) {
 			struct rtentry *rt2;
 			/*
@@ -677,16 +689,15 @@
 			rt2 = rtalloc1(dst, 0, RTF_PRCLONING);
 			if (rt2 && rt2->rt_parent) {
 				rtrequest(RTM_DELETE,
-					  (struct sockaddr *)rt_key(rt2),
+					  rt_key(rt2),
 					  rt2->rt_gateway,
 					  rt_mask(rt2), rt2->rt_flags, 0);
-				RTFREE(rt2);
-				rn = rnh->rnh_addaddr((caddr_t)ndst,
-						      (caddr_t)netmask,
+				RTFREE_LOCKED(rt2);
+				rn = rnh->rnh_addaddr(ndst, netmask,
 						      rnh, rt->rt_nodes);
 			} else if (rt2) {
 				/* undo the extra ref we got */
-				RTFREE(rt2);
+				RTFREE_LOCKED(rt2);
 			}
 		}
 
@@ -697,10 +708,11 @@
 		if (rn == 0) {
 			if (rt->rt_gwroute)
 				rtfree(rt->rt_gwroute);
-			if (rt->rt_ifa) {
+			if (rt->rt_ifa)
 				IFAFREE(rt->rt_ifa);
-			}
 			Free(rt_key(rt));
+			/* XXX still locked */
+			RT_LOCK_DESTROY(rt);
 			Free(rt);
 			senderr(EEXIST);
 		}
@@ -713,10 +725,13 @@
 		 * are a clone (and increment the parent's references)
 		 */
 		if (req == RTM_RESOLVE) {
+			KASSERT(ret_nrt && *ret_nrt,
+				("no route to clone from"));
 			rt->rt_rmx = (*ret_nrt)->rt_rmx; /* copy metrics */
 			rt->rt_rmx.rmx_pksent = 0; /* reset packet counter */
 			if ((*ret_nrt)->rt_flags & (RTF_CLONING | RTF_PRCLONING)) {
-				rt->rt_parent = (*ret_nrt);
+				rt->rt_parent = *ret_nrt;
+				/* XXX locking */
 				(*ret_nrt)->rt_refcnt++;
 			}
 		}
@@ -750,21 +765,23 @@
 			*ret_nrt = rt;
 			rt->rt_refcnt++;
 		}
+		RT_UNLOCK(rt);
 		break;
 	default:
 		error = EOPNOTSUPP;
 	}
 bad:
 	RADIX_NODE_HEAD_UNLOCK(rnh);
-	splx(s);
 	return (error);
+#undef senderr
+}
+
 #undef dst
 #undef gateway
 #undef netmask
 #undef ifaaddr
 #undef ifpaddr
 #undef flags
-}
 
 /*
  * Called from rtrequest(RTM_DELETE, ...) to fix up the route's ``family''
@@ -841,8 +858,7 @@
 	 * There probably is a function somewhere which does this...
 	 * if not, there should be.
 	 */
-	len = imin(((struct sockaddr *)rt_key(rt0))->sa_len,
-		   ((struct sockaddr *)rt_key(rt))->sa_len);
+	len = imin(rt_key(rt0)->sa_len, rt_key(rt)->sa_len);
 
 	xk1 = (u_char *)rt_key(rt0);
 	xm1 = (u_char *)rt_mask(rt0);
@@ -850,8 +866,8 @@
 
 	/* avoid applying a less specific route */
 	xmp = (u_char *)rt_mask(rt->rt_parent);
-	mlen = ((struct sockaddr *)rt_key(rt->rt_parent))->sa_len;
-	if (mlen > ((struct sockaddr *)rt_key(rt0))->sa_len) {
+	mlen = rt_key(rt->rt_parent)->sa_len;
+	if (mlen > rt_key(rt0)->sa_len) {
 #ifdef DEBUG
 		if (rtfcdebug)
 			printf("rt_fixchange: inserting a less "
@@ -893,29 +909,31 @@
 #define ROUNDUP(a) (a>0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
 
 int
-rt_setgate(struct rtentry *rt0, struct sockaddr *dst, struct sockaddr *gate)
+rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
 {
+	/* XXX dst may be overwritten, can we move this to below */
+	struct radix_node_head *rnh = rt_tables[dst->sa_family];
 	caddr_t new, old;
 	int dlen = ROUNDUP(dst->sa_len), glen = ROUNDUP(gate->sa_len);
-	register struct rtentry *rt = rt0;
-	struct radix_node_head *rnh = rt_tables[dst->sa_family];
+
+	RT_LOCK_ASSERT(rt, MA_OWNED);
 
 	/*
 	 * A host route with the destination equal to the gateway
 	 * will interfere with keeping LLINFO in the routing
 	 * table, so disallow it.
 	 */
-	if (((rt0->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
+	if (((rt->rt_flags & (RTF_HOST|RTF_GATEWAY|RTF_LLINFO)) ==
 					(RTF_HOST|RTF_GATEWAY)) &&
-	    (dst->sa_len == gate->sa_len) &&
-	    (bcmp(dst, gate, dst->sa_len) == 0)) {
+	    dst->sa_len == gate->sa_len &&
+	    bcmp(dst, gate, dst->sa_len) == 0) {
 		/*
 		 * The route might already exist if this is an RTM_CHANGE
 		 * or a routing redirect, so try to delete it.
 		 */
-		if (rt_key(rt0))
-			rtrequest(RTM_DELETE, (struct sockaddr *)rt_key(rt0),
-			    rt0->rt_gateway, rt_mask(rt0), rt0->rt_flags, 0);
+		if (rt_key(rt))
+			rtrequest(RTM_DELETE, rt_key(rt),
+			    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
 		return EADDRNOTAVAIL;
 	}
 
@@ -930,12 +948,12 @@
 		R_Malloc(new, caddr_t, dlen + glen);
 		if (new == 0)
 			return ENOBUFS;
-		rt->rt_nodes->rn_key = new;
+		rt_key(rt) = new;
 	} else {
 		/*
 		 * otherwise just overwrite the old one
 		 */
-		new = rt->rt_nodes->rn_key;
+		new = (caddr_t)rt_key(rt);
 		old = 0;
 	}
 
@@ -951,6 +969,7 @@
 	if (old) {
 		Bcopy(dst, new, dlen);
 		Free(old);
+		dst = gate = 0;		/* XXX??? */
 	}
 
 	/*
@@ -974,10 +993,11 @@
 	if (rt->rt_flags & RTF_GATEWAY) {
 		rt->rt_gwroute = rtalloc1(gate, 1, RTF_PRCLONING);
 		if (rt->rt_gwroute == rt) {
-			RTFREE(rt->rt_gwroute);
+			RTFREE_LOCKED(rt->rt_gwroute);
 			rt->rt_gwroute = 0;
 			return EDQUOT; /* failure */
 		}
+		RT_UNLOCK(rt->rt_gwroute);
 	}
 
 	/*
@@ -987,6 +1007,7 @@
 	 */
 	if (!(rt->rt_flags & RTF_HOST) && rt_mask(rt) != 0) {
 		struct rtfc_arg arg;
+
 		arg.rnh = rnh;
 		arg.rt0 = rt;
 		RADIX_NODE_HEAD_LOCK(rnh);
@@ -1094,19 +1115,23 @@
 		/*
 		 * notify any listening routing agents of the change
 		 */
+		RT_LOCK(rt);
 		rt_newaddrmsg(cmd, ifa, error, rt);
 		if (cmd == RTM_DELETE) {
 			/*
 			 * If we are deleting, and we found an entry, then
 			 * it's been removed from the tree.. now throw it away.
 			 */
-			RTFREE(rt);
-		} else if (cmd == RTM_ADD) {
-			/*
-			 * We just wanted to add it.. we don't actually
-			 * need a reference.
-			 */
-			rt->rt_refcnt--;
+			RTFREE_LOCKED(rt);
+		} else {
+			if (cmd == RTM_ADD) {
+				/*
+				 * We just wanted to add it.. we don't actually
+				 * need a reference.
+				 */
+				rt->rt_refcnt--;
+			}
+			RT_UNLOCK(rt);
 		}
 	}
 	if (m)
@@ -1114,52 +1139,78 @@
 	return (error);
 }
 
+/*
+ * Validate the route rt0 to the specified destination.  If the
+ * route is marked down try to find a new route.  If the route
+ * to the gateway is gone, try to setup a new route.  Otherwise,
+ * if the route is marked for packets to be rejected, enforce that.
+ *
+ * On return lrt contains the route to the destination and lrt0
+ * contains the route to the next hop.  Their values are meaningul
+ * ONLY if no error is returned.
+ *
+ * This routine is invoked on each layer 2 output path, prior to
+ * encapsulating outbound packets.
+ */
 int
 rt_check(struct rtentry **lrt, struct rtentry **lrt0, struct sockaddr *dst)
 {
+#define senderr(x) { error = x ; goto bad; }
 	struct rtentry *rt;
 	struct rtentry *rt0;
 	int error;
 
-	rt = *lrt;
 	rt0 = *lrt0;
-	error = 0;
-
 	rt = rt0;
-
-	if (rt != NULL) {
+	if (rt) {
+		/* NB: the locking here is tortuous... */
+		RT_LOCK(rt);
 		if ((rt->rt_flags & RTF_UP) == 0) {
-			rt0 = rt = rtalloc1(dst, 1, 0UL);
-			if (rt0 != NULL)
+			RT_UNLOCK(rt);
+			rt = rtalloc1(dst, 1, 0UL);
+			if (rt != NULL) {
 				rt->rt_refcnt--;
-			else
+				RT_UNLOCK(rt);
+			} else
 				senderr(EHOSTUNREACH);
+			rt0 = rt;
 		}
+		/* XXX BSD/OS checks dst->sa_family != AF_NS */
 		if (rt->rt_flags & RTF_GATEWAY) {
-			if (rt->rt_gwroute == NULL)
+			if (rt->rt_gwroute == 0)
 				goto lookup;
-
 			rt = rt->rt_gwroute;
+			RT_LOCK(rt);		/* NB: gwroute */
 			if ((rt->rt_flags & RTF_UP) == 0) {
-				rtfree(rt);
+				rtfree(rt);	/* unlock gwroute */
 				rt = rt0;
 			lookup:
-				rt->rt_gwroute = rtalloc1(rt->rt_gateway, 1, 0UL);
-				rt = rt->rt_gwroute;
-				if (rt == NULL)
+				RT_UNLOCK(rt0);
+				rt = rtalloc1(rt->rt_gateway, 1, 0UL);
+				RT_LOCK(rt0);
+				rt0->rt_gwroute = rt;
+				if (rt == 0) {
+					RT_UNLOCK(rt0);
 					senderr(EHOSTUNREACH);
+				}
 			}
+			RT_UNLOCK(rt0);
 		}
-		if (rt->rt_flags & RTF_REJECT)
-			if (rt->rt_rmx.rmx_expire == 0 ||
-				time_second < rt->rt_rmx.rmx_expire)
-				senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
+		/* XXX why are we inspecting rmx_expire? */
+		error = (rt->rt_flags & RTF_REJECT) &&
+			(rt->rt_rmx.rmx_expire == 0 ||
+				time_second < rt->rt_rmx.rmx_expire);
+		RT_UNLOCK(rt);
+		if (error)
+			senderr(rt == rt0 ? EHOSTDOWN : EHOSTUNREACH);
 	}
-
+	*lrt = rt;		/* NB: return unlocked */
+	*lrt0 = rt0;
+	return (0);
 bad:
-	*lrt = rt;
-	*lrt0 = rt0;
+	/* NB: lrt and lrt0 should not be interpreted if error is non-zero */
 	return (error);
+#undef senderr
 }
 
 /* This must be before ip6_init2(), which is now SI_ORDER_MIDDLE */

==== //depot/projects/netperf/sys/net/route.h#2 (text+ko) ====

@@ -113,7 +113,7 @@
 		    struct rtentry *);
 					/* output routine for this (rt,if) */
 	struct	rtentry *rt_parent; 	/* cloning parent of this route */
-	struct	mtx *rt_mtx;		/* mutex for routing entry */
+	struct	mtx rt_mtx;		/* mutex for routing entry */
 };
 
 /*
@@ -263,18 +263,26 @@
 
 #ifdef _KERNEL
 
-#define	RT_LOCK_INIT(rt) \
-    mtx_init((rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
-#define	RT_LOCK(rt)		mtx_lock((rt)->rt_mtx)
-#define	RT_UNLOCK(rt)		mtx_unlock((rt)->rt_mtx)
-#define	RT_LOCK_DESTROY(rt)	mtx_destroy((rt)->rt_mtx)
+#define	RT_LOCK_INIT(_rt) \
+	mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
+#define	RT_LOCK(_rt)		mtx_lock(&(_rt)->rt_mtx)
+#define	RT_UNLOCK(_rt)		mtx_unlock(&(_rt)->rt_mtx)
+#define	RT_LOCK_DESTROY(_rt)	mtx_destroy(&(_rt)->rt_mtx)
+#define	RT_LOCK_ASSERT(_rt, _what)	mtx_assert(&(_rt)->rt_mtx, _what)
 
-#define	RTFREE(rt) \
-	do { \
-		if ((rt)->rt_refcnt <= 1) \
-			rtfree(rt); \
-		else \
-			(rt)->rt_refcnt--; \
+#define	RTFREE_LOCKED(_rt) do {				\
+		if ((_rt)->rt_refcnt <= 1)		\
+			rtfree(_rt);			\
+		else {					\
+			(_rt)->rt_refcnt--;		\
+			RT_UNLOCK(_rt);			\
+		}					\
+		/* guard against invalid refs */	\
+		_rt = 0;				\
+	} while (0)
+#define	RTFREE(_rt) do {				\
+		RT_LOCK(_rt);				\
+		RTFREE_LOCKED(_rt);			\
 	} while (0)
 
 extern struct route_cb route_cb;
@@ -289,16 +297,16 @@
 void	 rt_missmsg(int, struct rt_addrinfo *, int, int);
 void	 rt_newaddrmsg(int, struct ifaddr *, int, struct rtentry *);
 void	 rt_newmaddrmsg(int, struct ifmultiaddr *);
+void	 rtalloc(struct route *);
 int	 rt_setgate(struct rtentry *, struct sockaddr *, struct sockaddr *);
-void	 rtalloc(struct route *);
 void	 rtalloc_ign(struct route *, u_long);
-struct rtentry *
-	 rtalloc1(struct sockaddr *, int, u_long);
+/* NB: the rtentry is returned locked */
+struct rtentry *rtalloc1(struct sockaddr *, int, u_long);
 void	 rtfree(struct rtentry *);
 int	 rtinit(struct ifaddr *, int, int);
 int	 rtioctl(u_long, caddr_t);
 void	 rtredirect(struct sockaddr *, struct sockaddr *,
-	    struct sockaddr *, int, struct sockaddr *, struct rtentry **);
+	    struct sockaddr *, int, struct sockaddr *);
 int	 rtrequest(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **);
 int	 rtrequest1(int, struct rt_addrinfo *, struct rtentry **);

==== //depot/projects/netperf/sys/net/rtsock.c#2 (text+ko) ====

@@ -337,6 +337,7 @@
 			senderr(EINVAL);
 		error = rtrequest1(RTM_ADD, &info, &saved_nrt);
 		if (error == 0 && saved_nrt) {
+			RT_LOCK(saved_nrt);
 			rt_setmetrics(rtm->rtm_inits,
 				&rtm->rtm_rmx, &saved_nrt->rt_rmx);
 			saved_nrt->rt_rmx.rmx_locks &= ~(rtm->rtm_inits);
@@ -344,12 +345,14 @@
 				(rtm->rtm_inits & rtm->rtm_rmx.rmx_locks);
 			saved_nrt->rt_refcnt--;
 			saved_nrt->rt_genmask = genmask;
+			RT_UNLOCK(saved_nrt);
 		}
 		break;
 
 	case RTM_DELETE:
 		error = rtrequest1(RTM_DELETE, &info, &saved_nrt);
 		if (error == 0) {
+			RT_LOCK(saved_nrt);
 			rt = saved_nrt;
 			goto report;
 		}
@@ -358,15 +361,15 @@
 	case RTM_GET:
 	case RTM_CHANGE:
 	case RTM_LOCK:
-		if ((rnh = rt_tables[dst->sa_family]) == 0) {
+		if ((rnh = rt_tables[dst->sa_family]) == 0)
 			senderr(EAFNOSUPPORT);
-		}
 		RADIX_NODE_HEAD_LOCK(rnh);
 		rt = (struct rtentry *) rnh->rnh_lookup(dst, netmask, rnh);
 		RADIX_NODE_HEAD_UNLOCK(rnh);
-		if (rt != NULL)
+		if (rt != NULL) {		/* XXX looks bogus */
+			RT_LOCK(rt);
 			rt->rt_refcnt++;
-		else
+		} else
 			senderr(ESRCH);
 
 		switch(rtm->rtm_type) {
@@ -395,8 +398,10 @@
 			if (len > rtm->rtm_msglen) {
 				struct rt_msghdr *new_rtm;
 				R_Malloc(new_rtm, struct rt_msghdr *, len);
-				if (new_rtm == 0)
+				if (new_rtm == 0) {
+					RT_UNLOCK(rt);
 					senderr(ENOBUFS);
+				}
 				Bcopy(rtm, new_rtm, rtm->rtm_msglen);
 				Free(rtm); rtm = new_rtm;
 			}
@@ -413,28 +418,33 @@
 			   by ll sockaddr when protocol address is ambiguous */
 /* compare two sockaddr structures */
 #define	sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
-			if ((rt->rt_flags & RTF_GATEWAY && gate != NULL) ||
+			if (((rt->rt_flags & RTF_GATEWAY) && gate != NULL) ||
 			    ifpaddr != NULL ||
 			    (ifaaddr != NULL &&
 			    !sa_equal(ifaaddr, rt->rt_ifa->ifa_addr))) {
-				if ((error = rt_getifa(&info)) != 0)
+				if ((error = rt_getifa(&info)) != 0) {
+					RT_UNLOCK(rt);
 					senderr(error);
+				}
 			}
 			if (gate != NULL &&
-			    (error = rt_setgate(rt, rt_key(rt), gate)) != 0)
+			    (error = rt_setgate(rt, rt_key(rt), gate)) != 0) {
+				RT_UNLOCK(rt);
 				senderr(error);
+			}
 			if ((ifa = info.rti_ifa) != NULL) {
-				register struct ifaddr *oifa = rt->rt_ifa;
+				struct ifaddr *oifa = rt->rt_ifa;
 				if (oifa != ifa) {
-				    if (oifa) {
-					if (oifa->ifa_rtrequest)
-					    oifa->ifa_rtrequest(RTM_DELETE, rt,
-						&info);
-				        IFAFREE(oifa);
-				    }
-				    IFAREF(ifa);
-				    rt->rt_ifa = ifa;
-				    rt->rt_ifp = info.rti_ifp;
+					if (oifa) {
+						if (oifa->ifa_rtrequest)
+							oifa->ifa_rtrequest(
+								RTM_DELETE, rt,
+								&info);
+						IFAFREE(oifa);
+					}
+				        IFAREF(ifa);
+				        rt->rt_ifa = ifa;

>>> TRUNCATED FOR MAIL (1000 lines) <<<



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200308052333.h75NXnGf074571>