Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 9 Mar 2010 01:11:45 +0000 (UTC)
From:      Qing Li <qingli@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r204902 - in head/sys: net netinet
Message-ID:  <201003090111.o291Bj79062503@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: qingli
Date: Tue Mar  9 01:11:45 2010
New Revision: 204902
URL: http://svn.freebsd.org/changeset/base/204902

Log:
  One of the advantages of enabling ECMP (a.k.a. RADIX_MPATH) is to
  allow for connection load balancing across interfaces. Currently
  the address alias handling method collides with the ECMP code.
  For example, when two interfaces are configured on the same prefix,
  only one prefix route is installed, so connection load balancing
  among the available interfaces is not possible.
  
  The other advantage of ECMP is for failover. The issue with the
  current code is that the interface link-state is not reflected
  in the route entry. For example, if there are two interfaces on
  the same prefix and the cable on one interface is unplugged, new
  and existing connections should switch over to the other interface.
  This is not done today and packets go into a black hole.
  
  Also, there is a small bug in the kernel where deleting ECMP routes
  from userland always returns an error even though the command
  is successfully executed.
  
  MFC after:	5 days

Modified:
  head/sys/net/flowtable.c
  head/sys/net/radix.c
  head/sys/net/radix_mpath.c
  head/sys/net/route.c
  head/sys/net/route.h
  head/sys/netinet/in.c
  head/sys/netinet/ip_output.c

Modified: head/sys/net/flowtable.c
==============================================================================
--- head/sys/net/flowtable.c	Tue Mar  9 00:52:16 2010	(r204901)
+++ head/sys/net/flowtable.c	Tue Mar  9 01:11:45 2010	(r204902)
@@ -472,7 +472,8 @@ flow_stale(struct flowtable *ft, struct 
 	    || ((fle->f_rt->rt_flags & RTF_HOST) &&
 		((fle->f_rt->rt_flags & (RTF_UP))
 		    != (RTF_UP)))
-	    || (fle->f_rt->rt_ifp == NULL))
+	    || (fle->f_rt->rt_ifp == NULL)
+	    || !RT_LINK_IS_UP(fle->f_rt->rt_ifp))
 		return (1);
 
 	idle_time = time_uptime - fle->f_uptime;

Modified: head/sys/net/radix.c
==============================================================================
--- head/sys/net/radix.c	Tue Mar  9 00:52:16 2010	(r204901)
+++ head/sys/net/radix.c	Tue Mar  9 01:11:45 2010	(r204902)
@@ -761,8 +761,10 @@ on2:
 		if (m->rm_flags & RNF_NORMAL) {
 			mmask = m->rm_leaf->rn_mask;
 			if (tt->rn_flags & RNF_NORMAL) {
+#if !defined(RADIX_MPATH)
 			    log(LOG_ERR,
 			        "Non-unique normal route, mask not entered\n");
+#endif
 				return tt;
 			}
 		} else

Modified: head/sys/net/radix_mpath.c
==============================================================================
--- head/sys/net/radix_mpath.c	Tue Mar  9 00:52:16 2010	(r204901)
+++ head/sys/net/radix_mpath.c	Tue Mar  9 01:11:45 2010	(r204902)
@@ -270,7 +270,8 @@ rtalloc_mpath_fib(struct route *ro, uint
 	 * XXX we don't attempt to lookup cached route again; what should
 	 * be done for sendto(3) case?
 	 */
-	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
+	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
+	    && RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
 		return;				 
 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
 

Modified: head/sys/net/route.c
==============================================================================
--- head/sys/net/route.c	Tue Mar  9 00:52:16 2010	(r204901)
+++ head/sys/net/route.c	Tue Mar  9 01:11:45 2010	(r204902)
@@ -830,7 +830,13 @@ rt_getifa_fib(struct rt_addrinfo *info, 
 int
 rtexpunge(struct rtentry *rt)
 {
+#if !defined(RADIX_MPATH)
 	struct radix_node *rn;
+#else
+	struct rt_addrinfo info;
+	int fib;
+	struct rtentry *rt0;
+#endif
 	struct radix_node_head *rnh;
 	struct ifaddr *ifa;
 	int error = 0;
@@ -843,14 +849,26 @@ rtexpunge(struct rtentry *rt)
 	if (rnh == NULL)
 		return (EAFNOSUPPORT);
 	RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
-#if 0
-	/*
-	 * We cannot assume anything about the reference count
-	 * because protocols call us in many situations; often
-	 * before unwinding references to the table entry.
-	 */
-	KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt));
-#endif
+
+#ifdef RADIX_MPATH
+	fib = rt->rt_fibnum;
+	bzero(&info, sizeof(info));
+	info.rti_ifp = rt->rt_ifp;
+	info.rti_flags = RTF_RNH_LOCKED;
+	info.rti_info[RTAX_DST] = rt_key(rt);
+	info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
+
+	RT_UNLOCK(rt);
+	error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
+
+	if (error == 0 && rt0 != NULL) {
+		rt = rt0;
+		RT_LOCK(rt);
+	} else if (error != 0) {
+		RT_LOCK(rt);
+		return (error);
+	}
+#else
 	/*
 	 * Remove the item from the tree; it should be there,
 	 * but when callers invoke us blindly it may not (sigh).
@@ -864,6 +882,7 @@ rtexpunge(struct rtentry *rt)
 		("unexpected flags 0x%x", rn->rn_flags));
 	KASSERT(rt == RNTORT(rn),
 		("lookup mismatch, rt %p rn %p", rt, rn));
+#endif /* RADIX_MPATH */
 
 	rt->rt_flags &= ~RTF_UP;
 
@@ -886,7 +905,9 @@ rtexpunge(struct rtentry *rt)
 	 * linked to the routing table.
 	 */
 	V_rttrash++;
+#if !defined(RADIX_MPATH)
 bad:
+#endif
 	return (error);
 }
 
@@ -1044,6 +1065,7 @@ rtrequest1_fib(int req, struct rt_addrin
 			 */
 			if (error != ENOENT)
 				goto bad;
+			error = 0;
 		}
 #endif
 		/*

Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h	Tue Mar  9 00:52:16 2010	(r204901)
+++ head/sys/net/route.h	Tue Mar  9 01:11:45 2010	(r204902)
@@ -319,6 +319,8 @@ struct rt_addrinfo {
 
 #ifdef _KERNEL
 
+#define RT_LINK_IS_UP(ifp)	((ifp)->if_link_state == LINK_STATE_UP)
+
 #define	RT_LOCK_INIT(_rt) \
 	mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
 #define	RT_LOCK(_rt)		mtx_lock(&(_rt)->rt_mtx)

Modified: head/sys/netinet/in.c
==============================================================================
--- head/sys/netinet/in.c	Tue Mar  9 00:52:16 2010	(r204901)
+++ head/sys/netinet/in.c	Tue Mar  9 01:11:45 2010	(r204902)
@@ -34,6 +34,7 @@
 __FBSDID("$FreeBSD$");
 
 #include "opt_carp.h"
+#include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1040,6 +1041,13 @@ in_addprefix(struct in_ifaddr *target, i
 		 * interface address, we are done here.
 		 */
 		if (ia->ia_flags & IFA_ROUTE) {
+#ifdef RADIX_MPATH
+			if (ia->ia_addr.sin_addr.s_addr == 
+			    target->ia_addr.sin_addr.s_addr)
+				return (EEXIST);
+			else
+				break;
+#endif
 			if (V_sameprefixcarponly &&
 			    target->ia_ifp->if_type != IFT_CARP &&
 			    ia->ia_ifp->if_type != IFT_CARP) {

Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c	Tue Mar  9 00:52:16 2010	(r204901)
+++ head/sys/netinet/ip_output.c	Tue Mar  9 01:11:45 2010	(r204902)
@@ -199,6 +199,8 @@ again:
 	 */
 	rte = ro->ro_rt;
 	if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
+		    rte->rt_ifp == NULL ||
+		    !RT_LINK_IS_UP(rte->rt_ifp) ||
 			  dst->sin_family != AF_INET ||
 			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
 		if (!nortfree)
@@ -270,7 +272,9 @@ again:
 #endif
 			rte = ro->ro_rt;
 		}
-		if (rte == NULL) {
+		if (rte == NULL ||
+		    rte->rt_ifp == NULL ||
+		    !RT_LINK_IS_UP(rte->rt_ifp)) {
 #ifdef IPSEC
 			/*
 			 * There is no route for this packet, but it is



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201003090111.o291Bj79062503>