Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 8 Nov 2012 16:47:36 GMT
From:      Ingo Flaschberger <if@FreeBSD.org>
To:        freebsd-gnats-submit@FreeBSD.org
Subject:   kern/173477: mpath bugfixes
Message-ID:  <201211081647.qA8GlaZU099271@red.freebsd.org>
Resent-Message-ID: <201211081650.qA8Go1Yp051893@freefall.freebsd.org>

next in thread | raw e-mail | index | archive | help

>Number:         173477
>Category:       kern
>Synopsis:       mpath bugfixes
>Confidential:   no
>Severity:       non-critical
>Priority:       low
>Responsible:    freebsd-bugs
>State:          open
>Quarter:        
>Keywords:       
>Date-Required:
>Class:          sw-bug
>Submitter-Id:   current-users
>Arrival-Date:   Thu Nov 08 16:50:01 UTC 2012
>Closed-Date:
>Last-Modified:
>Originator:     Ingo Flaschberger
>Release:        9.1 Stable
>Organization:
crossip communications gmbh
>Environment:
9.1-PRERELEASE
>Description:
Several mpath bugfixes:
*) if mpath is enabled, the interface loopback route could not be deleted
   (introduced in SVN rev 226241)
*) route selection crashes when 3 mpath routes are installed and deleted:
   1: route to gw1 weight 3
   2: route to gw2 weight 2
   3: interface route metric 1
   and deleted in 2-1 order (already freed rm_leaf returned)
*) added correct mpath selection on interface-routes (in_lltable_rtcheck)
*) added mpath to fastforward
*) do correct equal cost mpath route selection based on weight (rtalloc_mpath_fib_flags)

>How-To-Repeat:
Mpath test-script:
em0: interface must be up
em3: interface must be up, and there must be a pingable host at 10.11.11.1/24

The routing table has to be the same before and after running the script.
#!/bin/sh
#
# Multipath routing test script.
#
# Adds and removes an interface alias on em3 and two weighted routes to the
# same prefix in several different orders.  After every change the script
# pings one host on that prefix and prints "testN ok" or "testN failed",
# depending on whether the ping outcome matches what is expected at that
# step.  It waits for Enter between steps so the routing table can be
# inspected by hand.

net=10.11.11.0/24	# prefix reached via em3 and via both gateways
alias_ip=10.11.11.175	# address added to / removed from em3
target=10.11.11.1	# host pinged after every step
gw1=192.168.2.1		# gateway installed with weight 2
gw2=192.168.2.3		# gateway installed with weight 3

# pause: print a prompt and wait for the operator to press Enter.
# read takes variable names, not a prompt string, so the prompt is printed
# separately and the input goes into a throw-away variable.
pause() {
	printf 'Press [Enter] key '
	read _reply
}

# check NAME EXPECT: ping the target once with a one second timeout.
# EXPECT is "up" when a reply is expected and "down" when none is expected.
# Prints "NAME ok" when the outcome matches EXPECT, "NAME failed" otherwise.
check() {
	if ping -t 1 -c 1 "$target" > /dev/null; then
		_state=up
	else
		_state=down
	fi
	if [ "$_state" = "$2" ]; then
		echo "$1 ok"
	else
		echo "$1 failed"
	fi
}

# alias_add / alias_del: add or remove the test address on em3.
alias_add() {
	ifconfig em3 alias "$alias_ip/24" > /dev/null
}

alias_del() {
	ifconfig em3 -alias "$alias_ip" > /dev/null
}

# gw_add GATEWAY WEIGHT: install a route to the test prefix via GATEWAY.
gw_add() {
	route add "$net" "$1" -weight "$2" > /dev/null
}

# gw_del GATEWAY: remove the route to the test prefix via GATEWAY.
gw_del() {
	route delete "$net" "$1" > /dev/null
}

ifconfig em0 192.168.2.100/24
pause

# Interface route first, then both gateway routes; gateways removed in the
# order they were added.
alias_add
check test1 up
pause

gw_add "$gw1" 2
check test2 up
pause

gw_add "$gw2" 3
check test3 up
pause

gw_del "$gw1"
check test4 up
pause

gw_del "$gw2"
check test5 up
pause

alias_del
check test6 down
pause

# Gateway route first, interface route added in between, then removed while
# both gateway routes are still installed.
gw_add "$gw1" 2
check test7 down
pause

alias_add
check test8 up
pause

gw_add "$gw2" 3
check test9 up
pause

alias_del
check test10 down
pause

gw_del "$gw1"
check test11 down
pause

gw_del "$gw2"
check test12 down
pause

# Both gateway routes first, interface route last; gateways removed in
# reverse order.
gw_add "$gw1" 2
check test13 down
pause

gw_add "$gw2" 3
check test14 down
pause

alias_add
check test15 up
pause

gw_del "$gw2"
check test16 up
pause

gw_del "$gw1"
check test17 up
pause

alias_del
check test18 down


>Fix:


Patch attached with submission follows:

diff -u -r sys_org/contrib/ipfilter/netinet/ip_pool.c /router/usr/src/sys/contrib/ipfilter/netinet/ip_pool.c
--- sys_org/contrib/ipfilter/netinet/ip_pool.c	2012-11-08 15:15:22.000000000 +0100
+++ /router/usr/src/sys/contrib/ipfilter/netinet/ip_pool.c	2012-10-29 16:19:05.000000000 +0100
@@ -620,7 +620,7 @@
 
 	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
 	ipo->ipo_head->rnh_deladdr(&ipe->ipn_addr, &ipe->ipn_mask,
-				   ipo->ipo_head);
+				   ipo->ipo_head, NULL);
 	RADIX_NODE_HEAD_UNLOCK(ipo->ipo_head);
 
 	ip_pool_node_deref(ipe);
@@ -751,7 +751,7 @@
 	RADIX_NODE_HEAD_LOCK(ipo->ipo_head);
 	while ((n = ipo->ipo_list) != NULL) {
 		ipo->ipo_head->rnh_deladdr(&n->ipn_addr, &n->ipn_mask,
-					   ipo->ipo_head);
+					   ipo->ipo_head, NULL);
 
 		*n->ipn_pnext = n->ipn_next;
 		if (n->ipn_next)
@@ -963,7 +963,7 @@
 	struct radix_node_head *rnh = p;
 	struct radix_node *d;
 
-	d = rnh->rnh_deladdr(n->rn_key, NULL, rnh);
+	d = rnh->rnh_deladdr(n->rn_key, NULL, rnh, NULL);
 	if (d != NULL) {
 		FreeS(d, max_keylen + 2 * sizeof (*d));
 	}
diff -u -r sys_org/kern/vfs_export.c /router/usr/src/sys/kern/vfs_export.c
--- sys_org/kern/vfs_export.c	2012-11-08 15:15:13.000000000 +0100
+++ /router/usr/src/sys/kern/vfs_export.c	2012-10-29 16:16:33.000000000 +0100
@@ -228,7 +228,7 @@
 	struct radix_node_head *rnh = (struct radix_node_head *) w;
 	struct ucred *cred;
 
-	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
+	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh, NULL);
 	cred = ((struct netcred *)rn)->netc_anon;
 	if (cred != NULL)
 		crfree(cred);
diff -u -r sys_org/net/if.c /router/usr/src/sys/net/if.c
--- sys_org/net/if.c	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/if.c	2012-10-30 00:34:40.000000000 +0100
@@ -70,6 +70,7 @@
 #include <net/if_types.h>
 #include <net/if_var.h>
 #include <net/radix.h>
+#include "opt_mpath.h"
 #include <net/route.h>
 #include <net/vnet.h>
 
@@ -1485,6 +1486,9 @@
 {
 	int error = 0;
 	struct rt_addrinfo info;
+#ifdef RADIX_MPATH
+	struct ifaddr *new_ifa;
+#else
 	struct sockaddr_dl null_sdl;
 
 	bzero(&null_sdl, sizeof(null_sdl));
@@ -1492,14 +1496,25 @@
 	null_sdl.sdl_family = AF_LINK;
 	null_sdl.sdl_type = ifa->ifa_ifp->if_type;
 	null_sdl.sdl_index = ifa->ifa_ifp->if_index;
+#endif
 	bzero(&info, sizeof(info));
 	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC;
 	info.rti_info[RTAX_DST] = ia;
+#ifdef RADIX_MPATH
+	info.rti_ifp = V_loif;
+
+	/* link_rtrequest modifies ifa - do this also */
+	new_ifa = ifaof_ifpforaddr( ia, V_loif);
+
+	/* rt_mpath_matchgate matches ifa_addr and not gateway */		
+	info.rti_info[RTAX_GATEWAY] = new_ifa->ifa_addr;
+#else
 	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
+#endif
 	error = rtrequest1_fib(RTM_DELETE, &info, NULL, 0);
 
 	if (error != 0)
-		log(LOG_INFO, "ifa_del_loopback_route: deletion failed\n");
+		log(LOG_INFO, "ifa_del_loopback_route: deletion failed err: %d\n", error);
 
 	return (error);
 }
diff -u -r sys_org/net/radix.c /router/usr/src/sys/net/radix.c
--- sys_org/net/radix.c	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix.c	2012-11-08 15:20:04.000000000 +0100
@@ -312,7 +312,7 @@
 	 * lot of confusion.
 	 */
 	if (t->rn_flags & RNF_ROOT)
-		t = t->rn_dupedkey;
+	        t = t->rn_dupedkey;
 	return t;
 on1:
 	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
@@ -723,12 +723,20 @@
 		x = t->rn_right;
 	/* Promote general routes from below */
 	if (x->rn_bit < 0) {
-	    for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
-		if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
-			*mp = m = rn_new_radix_mask(x, 0);
-			if (m)
-				mp = &m->rm_mklist;
-		}
+	        struct	radix_node *xx = NULL;
+	        for (mp = &t->rn_mklist; x; xx = x, x = x->rn_dupedkey) {
+	                if (xx && xx->rn_mklist && xx->rn_mask == x->rn_mask &&
+	                    x->rn_mklist == 0) {
+	                        /* multipath route, bump refcount on first mklist */
+	                        x->rn_mklist = xx->rn_mklist;
+	                        x->rn_mklist->rm_refs++;
+                        }
+                        if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
+			        *mp = m = rn_new_radix_mask(x, 0);
+			        if (m)
+				        mp = &m->rm_mklist;
+                        }
+                }
 	} else if (x->rn_mklist) {
 		/*
 		 * Skip over masks whose index is > that of new node
@@ -760,11 +768,30 @@
 			break;
 		if (m->rm_flags & RNF_NORMAL) {
 			mmask = m->rm_leaf->rn_mask;
-			if (tt->rn_flags & RNF_NORMAL) {
-#if !defined(RADIX_MPATH)
+			if (keyduplicated) {
+			        if (m->rm_leaf->rn_parent == tt)
+			                /* new route is better */
+                                        m->rm_leaf = tt;
+#ifdef DIAGNOSTIC
+                                else {
+                                        for (t = m->rm_leaf; t;
+                                            t = t->rn_dupedkey)
+                                                if (t == tt)
+                                                        break;
+                                        if (t == NULL) {
+                                                log(LOG_ERR, "Non-unique "
+                                                    "normal route on dupedkey, "
+                                                    "mask not entered\n");
+                                                return tt;
+                                        }
+                                }
+#endif
+                                m->rm_refs++;
+                                tt->rn_mklist = m;
+                                return tt;
+                        } else if (tt->rn_flags & RNF_NORMAL) {
 			    log(LOG_ERR,
 			        "Non-unique normal route, mask not entered\n");
-#endif
 				return tt;
 			}
 		} else
@@ -783,9 +810,10 @@
 }
 
 struct radix_node *
-rn_delete(v_arg, netmask_arg, head)
+rn_delete(v_arg, netmask_arg, head, rn)
 	void *v_arg, *netmask_arg;
 	struct radix_node_head *head;
+	struct radix_node *rn;
 {
 	register struct radix_node *t, *p, *x, *tt;
 	struct radix_mask *m, *saved_m, **mp;
@@ -815,18 +843,41 @@
 			if ((tt = tt->rn_dupedkey) == 0)
 				return (0);
 	}
+#ifdef RADIX_MPATH
+        if (rn) {
+                while (tt != rn)
+                        if ((tt = tt->rn_dupedkey) == 0)
+                                return (0);
+        }
+#endif
 	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
 		goto on1;
 	if (tt->rn_flags & RNF_NORMAL) {
-		if (m->rm_leaf != tt || m->rm_refs > 0) {
-			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
-			return 0;  /* dangling ref could cause disaster */
-		}
+		if (m->rm_leaf != tt && m->rm_refs == 0) {
+		        log(LOG_ERR, "rn_delete: inconsistent normal "
+		            "annotation\n");
+                        return (0);
+                }
+                if (m->rm_leaf != tt) {
+                        if (--m->rm_refs >= 0)
+                                goto on1;
+                }
+                /* tt is currently the head of the possible multipath chain */
+                if (m->rm_refs > 0) {
+                        if (tt->rn_dupedkey == NULL ||
+                            tt->rn_dupedkey->rn_mklist != m) {
+                                log(LOG_ERR, "rn_delete: inconsistent "
+                                    "dupedkey list\n");
+                                return (0);
+                        }
+                        m->rm_leaf = tt->rn_dupedkey;
+                        --m->rm_refs;
+                        goto on1;
+                }
+		/* else tt is last and only route */
 	} else {
-		if (m->rm_mask != tt->rn_mask) {
-			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
+		if (m->rm_mask != tt->rn_mask)
 			goto on1;
-		}
 		if (--m->rm_refs >= 0)
 			goto on1;
 	}
@@ -875,15 +926,10 @@
 			else
 				t->rn_right = x;
 		} else {
-			/* find node in front of tt on the chain */
-			for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
-				p = p->rn_dupedkey;
-			if (p) {
-				p->rn_dupedkey = tt->rn_dupedkey;
-				if (tt->rn_dupedkey)		/* parent */
-					tt->rn_dupedkey->rn_parent = p;
-								/* parent */
-			} else log(LOG_ERR, "rn_delete: couldn't find us\n");
+		        x = saved_tt;
+		        t->rn_dupedkey = tt->rn_dupedkey;
+		        if (tt->rn_dupedkey)
+		                tt->rn_dupedkey->rn_parent = t;
 		}
 		t = tt + 1;
 		if  (t->rn_flags & RNF_ACTIVE) {
@@ -931,8 +977,16 @@
 				if (m == x->rn_mklist) {
 					struct radix_mask *mm = m->rm_mklist;
 					x->rn_mklist = 0;
-					if (--(m->rm_refs) < 0)
+					if (--(m->rm_refs) < 0) {
 						MKFree(m);
+                                        } else if (m->rm_flags & RNF_NORMAL) {
+                                                /*
+                                                 * don't progress because this
+                                                 * a multipath route. Next
+                                                 * route will use the same m.
+                                                 */
+                                                mm = m;
+                                        }
 					m = mm;
 				}
 			if (m)
@@ -1107,7 +1161,7 @@
 			rn = rn->rn_left;
 		next = rn;
 		/* Process leaves */
-		while ((rn = base)) {
+		while ((rn = base) != NULL) {
 			base = rn->rn_dupedkey;
 			if (!(rn->rn_flags & RNF_ROOT)
 			    && (error = (*f)(rn, w)))
diff -u -r sys_org/net/radix.h /router/usr/src/sys/net/radix.h
--- sys_org/net/radix.h	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix.h	2012-10-29 16:15:23.000000000 +0100
@@ -116,7 +116,8 @@
 		(void *v, void *mask,
 		     struct radix_node_head *head, struct radix_node nodes[]);
 	struct	radix_node *(*rnh_deladdr)	/* remove based on sockaddr */
-		(void *v, void *mask, struct radix_node_head *head);
+		(void *v, void *mask, struct radix_node_head *head,
+                     struct radix_node *rn);
 	struct	radix_node *(*rnh_delpkt)	/* remove based on packet hdr */
 		(void *v, void *mask, struct radix_node_head *head);
 	struct	radix_node *(*rnh_matchaddr)	/* locate based on sockaddr */
@@ -169,7 +170,8 @@
 	 *rn_addmask(void *, int, int),
 	 *rn_addroute (void *, void *, struct radix_node_head *,
 			struct radix_node [2]),
-	 *rn_delete(void *, void *, struct radix_node_head *),
+	 *rn_delete(void *, void *, struct radix_node_head *,
+	                 struct radix_node *),
 	 *rn_lookup (void *v_arg, void *m_arg,
 		        struct radix_node_head *head),
 	 *rn_match(void *, struct radix_node_head *);
diff -u -r sys_org/net/radix_mpath.c /router/usr/src/sys/net/radix_mpath.c
--- sys_org/net/radix_mpath.c	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix_mpath.c	2012-10-30 01:33:18.000000000 +0100
@@ -77,20 +77,6 @@
 		return NULL;
 }
 
-uint32_t
-rn_mpath_count(struct radix_node *rn)
-{
-	uint32_t i = 0;
-	struct rtentry *rt;
-	
-	while (rn != NULL) {
-		rt = (struct rtentry *)rn;
-		i += rt->rt_rmx.rmx_weight;
-		rn = rn_mpath_next(rn);
-	}
-	return (i);
-}
-
 struct rtentry *
 rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
 {
@@ -122,33 +108,6 @@
 	return (struct rtentry *)rn;
 }
 
-/* 
- * go through the chain and unlink "rt" from the list
- * the caller will free "rt"
- */
-int
-rt_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
-{
-        struct radix_node *t, *tt;
-
-        if (!headrt || !rt)
-            return (0);
-        t = (struct radix_node *)headrt;
-        tt = rn_mpath_next(t);
-        while (tt) {
-            if (tt == (struct radix_node *)rt) {
-                t->rn_dupedkey = tt->rn_dupedkey;
-                tt->rn_dupedkey = NULL;
-    	        tt->rn_flags &= ~RNF_ACTIVE;
-	        tt[1].rn_flags &= ~RNF_ACTIVE;
-                return (1);
-            }
-            t = tt;
-            tt = rn_mpath_next((struct radix_node *)t);
-        }
-        return (0);
-}
-
 /*
  * check if we have the same key/mask/gateway on the table already.
  */
@@ -256,12 +215,21 @@
 }
 
 void
-rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
+rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) {
+	rtalloc_mpath_fib_flags( ro, hash, fibnum, 0);
+}
+
+/*
+ * flag RTF_GATEWAY returns only interface routes,
+ * only one interface-route is possible
+ */ 
+void
+rtalloc_mpath_fib_flags(struct route *ro, uint32_t hash, u_int fibnum, int flags)
 {
 	struct radix_node *rn0, *rn;
-	u_int32_t n;
+	u_int32_t n = 0;
 	struct rtentry *rt;
-	int64_t weight;
+	int64_t lowest_weight;
 
 	/*
 	 * XXX we don't attempt to lookup cached route again; what should
@@ -269,29 +237,52 @@
 	 */
 	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
 	    && RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
-		return;				 
+		return;
 	ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
 
 	/* if the route does not exist or it is not multipath, don't care */
 	if (ro->ro_rt == NULL)
 		return;
 	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
+		if (flags & RTF_GATEWAY)
+			return;
 		RT_UNLOCK(ro->ro_rt);
 		return;
 	}
 
 	/* beyond here, we use rn as the master copy */
 	rn0 = rn = (struct radix_node *)ro->ro_rt;
-	n = rn_mpath_count(rn0);
 
-	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
+	/* find count of lowest weight route */
+	for (rt = ro->ro_rt, lowest_weight = 9223372036854775807; rn != NULL;){
+		if( rt->rt_flags & RTF_UP) {
+			if ((flags & RTF_GATEWAY) && 
+			    (!(rt->rt_flags & RTF_GATEWAY)) && 
+			    (!(rt->rt_flags & RTF_HOST)) )
+				goto end;   /* only 1 interface route possible! */
+			if( lowest_weight > rt->rt_rmx.rmx_weight) {
+				lowest_weight = rt->rt_rmx.rmx_weight;
+				n = 1;
+			} else if( lowest_weight == rt->rt_rmx.rmx_weight)
+				n++;
+		}
+		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
+			break;
+		rn = rn->rn_dupedkey;
+		rt = (struct rtentry *)rn;
+	}
+	/* select now one of the lowest weight routes */
+	/* gw selection by Modulo-N Hash (RFC2991) */
 	hash += hashjitter;
 	hash %= n;
-	for (weight = abs((int32_t)hash), rt = ro->ro_rt;
-	     weight >= rt->rt_rmx.rmx_weight && rn; 
-	     weight -= rt->rt_rmx.rmx_weight) {
-		
-		/* stay within the multipath routes */
+	for ( rt = ro->ro_rt, rn = rn0, n = 0; rn != NULL; ) {
+		if( rt->rt_flags & RTF_UP) {
+			if ( rt->rt_rmx.rmx_weight == lowest_weight) {
+				if (n == hash)
+					break;
+				n++;
+			}
+		}
 		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
 			break;
 		rn = rn->rn_dupedkey;
@@ -300,19 +291,22 @@
 	/* XXX try filling rt_gwroute and avoid unreachable gw  */
 
 	/* gw selection has failed - there must be only zero weight routes */
-	if (!rn) {
+	if (!rn || (flags & RTF_GATEWAY)) {
 		RT_UNLOCK(ro->ro_rt);
 		ro->ro_rt = NULL;
 		return;
 	}
+
+end:		
 	if (ro->ro_rt != rt) {
 		RTFREE_LOCKED(ro->ro_rt);
 		ro->ro_rt = (struct rtentry *)rn;
 		RT_LOCK(ro->ro_rt);
 		RT_ADDREF(ro->ro_rt);
 
-	} 
-	RT_UNLOCK(ro->ro_rt);
+	}
+	if (!(flags & RTF_GATEWAY))
+		RT_UNLOCK(ro->ro_rt);
 }
 
 extern int	in6_inithead(void **head, int off);
diff -u -r sys_org/net/radix_mpath.h /router/usr/src/sys/net/radix_mpath.h
--- sys_org/net/radix_mpath.h	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/radix_mpath.h	2012-10-30 01:33:28.000000000 +0100
@@ -46,12 +46,12 @@
 struct sockaddr;
 int	rn_mpath_capable(struct radix_node_head *);
 struct radix_node *rn_mpath_next(struct radix_node *);
-u_int32_t rn_mpath_count(struct radix_node *);
 struct rtentry *rt_mpath_matchgate(struct rtentry *, struct sockaddr *);
 int rt_mpath_conflict(struct radix_node_head *, struct rtentry *,
     struct sockaddr *);
 void rtalloc_mpath_fib(struct route *, u_int32_t, u_int);
 #define rtalloc_mpath(_route, _hash) rtalloc_mpath_fib((_route), (_hash), 0)
+void rtalloc_mpath_fib_flags(struct route *, u_int32_t, u_int, int);
 struct radix_node *rn_mpath_lookup(void *, void *,
     struct radix_node_head *);
 int rt_mpath_deldup(struct rtentry *, struct rtentry *);
diff -u -r sys_org/net/route.c /router/usr/src/sys/net/route.c
--- sys_org/net/route.c	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/net/route.c	2012-11-08 15:24:13.000000000 +0100
@@ -904,7 +904,7 @@
 	 * Remove the item from the tree; it should be there,
 	 * but when callers invoke us blindly it may not (sigh).
 	 */
-	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
+	rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh, NULL);
 	if (rn == NULL) {
 		error = ESRCH;
 		goto bad;
@@ -942,112 +942,6 @@
 	return (error);
 }
 
-#ifdef RADIX_MPATH
-static int
-rn_mpath_update(int req, struct rt_addrinfo *info,
-    struct radix_node_head *rnh, struct rtentry **ret_nrt)
-{
-	/*
-	 * if we got multipath routes, we require users to specify
-	 * a matching RTAX_GATEWAY.
-	 */
-	struct rtentry *rt, *rto = NULL;
-	register struct radix_node *rn;
-	int error = 0;
-
-	rn = rnh->rnh_matchaddr(dst, rnh);
-	if (rn == NULL)
-		return (ESRCH);
-	rto = rt = RNTORT(rn);
-	rt = rt_mpath_matchgate(rt, gateway);
-	if (rt == NULL)
-		return (ESRCH);
-	/*
-	 * this is the first entry in the chain
-	 */
-	if (rto == rt) {
-		rn = rn_mpath_next((struct radix_node *)rt);
-		/*
-		 * there is another entry, now it's active
-		 */
-		if (rn) {
-			rto = RNTORT(rn);
-			RT_LOCK(rto);
-			rto->rt_flags |= RTF_UP;
-			RT_UNLOCK(rto);
-		} else if (rt->rt_flags & RTF_GATEWAY) {
-			/*
-			 * For gateway routes, we need to 
-			 * make sure that we we are deleting
-			 * the correct gateway. 
-			 * rt_mpath_matchgate() does not 
-			 * check the case when there is only
-			 * one route in the chain.  
-			 */
-			if (gateway &&
-			    (rt->rt_gateway->sa_len != gateway->sa_len ||
-				memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
-				error = ESRCH;
-			else {
-				/*
-				 * remove from tree before returning it
-				 * to the caller
-				 */
-				rn = rnh->rnh_deladdr(dst, netmask, rnh);
-				KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
-				goto gwdelete;
-			}
-			
-		}
-		/*
-		 * use the normal delete code to remove
-		 * the first entry
-		 */
-		if (req != RTM_DELETE) 
-			goto nondelete;
-
-		error = ENOENT;
-		goto done;
-	}
-		
-	/*
-	 * if the entry is 2nd and on up
-	 */
-	if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
-		panic ("rtrequest1: rt_mpath_deldup");
-gwdelete:
-	RT_LOCK(rt);
-	RT_ADDREF(rt);
-	if (req == RTM_DELETE) {
-		rt->rt_flags &= ~RTF_UP;
-		/*
-		 * One more rtentry floating around that is not
-		 * linked to the routing table. rttrash will be decremented
-		 * when RTFREE(rt) is eventually called.
-		 */
-		V_rttrash++;
-	}
-	
-nondelete:
-	if (req != RTM_DELETE)
-		panic("unrecognized request %d", req);
-	
-
-	/*
-	 * If the caller wants it, then it can have it,
-	 * but it's up to it to free the rtentry as we won't be
-	 * doing it.
-	 */
-	if (ret_nrt) {
-		*ret_nrt = rt;
-		RT_UNLOCK(rt);
-	} else
-		RTFREE_LOCKED(rt);
-done:
-	return (error);
-}
-#endif
-
 int
 rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
 				u_int fibnum)
@@ -1100,23 +994,26 @@
 			rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
 			dst = (struct sockaddr *)&mdst;
 		}
+		if ((rn = rnh->rnh_lookup(dst, netmask, rnh)) == NULL)
+		        senderr(ESRCH);
+                rt = RNTORT(rn);
 #ifdef RADIX_MPATH
+                /*
+                 * if we got multipath routes, we require users to specify
+                 * a matching RTAX_GATEWAY.
+                 */
 		if (rn_mpath_capable(rnh)) {
-			error = rn_mpath_update(req, info, rnh, ret_nrt);
-			/*
-			 * "bad" holds true for the success case
-			 * as well
-			 */
-			if (error != ENOENT)
-				goto bad;
-			error = 0;
+		        rt = rt_mpath_matchgate( rt, gateway);
+		        rn = (struct radix_node *)rt;
+		        if (!rt)
+		                senderr(ESRCH);
 		}
 #endif
 		/*
 		 * Remove the item from the tree and return it.
 		 * Complain if it is not there and do no more processing.
 		 */
-		rn = rnh->rnh_deladdr(dst, netmask, rnh);
+		rn = rnh->rnh_deladdr(dst, netmask, rnh, rn);
 		if (rn == NULL)
 			senderr(ESRCH);
 		if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
@@ -1212,7 +1109,7 @@
 		rt->rt_ifa = ifa;
 		rt->rt_ifp = ifa->ifa_ifp;
 		rt->rt_rmx.rmx_weight = 1;
-
+		
 #ifdef RADIX_MPATH
 		/* do not permit exactly the same dst/mask/gw pair */
 		if (rn_mpath_capable(rnh) &&
@@ -1373,7 +1270,7 @@
 	 */
 	if (rt->rt_gateway == NULL || glen > SA_SIZE(rt->rt_gateway)) {
 		caddr_t new;
-
+		
 		R_Malloc(new, caddr_t, dlen + glen);
 		if (new == NULL)
 			return ENOBUFS;
@@ -1506,9 +1403,8 @@
 			RADIX_NODE_HEAD_LOCK(rnh);
 #ifdef RADIX_MPATH
 			if (rn_mpath_capable(rnh)) {
-
-				rn = rnh->rnh_matchaddr(dst, rnh);
-				if (rn == NULL) 
+				rn = rnh->rnh_lookup(dst, netmask, rnh);
+				if (rn == NULL)
 					error = ESRCH;
 				else {
 					rt = RNTORT(rn);
@@ -1523,6 +1419,7 @@
 					    ifa->ifa_addr);
 					if (!rt) 
 						error = ESRCH;
+                                        rn = (struct radix_node *)rt;
 				}
 			}
 			else
diff -u -r sys_org/netatalk/at_rmx.c /router/usr/src/sys/netatalk/at_rmx.c
--- sys_org/netatalk/at_rmx.c	2012-11-08 15:15:09.000000000 +0100
+++ /router/usr/src/sys/netatalk/at_rmx.c	2012-10-29 16:20:11.000000000 +0100
@@ -91,10 +91,10 @@
 }
 
 static struct radix_node *
-at_delroute(void *v_arg, void *netmask_arg, struct radix_node_head *head)
+at_delroute(void *v_arg, void *netmask_arg, struct radix_node_head *head, struct radix_node *rn)
 {
 
-	return (rn_delete(v_arg, netmask_arg, head));
+	return (rn_delete(v_arg, netmask_arg, head, rn));
 }
 
 /*
diff -u -r sys_org/netinet/in.c /router/usr/src/sys/netinet/in.c
--- sys_org/netinet/in.c	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/in.c	2012-10-30 03:09:39.000000000 +0100
@@ -1397,13 +1397,23 @@
 in_lltable_rtcheck(struct ifnet *ifp, u_int flags, const struct sockaddr *l3addr)
 {
 	struct rtentry *rt;
+#ifdef RADIX_MPATH
+	struct route ro;
+#endif
 
 	KASSERT(l3addr->sa_family == AF_INET,
 	    ("sin_family %d", l3addr->sa_family));
 
+#ifdef RADIX_MPATH
+	/* ensure to select a interface route */
+	bzero( &ro, sizeof(ro));
+	bcopy( __DECONST(struct sockaddr *, l3addr), &ro.ro_dst, sizeof(struct sockaddr));
+	rtalloc_mpath_fib_flags( (struct route *)&ro, 0, RT_DEFAULT_FIB, RTF_GATEWAY);
+	rt = ro.ro_rt;
+#else
 	/* XXX rtalloc1 should take a const param */
 	rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
-
+#endif
 	if (rt == NULL)
 		return (EINVAL);
 
diff -u -r sys_org/netinet/ip_fastfwd.c /router/usr/src/sys/netinet/ip_fastfwd.c
--- sys_org/netinet/ip_fastfwd.c	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/ip_fastfwd.c	2012-11-08 15:32:49.000000000 +0100
@@ -78,6 +78,7 @@
 
 #include "opt_ipfw.h"
 #include "opt_ipstealth.h"
+#include "opt_mpath.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -113,7 +115,11 @@
     &VNET_NAME(ipfastforward_active), 0, "Enable fast IP forwarding");
 
 static struct sockaddr_in *
+#ifdef RADIX_MPATH
+ip_findroute(struct route *ro, uint32_t hash, struct in_addr dest, struct mbuf *m)
+#else
 ip_findroute(struct route *ro, struct in_addr dest, struct mbuf *m)
+#endif
 {
 	struct sockaddr_in *dst;
 	struct rtentry *rt;
@@ -126,7 +132,11 @@
 	dst->sin_family = AF_INET;
 	dst->sin_len = sizeof(*dst);
 	dst->sin_addr.s_addr = dest.s_addr;
+#ifdef RADIX_MPATH
+	rtalloc_mpath_fib(ro, hash, M_GETFIB(m));
+#else
 	in_rtalloc_ign(ro, 0, M_GETFIB(m));
+#endif
 
 	/*
 	 * Route there and interface still up?
@@ -420,7 +440,12 @@
 	/*
 	 * Find route to destination.
 	 */
+#ifdef RADIX_MPATH
+	if ((dst = ip_findroute(&ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
+	    dest, m)) == NULL)
+#else
 	if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+#endif
 		return NULL;	/* icmp unreach already sent */
 	ifp = ro.ro_rt->rt_ifp;
 
@@ -491,7 +516,13 @@
 		}
 #endif /* IPFIREWALL_FORWARD */
 		RTFREE(ro.ro_rt);
+#ifdef RADIX_MPATH
+		if ((dst = ip_findroute(&ro,
+		    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), dest, m))
+		    == NULL)
+#else
 		if ((dst = ip_findroute(&ro, dest, m)) == NULL)
+#endif
 			return NULL;	/* icmp unreach already sent */
 		ifp = ro.ro_rt->rt_ifp;
 	}
diff -u -r sys_org/netinet/ipfw/ip_fw_table.c /router/usr/src/sys/netinet/ipfw/ip_fw_table.c
--- sys_org/netinet/ipfw/ip_fw_table.c	2012-11-08 15:15:11.000000000 +0100
+++ /router/usr/src/sys/netinet/ipfw/ip_fw_table.c	2012-10-29 16:07:26.000000000 +0100
@@ -379,7 +379,7 @@
 		return (EINVAL);
 	}
 
-	ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh);
+	ent = (struct table_entry *)rnh->rnh_deladdr(sa_ptr, mask_ptr, rnh, NULL);
 	IPFW_WUNLOCK(ch);
 
 	if (ent == NULL)
@@ -396,7 +396,7 @@
 	struct table_entry *ent;
 
 	ent = (struct table_entry *)
-	    rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh);
+	    rnh->rnh_deladdr(rn->rn_key, rn->rn_mask, rnh, NULL);
 	if (ent != NULL)
 		free(ent, M_IPFW_TBL);
 	return (0);


>Release-Note:
>Audit-Trail:
>Unformatted:



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201211081647.qA8GlaZU099271>