Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 27 Nov 2012 20:16:37 +0000 (UTC)
From:      "Alexander V. Chernikov" <melifaro@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org
Subject:   svn commit: r243629 - in stable/8/sys: netinet netinet6
Message-ID:  <201211272016.qARKGbQH049649@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: melifaro
Date: Tue Nov 27 20:16:37 2012
New Revision: 243629
URL: http://svnweb.freebsd.org/changeset/base/243629

Log:
  MFC r241406, r241502, r241884.
  
  Do not check whether the found IPv4 rte is dynamic if
  net.inet.icmp.drop_redirect is enabled. This eliminates one mtx_lock() per
  routing lookup, thus improving performance in several cases (routing to a
  directly connected interface or routing to the default gateway).
  
  ICMP redirects should not be used to provide routing direction nowadays, even
  for end hosts. Routers should not use them either (and this is explicitly
  restricted in IPv6, see RFC 4861, clause 8.2).
  
  The current commit changes the rnh_matchaddr function to 'stock' rn_match (and
  back) for every AF_INET routing table in the given VNET instance on
  drop_redirect sysctl change.
  
  Eliminate the code checking whether the found IPv6 rte is dynamic. IPv6
  redirects use a (different) ND-based approach described in RFC 4861. This
  change is similar to r241406, which conditionally skips the same check in IPv4.
  
  Clean up documentation: cloning route support was removed in r186119.
  
  This change is part of a bigger patch eliminating rte locking.
  
  Sponsored by:	Yandex LLC

Modified:
  stable/8/sys/netinet/in_rmx.c
  stable/8/sys/netinet/in_var.h
  stable/8/sys/netinet/ip_icmp.c
  stable/8/sys/netinet/ip_var.h
  stable/8/sys/netinet6/in6_rmx.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/netinet/   (props changed)
  stable/8/sys/netinet6/   (props changed)

Modified: stable/8/sys/netinet/in_rmx.c
==============================================================================
--- stable/8/sys/netinet/in_rmx.c	Tue Nov 27 20:16:01 2012	(r243628)
+++ stable/8/sys/netinet/in_rmx.c	Tue Nov 27 20:16:37 2012	(r243629)
@@ -27,19 +27,6 @@
  * SUCH DAMAGE.
  */
 
-/*
- * This code does two things necessary for the enhanced TCP metrics to
- * function in a useful manner:
- *  1) It marks all non-host routes as `cloning', thus ensuring that
- *     every actual reference to such a route actually gets turned
- *     into a reference to a host route to the specific destination
- *     requested.
- *  2) When such routes lose all their references, it arranges for them
- *     to be deleted in some random collection of circumstances, so that
- *     a large quantity of stale routing data is not kept in kernel memory
- *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
- */
-
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -58,6 +45,8 @@ __FBSDID("$FreeBSD$");
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 
 extern int	in_inithead(void **head, int off);
@@ -340,6 +329,13 @@ in_rtqdrain(void)
 	VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
+void
+in_setmatchfunc(struct radix_node_head *rnh, int val)
+{
+
+	rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute;
+}
+
 static int _in_rt_was_here;
 /*
  * Initialize our routing tree.
@@ -365,7 +361,7 @@ in_inithead(void **head, int off)
 
 	rnh = *head;
 	rnh->rnh_addaddr = in_addroute;
-	rnh->rnh_matchaddr = in_matroute;
+	in_setmatchfunc(rnh, V_drop_redirect);
 	rnh->rnh_close = in_clsroute;
 	if (_in_rt_was_here == 0 ) {
 		callout_init(&V_rtq_timer, CALLOUT_MPSAFE);

Modified: stable/8/sys/netinet/in_var.h
==============================================================================
--- stable/8/sys/netinet/in_var.h	Tue Nov 27 20:16:01 2012	(r243628)
+++ stable/8/sys/netinet/in_var.h	Tue Nov 27 20:16:37 2012	(r243629)
@@ -423,6 +423,7 @@ inm_acquire_locked(struct in_multi *inm)
 struct	rtentry;
 struct	route;
 struct	ip_moptions;
+struct radix_node_head;
 
 int	imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
 	    const struct sockaddr *, const struct sockaddr *);
@@ -461,6 +462,7 @@ void	 in_rtredirect(struct sockaddr *, s
 	    struct sockaddr *, int, struct sockaddr *, u_int);
 int	 in_rtrequest(int, struct sockaddr *,
 	    struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int);
+void	in_setmatchfunc(struct radix_node_head *, int);
 
 #if 0
 int	 in_rt_getifa(struct rt_addrinfo *, u_int fibnum);

Modified: stable/8/sys/netinet/ip_icmp.c
==============================================================================
--- stable/8/sys/netinet/ip_icmp.c	Tue Nov 27 20:16:01 2012	(r243628)
+++ stable/8/sys/netinet/ip_icmp.c	Tue Nov 27 20:16:37 2012	(r243629)
@@ -92,11 +92,7 @@ SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUT
 	&VNET_NAME(icmpmaskfake), 0,
 	"Fake reply to ICMP Address Mask Request packets.");
 
-static VNET_DEFINE(int, drop_redirect) = 0;
-#define	V_drop_redirect			VNET(drop_redirect)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
-	&VNET_NAME(drop_redirect), 0,
-	"Ignore ICMP redirects");
+VNET_DEFINE(int, drop_redirect) = 0;
 
 static VNET_DEFINE(int, log_redirect) = 0;
 #define	V_log_redirect			VNET(log_redirect)
@@ -153,6 +149,39 @@ static void	icmp_send(struct mbuf *, str
 
 extern	struct protosw inetsw[];
 
+static int
+sysctl_net_icmp_drop_redir(SYSCTL_HANDLER_ARGS)
+{
+	int error, new;
+	int i;
+	struct radix_node_head *rnh;
+
+	new = V_drop_redirect;
+	error = sysctl_handle_int(oidp, &new, 0, req);
+	if (error == 0 && req->newptr) {
+		new = (new != 0) ? 1 : 0;
+
+		if (new == V_drop_redirect)
+			return (0);
+
+		for (i = 0; i < rt_numfibs; i++) {
+			if ((rnh = rt_tables_get_rnh(i, AF_INET)) == NULL)
+				continue;
+			RADIX_NODE_HEAD_LOCK(rnh);
+			in_setmatchfunc(rnh, new);
+			RADIX_NODE_HEAD_UNLOCK(rnh);
+		}
+		
+		V_drop_redirect = new;
+	}
+
+	return (error);
+}
+
+SYSCTL_VNET_PROC(_net_inet_icmp, OID_AUTO, drop_redirect,
+    CTLTYPE_INT|CTLFLAG_RW, 0, 0,
+    sysctl_net_icmp_drop_redir, "I", "Ignore ICMP redirects");
+
 /*
  * Kernel module interface for updating icmpstat.  The argument is an index
  * into icmpstat treated as an array of u_long.  While this encodes the

Modified: stable/8/sys/netinet/ip_var.h
==============================================================================
--- stable/8/sys/netinet/ip_var.h	Tue Nov 27 20:16:01 2012	(r243628)
+++ stable/8/sys/netinet/ip_var.h	Tue Nov 27 20:16:37 2012	(r243629)
@@ -187,6 +187,7 @@ VNET_DECLARE(struct socket *, ip_mrouter
 extern int	(*legal_vif_num)(int);
 extern u_long	(*ip_mcast_src)(int);
 VNET_DECLARE(int, rsvp_on);
+VNET_DECLARE(int, drop_redirect);
 extern struct	pr_usrreqs rip_usrreqs;
 
 #define	V_ipstat		VNET(ipstat)
@@ -199,6 +200,7 @@ extern struct	pr_usrreqs rip_usrreqs;
 #define	V_ip_rsvpd		VNET(ip_rsvpd)
 #define	V_ip_mrouter		VNET(ip_mrouter)
 #define	V_rsvp_on		VNET(rsvp_on)
+#define	V_drop_redirect		VNET(drop_redirect)
 
 void	inp_freemoptions(struct ip_moptions *);
 int	inp_getmoptions(struct inpcb *, struct sockopt *);

Modified: stable/8/sys/netinet6/in6_rmx.c
==============================================================================
--- stable/8/sys/netinet6/in6_rmx.c	Tue Nov 27 20:16:01 2012	(r243628)
+++ stable/8/sys/netinet6/in6_rmx.c	Tue Nov 27 20:16:37 2012	(r243629)
@@ -59,19 +59,6 @@
  *
  */
 
-/*
- * This code does two things necessary for the enhanced TCP metrics to
- * function in a useful manner:
- *  1) It marks all non-host routes as `cloning', thus ensuring that
- *     every actual reference to such a route actually gets turned
- *     into a reference to a host route to the specific destination
- *     requested.
- *  2) When such routes lose all their references, it arranges for them
- *     to be deleted in some random collection of circumstances, so that
- *     a large quantity of stale routing data is not kept in kernel memory
- *     indefinitely.  See in6_rtqtimo() below for the exact mechanism.
- */
-
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -111,8 +98,6 @@ extern int	in6_inithead(void **head, int
 extern int	in6_detachhead(void **head, int off);
 #endif
 
-#define RTPRF_OURS		RTF_PROTO3	/* set on routes we manage */
-
 /*
  * Do what we need to do when inserting a route.
  */
@@ -183,42 +168,8 @@ in6_addroute(void *v_arg, void *n_arg, s
 	return (ret);
 }
 
-/*
- * This code is the inverse of in6_clsroute: on first reference, if we
- * were managing the route, stop doing so and set the expiration timer
- * back off again.
- */
-static struct radix_node *
-in6_matroute(void *v_arg, struct radix_node_head *head)
-{
-	struct radix_node *rn = rn_match(v_arg, head);
-	struct rtentry *rt = (struct rtentry *)rn;
-
-	if (rt) {
-		RT_LOCK(rt);
-		if (rt->rt_flags & RTPRF_OURS) {
-			rt->rt_flags &= ~RTPRF_OURS;
-			rt->rt_rmx.rmx_expire = 0;
-		}
-		RT_UNLOCK(rt);
-	}
-	return rn;
-}
-
 SYSCTL_DECL(_net_inet6_ip6);
 
-static VNET_DEFINE(int, rtq_reallyold6) = 60*60;
-	/* one hour is ``really old'' */
-#define	V_rtq_reallyold6		VNET(rtq_reallyold6)
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
-    &VNET_NAME(rtq_reallyold6) , 0, "");
-
-static VNET_DEFINE(int, rtq_minreallyold6) = 10;
-	/* never automatically crank down to less */
-#define	V_rtq_minreallyold6		VNET(rtq_minreallyold6)
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
-    &VNET_NAME(rtq_minreallyold6) , 0, "");
-
 static VNET_DEFINE(int, rtq_toomany6) = 128;
 	/* 128 cached routes is ``too many'' */
 #define	V_rtq_toomany6			VNET(rtq_toomany6)
@@ -236,122 +187,6 @@ struct rtqk_arg {
 };
 
 /*
- * Get rid of old routes.  When draining, this deletes everything, even when
- * the timeout is not expired yet.  When updating, this makes sure that
- * nothing has a timeout longer than the current value of rtq_reallyold6.
- */
-static int
-in6_rtqkill(struct radix_node *rn, void *rock)
-{
-	struct rtqk_arg *ap = rock;
-	struct rtentry *rt = (struct rtentry *)rn;
-	int err;
-
-	RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh);
-
-	if (rt->rt_flags & RTPRF_OURS) {
-		ap->found++;
-
-		if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
-			if (rt->rt_refcnt > 0)
-				panic("rtqkill route really not free");
-
-			err = in6_rtrequest(RTM_DELETE,
-					(struct sockaddr *)rt_key(rt),
-					rt->rt_gateway, rt_mask(rt),
-					rt->rt_flags|RTF_RNH_LOCKED, 0,
-					rt->rt_fibnum);
-			if (err) {
-				log(LOG_WARNING, "in6_rtqkill: error %d", err);
-			} else {
-				ap->killed++;
-			}
-		} else {
-			if (ap->updating
-			   && (rt->rt_rmx.rmx_expire - time_uptime
-			       > V_rtq_reallyold6)) {
-				rt->rt_rmx.rmx_expire = time_uptime
-					+ V_rtq_reallyold6;
-			}
-			ap->nextstop = lmin(ap->nextstop,
-					    rt->rt_rmx.rmx_expire);
-		}
-	}
-
-	return 0;
-}
-
-#define RTQ_TIMEOUT	60*10	/* run no less than once every ten minutes */
-static VNET_DEFINE(int, rtq_timeout6) = RTQ_TIMEOUT;
-static VNET_DEFINE(struct callout, rtq_timer6);
-
-#define	V_rtq_timeout6			VNET(rtq_timeout6)
-#define	V_rtq_timer6			VNET(rtq_timer6)
-
-static void
-in6_rtqtimo_one(struct radix_node_head *rnh)
-{
-	struct rtqk_arg arg;
-	static time_t last_adjusted_timeout = 0;
-
-	arg.found = arg.killed = 0;
-	arg.rnh = rnh;
-	arg.nextstop = time_uptime + V_rtq_timeout6;
-	arg.draining = arg.updating = 0;
-	RADIX_NODE_HEAD_LOCK(rnh);
-	rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
-	RADIX_NODE_HEAD_UNLOCK(rnh);
-
-	/*
-	 * Attempt to be somewhat dynamic about this:
-	 * If there are ``too many'' routes sitting around taking up space,
-	 * then crank down the timeout, and see if we can't make some more
-	 * go away.  However, we make sure that we will never adjust more
-	 * than once in rtq_timeout6 seconds, to keep from cranking down too
-	 * hard.
-	 */
-	if ((arg.found - arg.killed > V_rtq_toomany6)
-	   && (time_uptime - last_adjusted_timeout >= V_rtq_timeout6)
-	   && V_rtq_reallyold6 > V_rtq_minreallyold6) {
-		V_rtq_reallyold6 = 2*V_rtq_reallyold6 / 3;
-		if (V_rtq_reallyold6 < V_rtq_minreallyold6) {
-			V_rtq_reallyold6 = V_rtq_minreallyold6;
-		}
-
-		last_adjusted_timeout = time_uptime;
-#ifdef DIAGNOSTIC
-		log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold6 to %d",
-		    V_rtq_reallyold6);
-#endif
-		arg.found = arg.killed = 0;
-		arg.updating = 1;
-		RADIX_NODE_HEAD_LOCK(rnh);
-		rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
-		RADIX_NODE_HEAD_UNLOCK(rnh);
-	}
-}
-
-static void
-in6_rtqtimo(void *rock)
-{
-	CURVNET_SET_QUIET((struct vnet *) rock);
-	struct radix_node_head *rnh;
-	struct timeval atv;
-	u_int fibnum;
-
-	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
-		rnh = rt_tables_get_rnh(fibnum, AF_INET6);
-		if (rnh != NULL)
-			in6_rtqtimo_one(rnh);
-	}
-
-	atv.tv_usec = 0;
-	atv.tv_sec = V_rtq_timeout6;
-	callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock);
-	CURVNET_RESTORE();
-}
-
-/*
  * Age old PMTUs.
  */
 struct mtuex_arg {
@@ -440,12 +275,9 @@ in6_inithead(void **head, int off)
 
 	rnh = *head;
 	rnh->rnh_addaddr = in6_addroute;
-	rnh->rnh_matchaddr = in6_matroute;
 
 	if (V__in6_rt_was_here == 0) {
-		callout_init(&V_rtq_timer6, CALLOUT_MPSAFE);
 		callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
-		in6_rtqtimo(curvnet);	/* kick off timeout first time */
 		in6_mtutimo(curvnet);	/* kick off timeout first time */
 		V__in6_rt_was_here = 1;
 	}
@@ -458,7 +290,6 @@ int
 in6_detachhead(void **head, int off)
 {
 
-	callout_drain(&V_rtq_timer6);
 	callout_drain(&V_rtq_mtutimer);
 	return (1);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201211272016.qARKGbQH049649>