Date: Tue, 27 Nov 2012 20:16:37 +0000 (UTC) From: "Alexander V. Chernikov" <melifaro@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-8@freebsd.org Subject: svn commit: r243629 - in stable/8/sys: netinet netinet6 Message-ID: <201211272016.qARKGbQH049649@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: melifaro Date: Tue Nov 27 20:16:37 2012 New Revision: 243629 URL: http://svnweb.freebsd.org/changeset/base/243629 Log: MFC r241406, r241502, r241884. Do not check if found IPv4 rte is dynamic if net.inet.icmp.drop_redirect is enabled. This eliminates one mtx_lock() per each routing lookup thus improving performance in several cases (routing to directly connected interface or routing to default gateway). Icmp redirects should not be used to provide routing direction nowadays, even for end hosts. Routers should not use them too (and this is explicitly restricted in IPv6, see RFC 4861, clause 8.2). Current commit changes rnh_machaddr function to 'stock' rn_match (and back) for every AF_INET routing table in given VNET instance on drop_redirect sysctl change. Eliminate code checking if found IPv6 rte is dynamic. IPv6 redirects are using (different) ND-based approach described in RFC 4861. This change is similar to r241406 which conditionally skips the same check in IPv4. Cleanup documentation: cloning route support has been removed in r186119. This change is part of bigger patch eliminating rte locking. Sponsored by: Yandex LLC Modified: stable/8/sys/netinet/in_rmx.c stable/8/sys/netinet/in_var.h stable/8/sys/netinet/ip_icmp.c stable/8/sys/netinet/ip_var.h stable/8/sys/netinet6/in6_rmx.c Directory Properties: stable/8/sys/ (props changed) stable/8/sys/netinet/ (props changed) stable/8/sys/netinet6/ (props changed) Modified: stable/8/sys/netinet/in_rmx.c ============================================================================== --- stable/8/sys/netinet/in_rmx.c Tue Nov 27 20:16:01 2012 (r243628) +++ stable/8/sys/netinet/in_rmx.c Tue Nov 27 20:16:37 2012 (r243629) @@ -27,19 +27,6 @@ * SUCH DAMAGE. */ -/* - * This code does two things necessary for the enhanced TCP metrics to - * function in a useful manner: - * 1) It marks all non-host routes as `cloning', thus ensuring that - * every actual reference to such a route actually gets turned - * into a reference to a host route to the specific destination - * requested. - * 2) When such routes lose all their references, it arranges for them - * to be deleted in some random collection of circumstances, so that - * a large quantity of stale routing data is not kept in kernel memory - * indefinitely. See in_rtqtimo() below for the exact mechanism. - */ - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -58,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include <netinet/in.h> #include <netinet/in_var.h> +#include <netinet/ip.h> +#include <netinet/ip_icmp.h> #include <netinet/ip_var.h> extern int in_inithead(void **head, int off); @@ -340,6 +329,13 @@ in_rtqdrain(void) VNET_LIST_RUNLOCK_NOSLEEP(); } +void +in_setmatchfunc(struct radix_node_head *rnh, int val) +{ + + rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute; +} + static int _in_rt_was_here; /* * Initialize our routing tree. @@ -365,7 +361,7 @@ in_inithead(void **head, int off) rnh = *head; rnh->rnh_addaddr = in_addroute; - rnh->rnh_matchaddr = in_matroute; + in_setmatchfunc(rnh, V_drop_redirect); rnh->rnh_close = in_clsroute; if (_in_rt_was_here == 0 ) { callout_init(&V_rtq_timer, CALLOUT_MPSAFE); Modified: stable/8/sys/netinet/in_var.h ============================================================================== --- stable/8/sys/netinet/in_var.h Tue Nov 27 20:16:01 2012 (r243628) +++ stable/8/sys/netinet/in_var.h Tue Nov 27 20:16:37 2012 (r243629) @@ -423,6 +423,7 @@ inm_acquire_locked(struct in_multi *inm) struct rtentry; struct route; struct ip_moptions; +struct radix_node_head; int imo_multi_filter(const struct ip_moptions *, const struct ifnet *, const struct sockaddr *, const struct sockaddr *); @@ -461,6 +462,7 @@ void in_rtredirect(struct sockaddr *, s struct sockaddr *, int, struct sockaddr *, u_int); int in_rtrequest(int, struct sockaddr *, struct sockaddr *, struct sockaddr *, int, struct rtentry **, u_int); +void in_setmatchfunc(struct radix_node_head *, int); #if 0 int in_rt_getifa(struct rt_addrinfo *, u_int fibnum); Modified: stable/8/sys/netinet/ip_icmp.c ============================================================================== --- stable/8/sys/netinet/ip_icmp.c Tue Nov 27 20:16:01 2012 (r243628) +++ stable/8/sys/netinet/ip_icmp.c Tue Nov 27 20:16:37 2012 (r243629) @@ -92,11 +92,7 @@ SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUT &VNET_NAME(icmpmaskfake), 0, "Fake reply to ICMP Address Mask Request packets."); -static VNET_DEFINE(int, drop_redirect) = 0; -#define V_drop_redirect VNET(drop_redirect) -SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW, - &VNET_NAME(drop_redirect), 0, - "Ignore ICMP redirects"); +VNET_DEFINE(int, drop_redirect) = 0; static VNET_DEFINE(int, log_redirect) = 0; #define V_log_redirect VNET(log_redirect) @@ -153,6 +149,39 @@ static void icmp_send(struct mbuf *, str extern struct protosw inetsw[]; +static int +sysctl_net_icmp_drop_redir(SYSCTL_HANDLER_ARGS) +{ + int error, new; + int i; + struct radix_node_head *rnh; + + new = V_drop_redirect; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + new = (new != 0) ? 1 : 0; + + if (new == V_drop_redirect) + return (0); + + for (i = 0; i < rt_numfibs; i++) { + if ((rnh = rt_tables_get_rnh(i, AF_INET)) == NULL) + continue; + RADIX_NODE_HEAD_LOCK(rnh); + in_setmatchfunc(rnh, new); + RADIX_NODE_HEAD_UNLOCK(rnh); + } + + V_drop_redirect = new; + } + + return (error); +} + +SYSCTL_VNET_PROC(_net_inet_icmp, OID_AUTO, drop_redirect, + CTLTYPE_INT|CTLFLAG_RW, 0, 0, + sysctl_net_icmp_drop_redir, "I", "Ignore ICMP redirects"); + /* * Kernel module interface for updating icmpstat. The argument is an index * into icmpstat treated as an array of u_long. While this encodes the Modified: stable/8/sys/netinet/ip_var.h ============================================================================== --- stable/8/sys/netinet/ip_var.h Tue Nov 27 20:16:01 2012 (r243628) +++ stable/8/sys/netinet/ip_var.h Tue Nov 27 20:16:37 2012 (r243629) @@ -187,6 +187,7 @@ VNET_DECLARE(struct socket *, ip_mrouter extern int (*legal_vif_num)(int); extern u_long (*ip_mcast_src)(int); VNET_DECLARE(int, rsvp_on); +VNET_DECLARE(int, drop_redirect); extern struct pr_usrreqs rip_usrreqs; #define V_ipstat VNET(ipstat) @@ -199,6 +200,7 @@ extern struct pr_usrreqs rip_usrreqs; #define V_ip_rsvpd VNET(ip_rsvpd) #define V_ip_mrouter VNET(ip_mrouter) #define V_rsvp_on VNET(rsvp_on) +#define V_drop_redirect VNET(drop_redirect) void inp_freemoptions(struct ip_moptions *); int inp_getmoptions(struct inpcb *, struct sockopt *); Modified: stable/8/sys/netinet6/in6_rmx.c ============================================================================== --- stable/8/sys/netinet6/in6_rmx.c Tue Nov 27 20:16:01 2012 (r243628) +++ stable/8/sys/netinet6/in6_rmx.c Tue Nov 27 20:16:37 2012 (r243629) @@ -59,19 +59,6 @@ * */ -/* - * This code does two things necessary for the enhanced TCP metrics to - * function in a useful manner: - * 1) It marks all non-host routes as `cloning', thus ensuring that - * every actual reference to such a route actually gets turned - * into a reference to a host route to the specific destination - * requested. - * 2) When such routes lose all their references, it arranges for them - * to be deleted in some random collection of circumstances, so that - * a large quantity of stale routing data is not kept in kernel memory - * indefinitely. See in6_rtqtimo() below for the exact mechanism. - */ - #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); @@ -111,8 +98,6 @@ extern int in6_inithead(void **head, int extern int in6_detachhead(void **head, int off); #endif -#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */ - /* * Do what we need to do when inserting a route. */ @@ -183,42 +168,8 @@ in6_addroute(void *v_arg, void *n_arg, s return (ret); } -/* - * This code is the inverse of in6_clsroute: on first reference, if we - * were managing the route, stop doing so and set the expiration timer - * back off again. - */ -static struct radix_node * -in6_matroute(void *v_arg, struct radix_node_head *head) -{ - struct radix_node *rn = rn_match(v_arg, head); - struct rtentry *rt = (struct rtentry *)rn; - - if (rt) { - RT_LOCK(rt); - if (rt->rt_flags & RTPRF_OURS) { - rt->rt_flags &= ~RTPRF_OURS; - rt->rt_rmx.rmx_expire = 0; - } - RT_UNLOCK(rt); - } - return rn; -} - SYSCTL_DECL(_net_inet6_ip6); -static VNET_DEFINE(int, rtq_reallyold6) = 60*60; - /* one hour is ``really old'' */ -#define V_rtq_reallyold6 VNET(rtq_reallyold6) -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW, - &VNET_NAME(rtq_reallyold6) , 0, ""); - -static VNET_DEFINE(int, rtq_minreallyold6) = 10; - /* never automatically crank down to less */ -#define V_rtq_minreallyold6 VNET(rtq_minreallyold6) -SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW, - &VNET_NAME(rtq_minreallyold6) , 0, ""); - static VNET_DEFINE(int, rtq_toomany6) = 128; /* 128 cached routes is ``too many'' */ #define V_rtq_toomany6 VNET(rtq_toomany6) @@ -236,122 +187,6 @@ struct rtqk_arg { }; /* - * Get rid of old routes. When draining, this deletes everything, even when - * the timeout is not expired yet. When updating, this makes sure that - * nothing has a timeout longer than the current value of rtq_reallyold6. - */ -static int -in6_rtqkill(struct radix_node *rn, void *rock) -{ - struct rtqk_arg *ap = rock; - struct rtentry *rt = (struct rtentry *)rn; - int err; - - RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh); - - if (rt->rt_flags & RTPRF_OURS) { - ap->found++; - - if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) { - if (rt->rt_refcnt > 0) - panic("rtqkill route really not free"); - - err = in6_rtrequest(RTM_DELETE, - (struct sockaddr *)rt_key(rt), - rt->rt_gateway, rt_mask(rt), - rt->rt_flags|RTF_RNH_LOCKED, 0, - rt->rt_fibnum); - if (err) { - log(LOG_WARNING, "in6_rtqkill: error %d", err); - } else { - ap->killed++; - } - } else { - if (ap->updating - && (rt->rt_rmx.rmx_expire - time_uptime - > V_rtq_reallyold6)) { - rt->rt_rmx.rmx_expire = time_uptime - + V_rtq_reallyold6; - } - ap->nextstop = lmin(ap->nextstop, - rt->rt_rmx.rmx_expire); - } - } - - return 0; -} - -#define RTQ_TIMEOUT 60*10 /* run no less than once every ten minutes */ -static VNET_DEFINE(int, rtq_timeout6) = RTQ_TIMEOUT; -static VNET_DEFINE(struct callout, rtq_timer6); - -#define V_rtq_timeout6 VNET(rtq_timeout6) -#define V_rtq_timer6 VNET(rtq_timer6) - -static void -in6_rtqtimo_one(struct radix_node_head *rnh) -{ - struct rtqk_arg arg; - static time_t last_adjusted_timeout = 0; - - arg.found = arg.killed = 0; - arg.rnh = rnh; - arg.nextstop = time_uptime + V_rtq_timeout6; - arg.draining = arg.updating = 0; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in6_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - - /* - * Attempt to be somewhat dynamic about this: - * If there are ``too many'' routes sitting around taking up space, - * then crank down the timeout, and see if we can't make some more - * go away. However, we make sure that we will never adjust more - * than once in rtq_timeout6 seconds, to keep from cranking down too - * hard. - */ - if ((arg.found - arg.killed > V_rtq_toomany6) - && (time_uptime - last_adjusted_timeout >= V_rtq_timeout6) - && V_rtq_reallyold6 > V_rtq_minreallyold6) { - V_rtq_reallyold6 = 2*V_rtq_reallyold6 / 3; - if (V_rtq_reallyold6 < V_rtq_minreallyold6) { - V_rtq_reallyold6 = V_rtq_minreallyold6; - } - - last_adjusted_timeout = time_uptime; -#ifdef DIAGNOSTIC - log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold6 to %d", - V_rtq_reallyold6); -#endif - arg.found = arg.killed = 0; - arg.updating = 1; - RADIX_NODE_HEAD_LOCK(rnh); - rnh->rnh_walktree(rnh, in6_rtqkill, &arg); - RADIX_NODE_HEAD_UNLOCK(rnh); - } -} - -static void -in6_rtqtimo(void *rock) -{ - CURVNET_SET_QUIET((struct vnet *) rock); - struct radix_node_head *rnh; - struct timeval atv; - u_int fibnum; - - for (fibnum = 0; fibnum < rt_numfibs; fibnum++) { - rnh = rt_tables_get_rnh(fibnum, AF_INET6); - if (rnh != NULL) - in6_rtqtimo_one(rnh); - } - - atv.tv_usec = 0; - atv.tv_sec = V_rtq_timeout6; - callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock); - CURVNET_RESTORE(); -} - -/* * Age old PMTUs. */ struct mtuex_arg { @@ -440,12 +275,9 @@ in6_inithead(void **head, int off) rnh = *head; rnh->rnh_addaddr = in6_addroute; - rnh->rnh_matchaddr = in6_matroute; if (V__in6_rt_was_here == 0) { - callout_init(&V_rtq_timer6, CALLOUT_MPSAFE); callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE); - in6_rtqtimo(curvnet); /* kick off timeout first time */ in6_mtutimo(curvnet); /* kick off timeout first time */ V__in6_rt_was_here = 1; } @@ -458,7 +290,6 @@ int in6_detachhead(void **head, int off) { - callout_drain(&V_rtq_timer6); callout_drain(&V_rtq_mtutimer); return (1); }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201211272016.qARKGbQH049649>