Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 24 Mar 2016 07:54:56 +0000 (UTC)
From:      "George V. Neville-Neil" <gnn@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r297225 - in head/sys: net netinet netinet6
Message-ID:  <201603240754.u2O7suvM006925@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: gnn
Date: Thu Mar 24 07:54:56 2016
New Revision: 297225
URL: https://svnweb.freebsd.org/changeset/base/297225

Log:
  FreeBSD previously provided route caching for TCP (and UDP). Re-add
  route caching for TCP, with some improvements. In particular, invalidate
  the route cache if a new route is added, which might be a better match.
  The cache is automatically invalidated if the old route is deleted.
  
  Submitted by:	Mike Karels
  Reviewed by:	gnn
  Differential Revision:	https://reviews.freebsd.org/D4306

Modified:
  head/sys/net/route.c
  head/sys/net/route.h
  head/sys/net/route_var.h
  head/sys/netinet/in_pcb.c
  head/sys/netinet/in_pcb.h
  head/sys/netinet/ip_output.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet6/in6_pcb.c
  head/sys/netinet6/ip6_output.c
  head/sys/netinet6/udp6_usrreq.c

Modified: head/sys/net/route.c
==============================================================================
--- head/sys/net/route.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/net/route.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -201,6 +201,16 @@ rt_tables_get_rnh(int table, int fam)
 	return (*rt_tables_get_rnh_ptr(table, fam));
 }
 
+rt_gen_t
+rt_tables_get_gen(int table, int fam)
+{
+	struct rib_head *rnh;
+
+	rnh = *rt_tables_get_rnh_ptr(table, fam);
+	return (rnh->rnh_gen);
+}
+
+
 /*
  * route initialization must occur before ip6_init2(), which happenas at
  * SI_ORDER_MIDDLE.
@@ -1754,6 +1764,7 @@ rtrequest1_fib(int req, struct rt_addrin
 			*ret_nrt = rt;
 			RT_ADDREF(rt);
 		}
+		rnh->rnh_gen++;		/* Routing table updated */
 		RT_UNLOCK(rt);
 		break;
 	case RTM_CHANGE:

Modified: head/sys/net/route.h
==============================================================================
--- head/sys/net/route.h	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/net/route.h	Thu Mar 24 07:54:56 2016	(r297225)
@@ -98,6 +98,14 @@ struct rt_metrics {
 /* lle state is exported in rmx_state rt_metrics field */
 #define	rmx_state	rmx_weight
 
+/*
+ * Keep a generation count of routing table, incremented on route addition,
+ * so we can invalidate caches.  This is accessed without a lock, as precision
+ * is not required.
+ */
+typedef volatile u_int rt_gen_t;	/* tree generation (for adds) */
+#define RT_GEN(fibnum, af)	rt_tables_get_gen(fibnum, af)
+
 #define	RT_DEFAULT_FIB	0	/* Explicitly mark fib=0 restricted cases */
 #define	RT_ALL_FIBS	-1	/* Announce event for every fib */
 #ifdef _KERNEL
@@ -398,6 +406,20 @@ struct rt_addrinfo {
 	}							\
 } while (0)
 
+/*
+ * Validate a cached route based on a supplied cookie.  If there is an
+ * out-of-date cache, simply free it.  Update the generation number
+ * for the new allocation
+ */
+#define RT_VALIDATE(ro, cookiep, fibnum) do {				\
+	rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family);	\
+	if (*(cookiep) != cookie && (ro)->ro_rt != NULL) {		\
+		RTFREE((ro)->ro_rt);					\
+		(ro)->ro_rt = NULL;					\
+		*(cookiep) = cookie;					\
+	}								\
+} while (0)
+
 struct ifmultiaddr;
 struct rib_head;
 
@@ -415,6 +437,7 @@ int	 rt_setgate(struct rtentry *, struct
 void 	 rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
 struct rib_head *rt_table_init(int);
 void	rt_table_destroy(struct rib_head *);
+rt_gen_t rt_tables_get_gen(int table, int fam);
 
 int	rtsock_addrmsg(int, struct ifaddr *, int);
 int	rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int);

Modified: head/sys/net/route_var.h
==============================================================================
--- head/sys/net/route_var.h	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/net/route_var.h	Thu Mar 24 07:54:56 2016	(r297225)
@@ -41,7 +41,7 @@ struct rib_head {
 	rn_walktree_t		*rnh_walktree;	/* traverse tree */
 	rn_walktree_from_t	*rnh_walktree_from; /* traverse tree below a */
 	rn_close_t		*rnh_close;	/*do something when the last ref drops*/
-	u_int			rnh_gen;	/* generation counter */
+	rt_gen_t		rnh_gen;	/* generation counter */
 	int			rnh_multipath;	/* multipath capable ? */
 	struct radix_node	rnh_nodes[3];	/* empty tree for common case */
 	struct rwlock		rib_lock;	/* config/data path lock */

Modified: head/sys/netinet/in_pcb.c
==============================================================================
--- head/sys/netinet/in_pcb.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet/in_pcb.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -1298,6 +1298,11 @@ in_pcbfree(struct inpcb *inp)
 	if (inp->inp_moptions != NULL)
 		inp_freemoptions(inp->inp_moptions);
 #endif
+	if (inp->inp_route.ro_rt) {
+		RTFREE(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = (struct rtentry *)NULL;
+	}
+
 	inp->inp_vflag = 0;
 	inp->inp_flags2 |= INP_FREED;
 	crfree(inp->inp_cred);
@@ -2225,6 +2230,23 @@ in_pcbremlists(struct inpcb *inp)
 }
 
 /*
+ * Check for alternatives when higher level complains
+ * about service problems.  For now, invalidate cached
+ * routing information.  If the route was created dynamically
+ * (by a redirect), time to try a default gateway again.
+ */
+void
+in_losing(struct inpcb *inp)
+{
+
+	if (inp->inp_route.ro_rt) {
+		RTFREE(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = (struct rtentry *)NULL;
+	}
+	return;
+}
+
+/*
  * A set label operation has occurred at the socket layer, propagate the
  * label change into the in_pcb for the socket.
  */

Modified: head/sys/netinet/in_pcb.h
==============================================================================
--- head/sys/netinet/in_pcb.h	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet/in_pcb.h	Thu Mar 24 07:54:56 2016	(r297225)
@@ -42,6 +42,7 @@
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <sys/_rwlock.h>
+#include <net/route.h>
 
 #ifdef _KERNEL
 #include <sys/lock.h>
@@ -238,8 +239,14 @@ struct inpcb {
 #define inp_zero_size offsetof(struct inpcb, inp_gencnt)
 	inp_gen_t	inp_gencnt;	/* (c) generation count */
 	struct llentry	*inp_lle;	/* cached L2 information */
-	struct rtentry	*inp_rt;	/* cached L3 information */
 	struct rwlock	inp_lock;
+	rt_gen_t	inp_rt_cookie;	/* generation for route entry */
+	union {				/* cached L3 information */
+		struct route inpu_route;
+		struct route_in6 inpu_route6;
+	} inp_rtu;
+#define inp_route inp_rtu.inpu_route
+#define inp_route6 inp_rtu.inpu_route6
 };
 #define	inp_fport	inp_inc.inc_fport
 #define	inp_lport	inp_inc.inc_lport
@@ -722,6 +729,7 @@ void	in_pcbrehash_mbuf(struct inpcb *, s
 int	in_pcbrele(struct inpcb *);
 int	in_pcbrele_rlocked(struct inpcb *);
 int	in_pcbrele_wlocked(struct inpcb *);
+void	in_losing(struct inpcb *);
 void	in_pcbsetsolabel(struct socket *so);
 int	in_getpeeraddr(struct socket *so, struct sockaddr **nam);
 int	in_getsockaddr(struct socket *so, struct sockaddr **nam);

Modified: head/sys/netinet/ip_output.c
==============================================================================
--- head/sys/netinet/ip_output.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet/ip_output.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -282,17 +282,36 @@ ip_output(struct mbuf *m, struct mbuf *o
 	gw = dst = (struct sockaddr_in *)&ro->ro_dst;
 	fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m);
 	rte = ro->ro_rt;
-	/*
-	 * The address family should also be checked in case of sharing
-	 * the cache with IPv6.
-	 */
-	if (rte == NULL || dst->sin_family != AF_INET) {
+	if (rte == NULL) {
 		bzero(dst, sizeof(*dst));
 		dst->sin_family = AF_INET;
 		dst->sin_len = sizeof(*dst);
 		dst->sin_addr = ip->ip_dst;
 	}
 again:
+	/*
+	 * Validate route against routing table additions;
+	 * a better/more specific route might have been added.
+	 */
+	if (inp)
+		RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
+	/*
+	 * If there is a cached route,
+	 * check that it is to the same destination
+	 * and is still up.  If not, free it and try again.
+	 * The address family should also be checked in case of sharing the
+	 * cache with IPv6.
+	 * Also check whether routing cache needs invalidation.
+	 */
+	rte = ro->ro_rt;
+	if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
+		    rte->rt_ifp == NULL ||
+		    !RT_LINK_IS_UP(rte->rt_ifp) ||
+			  dst->sin_family != AF_INET ||
+			  dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
+		RTFREE(rte);
+		rte = ro->ro_rt = (struct rtentry *)NULL;
+	}
 	ia = NULL;
 	have_ia_ref = 0;
 	/*

Modified: head/sys/netinet/tcp_output.c
==============================================================================
--- head/sys/netinet/tcp_output.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet/tcp_output.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -1379,9 +1379,6 @@ send:
 #endif
 #ifdef INET
     {
-	struct route ro;
-
-	bzero(&ro, sizeof(ro));
 	ip->ip_len = htons(m->m_pkthdr.len);
 #ifdef INET6
 	if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO)
@@ -1412,13 +1409,12 @@ send:
 	tcp_pcap_add(th, m, &(tp->t_outpkts));
 #endif
 
-	error = ip_output(m, tp->t_inpcb->inp_options, &ro,
+	error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route,
 	    ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
 	    tp->t_inpcb);
 
-	if (error == EMSGSIZE && ro.ro_rt != NULL)
-		mtu = ro.ro_rt->rt_mtu;
-	RO_RTFREE(&ro);
+	if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL)
+		mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu;
     }
 #endif /* INET */
 

Modified: head/sys/netinet/tcp_subr.c
==============================================================================
--- head/sys/netinet/tcp_subr.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet/tcp_subr.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -1632,6 +1632,10 @@ tcp_notify(struct inpcb *inp, int error)
 	if (tp->t_state == TCPS_ESTABLISHED &&
 	    (error == EHOSTUNREACH || error == ENETUNREACH ||
 	     error == EHOSTDOWN)) {
+		if (inp->inp_route.ro_rt) {
+			RTFREE(inp->inp_route.ro_rt);
+			inp->inp_route.ro_rt = (struct rtentry *)NULL;
+		}
 		return (inp);
 	} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
 	    tp->t_softerror) {
@@ -1926,11 +1930,11 @@ tcp_ctlinput(int cmd, struct sockaddr *s
 	else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB ||
 		cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip)
 		notify = tcp_drop_syn_sent;
-	/*
-	 * Redirects don't need to be handled up here.
-	 */
-	else if (PRC_IS_REDIRECT(cmd))
+	else if (PRC_IS_REDIRECT(cmd)) {
+		/* signal EHOSTDOWN, as it flushes the cached route */
+		in_pcbnotifyall(&tcbinfo, faddr, EHOSTDOWN, notify);
 		return;
+	}
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
 	 * XXX: We never get this from ICMP, otherwise it makes an

Modified: head/sys/netinet/tcp_timer.c
==============================================================================
--- head/sys/netinet/tcp_timer.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet/tcp_timer.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -786,7 +786,9 @@ tcp_timer_rexmt(void * xtp)
 #ifdef INET6
 		if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 			in6_losing(tp->t_inpcb);
+		else
 #endif
+			in_losing(tp->t_inpcb);
 		tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 		tp->t_srtt = 0;
 	}

Modified: head/sys/netinet/udp_usrreq.c
==============================================================================
--- head/sys/netinet/udp_usrreq.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet/udp_usrreq.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -740,6 +740,11 @@ udp_notify(struct inpcb *inp, int errno)
 	 * or a write lock, but a read lock is sufficient.
 	 */
 	INP_LOCK_ASSERT(inp);
+	if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
+	     errno == EHOSTDOWN) && inp->inp_route.ro_rt) {
+		RTFREE(inp->inp_route.ro_rt);
+		inp->inp_route.ro_rt = (struct rtentry *)NULL;
+	}
 
 	inp->inp_socket->so_error = errno;
 	sorwakeup(inp->inp_socket);
@@ -761,11 +766,11 @@ udp_common_ctlinput(int cmd, struct sock
 	if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY)
 		return;
 
-	/*
-	 * Redirects don't need to be handled up here.
-	 */
-	if (PRC_IS_REDIRECT(cmd))
+	if (PRC_IS_REDIRECT(cmd)) {
+		/* signal EHOSTDOWN, as it flushes the cached route */
+		in_pcbnotifyall(&udbinfo, faddr, EHOSTDOWN, udp_notify);
 		return;
+	}
 
 	/*
 	 * Hostdead is ugly because it goes linearly through all PCBs.
@@ -1116,7 +1121,7 @@ udp_output(struct inpcb *inp, struct mbu
 	int error = 0;
 	int ipflags;
 	u_short fport, lport;
-	int unlock_udbinfo;
+	int unlock_udbinfo, unlock_inp;
 	u_char tos;
 	uint8_t pr;
 	uint16_t cscov = 0;
@@ -1137,7 +1142,15 @@ udp_output(struct inpcb *inp, struct mbu
 	}
 
 	src.sin_family = 0;
-	INP_RLOCK(inp);
+	sin = (struct sockaddr_in *)addr;
+	if (sin == NULL ||
+	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
+		INP_WLOCK(inp);
+		unlock_inp = UH_WLOCKED;
+	} else {
+		INP_RLOCK(inp);
+		unlock_inp = UH_RLOCKED;
+	}
 	tos = inp->inp_ip_tos;
 	if (control != NULL) {
 		/*
@@ -1145,7 +1158,10 @@ udp_output(struct inpcb *inp, struct mbu
 		 * stored in a single mbuf.
 		 */
 		if (control->m_next) {
-			INP_RUNLOCK(inp);
+			if (unlock_inp == UH_WLOCKED)
+				INP_WUNLOCK(inp);
+			else
+				INP_RUNLOCK(inp);
 			m_freem(control);
 			m_freem(m);
 			return (EINVAL);
@@ -1220,7 +1236,10 @@ udp_output(struct inpcb *inp, struct mbu
 		m_freem(control);
 	}
 	if (error) {
-		INP_RUNLOCK(inp);
+		if (unlock_inp == UH_WLOCKED)
+			INP_WUNLOCK(inp);
+		else
+			INP_RUNLOCK(inp);
 		m_freem(m);
 		return (error);
 	}
@@ -1246,8 +1265,6 @@ udp_output(struct inpcb *inp, struct mbu
 	sin = (struct sockaddr_in *)addr;
 	if (sin != NULL &&
 	    (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) {
-		INP_RUNLOCK(inp);
-		INP_WLOCK(inp);
 		INP_HASH_WLOCK(pcbinfo);
 		unlock_udbinfo = UH_WLOCKED;
 	} else if ((sin != NULL && (
@@ -1514,9 +1531,10 @@ udp_output(struct inpcb *inp, struct mbu
 	else if (unlock_udbinfo == UH_RLOCKED)
 		INP_HASH_RUNLOCK(pcbinfo);
 	UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u);
-	error = ip_output(m, inp->inp_options, NULL, ipflags,
+	error = ip_output(m, inp->inp_options,
+	    (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags,
 	    inp->inp_moptions, inp);
-	if (unlock_udbinfo == UH_WLOCKED)
+	if (unlock_inp == UH_WLOCKED)
 		INP_WUNLOCK(inp);
 	else
 		INP_RUNLOCK(inp);

Modified: head/sys/netinet6/in6_pcb.c
==============================================================================
--- head/sys/netinet6/in6_pcb.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet6/in6_pcb.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -827,9 +827,10 @@ void
 in6_losing(struct inpcb *in6p)
 {
 
-	/*
-	 * We don't store route pointers in the routing table anymore
-	 */
+	if (in6p->inp_route6.ro_rt) {
+		RTFREE(in6p->inp_route6.ro_rt);
+		in6p->inp_route6.ro_rt = (struct rtentry *)NULL;
+	}
 	return;
 }
 
@@ -840,9 +841,11 @@ in6_losing(struct inpcb *in6p)
 struct inpcb *
 in6_rtchange(struct inpcb *inp, int errno)
 {
-	/*
-	 * We don't store route pointers in the routing table anymore
-	 */
+
+	if (inp->inp_route6.ro_rt) {
+		RTFREE(inp->inp_route6.ro_rt);
+		inp->inp_route6.ro_rt = (struct rtentry *)NULL;
+	}
 	return inp;
 }
 

Modified: head/sys/netinet6/ip6_output.c
==============================================================================
--- head/sys/netinet6/ip6_output.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet6/ip6_output.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -546,7 +546,18 @@ again:
 	/* adjust pointer */
 	ip6 = mtod(m, struct ip6_hdr *);
 
-	if (ro->ro_rt && fwd_tag == NULL) {
+	/*
+	 * Validate route against routing table additions;
+	 * a better/more specific route might have been added.
+	 * Make sure address family is set in route.
+	 */
+	if (inp) {
+		ro->ro_dst.sin6_family = AF_INET6;
+		RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum);
+	}
+	if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) &&
+	    ro->ro_dst.sin6_family == AF_INET6 &&
+	    IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
 		rt = ro->ro_rt;
 		ifp = ro->ro_rt->rt_ifp;
 	} else {
@@ -939,7 +950,8 @@ passout:
 			    m->m_pkthdr.len);
 			ifa_free(&ia6->ia_ifa);
 		}
-		error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
+		error = nd6_output_ifp(ifp, origifp, m, dst,
+		    (struct route *)ro);
 		goto done;
 	}
 
@@ -1038,7 +1050,8 @@ sendorfree:
 				counter_u64_add(ia->ia_ifa.ifa_obytes,
 				    m->m_pkthdr.len);
 			}
-			error = nd6_output_ifp(ifp, origifp, m, dst, NULL);
+			error = nd6_output_ifp(ifp, origifp, m, dst,
+			    (struct route *)ro);
 		} else
 			m_freem(m);
 	}

Modified: head/sys/netinet6/udp6_usrreq.c
==============================================================================
--- head/sys/netinet6/udp6_usrreq.c	Thu Mar 24 07:24:33 2016	(r297224)
+++ head/sys/netinet6/udp6_usrreq.c	Thu Mar 24 07:54:56 2016	(r297225)
@@ -876,8 +876,8 @@ udp6_output(struct inpcb *inp, struct mb
 
 		UDP_PROBE(send, NULL, inp, ip6, inp, udp6);
 		UDPSTAT_INC(udps_opackets);
-		error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions,
-		    NULL, inp);
+		error = ip6_output(m, optp, &inp->inp_route6, flags,
+		    inp->in6p_moptions, NULL, inp);
 		break;
 	case AF_INET:
 		error = EAFNOSUPPORT;



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201603240754.u2O7suvM006925>