From owner-svn-src-head@freebsd.org Thu Mar 24 07:54:58 2016 Return-Path: Delivered-To: svn-src-head@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 918B9ADC465; Thu, 24 Mar 2016 07:54:58 +0000 (UTC) (envelope-from gnn@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 5A7D61507; Thu, 24 Mar 2016 07:54:58 +0000 (UTC) (envelope-from gnn@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id u2O7svKW006939; Thu, 24 Mar 2016 07:54:57 GMT (envelope-from gnn@FreeBSD.org) Received: (from gnn@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id u2O7suvM006925; Thu, 24 Mar 2016 07:54:56 GMT (envelope-from gnn@FreeBSD.org) Message-Id: <201603240754.u2O7suvM006925@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: gnn set sender to gnn@FreeBSD.org using -f From: "George V. Neville-Neil" Date: Thu, 24 Mar 2016 07:54:56 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r297225 - in head/sys: net netinet netinet6 X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-head@freebsd.org X-Mailman-Version: 2.1.21 Precedence: list List-Id: SVN commit messages for the src tree for head/-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 24 Mar 2016 07:54:58 -0000 Author: gnn Date: Thu Mar 24 07:54:56 2016 New Revision: 297225 URL: https://svnweb.freebsd.org/changeset/base/297225 Log: FreeBSD previously provided route caching for TCP (and UDP). Re-add route caching for TCP, with some improvements. In particular, invalidate the route cache if a new route is added, which might be a better match. The cache is automatically invalidated if the old route is deleted. Submitted by: Mike Karels Reviewed by: gnn Differential Revision: https://reviews.freebsd.org/D4306 Modified: head/sys/net/route.c head/sys/net/route.h head/sys/net/route_var.h head/sys/netinet/in_pcb.c head/sys/netinet/in_pcb.h head/sys/netinet/ip_output.c head/sys/netinet/tcp_output.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_timer.c head/sys/netinet/udp_usrreq.c head/sys/netinet6/in6_pcb.c head/sys/netinet6/ip6_output.c head/sys/netinet6/udp6_usrreq.c Modified: head/sys/net/route.c ============================================================================== --- head/sys/net/route.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/net/route.c Thu Mar 24 07:54:56 2016 (r297225) @@ -201,6 +201,16 @@ rt_tables_get_rnh(int table, int fam) return (*rt_tables_get_rnh_ptr(table, fam)); } +rt_gen_t +rt_tables_get_gen(int table, int fam) +{ + struct rib_head *rnh; + + rnh = *rt_tables_get_rnh_ptr(table, fam); + return (rnh->rnh_gen); +} + + /* * route initialization must occur before ip6_init2(), which happenas at * SI_ORDER_MIDDLE. @@ -1754,6 +1764,7 @@ rtrequest1_fib(int req, struct rt_addrin *ret_nrt = rt; RT_ADDREF(rt); } + rnh->rnh_gen++; /* Routing table updated */ RT_UNLOCK(rt); break; case RTM_CHANGE: Modified: head/sys/net/route.h ============================================================================== --- head/sys/net/route.h Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/net/route.h Thu Mar 24 07:54:56 2016 (r297225) @@ -98,6 +98,14 @@ struct rt_metrics { /* lle state is exported in rmx_state rt_metrics field */ #define rmx_state rmx_weight +/* + * Keep a generation count of routing table, incremented on route addition, + * so we can invalidate caches. This is accessed without a lock, as precision + * is not required. + */ +typedef volatile u_int rt_gen_t; /* tree generation (for adds) */ +#define RT_GEN(fibnum, af) rt_tables_get_gen(fibnum, af) + #define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */ #define RT_ALL_FIBS -1 /* Announce event for every fib */ #ifdef _KERNEL @@ -398,6 +406,20 @@ struct rt_addrinfo { } \ } while (0) +/* + * Validate a cached route based on a supplied cookie. If there is an + * out-of-date cache, simply free it. Update the generation number + * for the new allocation + */ +#define RT_VALIDATE(ro, cookiep, fibnum) do { \ + rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \ + if (*(cookiep) != cookie && (ro)->ro_rt != NULL) { \ + RTFREE((ro)->ro_rt); \ + (ro)->ro_rt = NULL; \ + *(cookiep) = cookie; \ + } \ +} while (0) + struct ifmultiaddr; struct rib_head; @@ -415,6 +437,7 @@ int rt_setgate(struct rtentry *, struct void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *); struct rib_head *rt_table_init(int); void rt_table_destroy(struct rib_head *); +rt_gen_t rt_tables_get_gen(int table, int fam); int rtsock_addrmsg(int, struct ifaddr *, int); int rtsock_routemsg(int, struct ifnet *ifp, int, struct rtentry *, int); Modified: head/sys/net/route_var.h ============================================================================== --- head/sys/net/route_var.h Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/net/route_var.h Thu Mar 24 07:54:56 2016 (r297225) @@ -41,7 +41,7 @@ struct rib_head { rn_walktree_t *rnh_walktree; /* traverse tree */ rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */ rn_close_t *rnh_close; /*do something when the last ref drops*/ - u_int rnh_gen; /* generation counter */ + rt_gen_t rnh_gen; /* generation counter */ int rnh_multipath; /* multipath capable ? */ struct radix_node rnh_nodes[3]; /* empty tree for common case */ struct rwlock rib_lock; /* config/data path lock */ Modified: head/sys/netinet/in_pcb.c ============================================================================== --- head/sys/netinet/in_pcb.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet/in_pcb.c Thu Mar 24 07:54:56 2016 (r297225) @@ -1298,6 +1298,11 @@ in_pcbfree(struct inpcb *inp) if (inp->inp_moptions != NULL) inp_freemoptions(inp->inp_moptions); #endif + if (inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } + inp->inp_vflag = 0; inp->inp_flags2 |= INP_FREED; crfree(inp->inp_cred); @@ -2225,6 +2230,23 @@ in_pcbremlists(struct inpcb *inp) } /* + * Check for alternatives when higher level complains + * about service problems. For now, invalidate cached + * routing information. If the route was created dynamically + * (by a redirect), time to try a default gateway again. + */ +void +in_losing(struct inpcb *inp) +{ + + if (inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } + return; +} + +/* * A set label operation has occurred at the socket layer, propagate the * label change into the in_pcb for the socket. */ Modified: head/sys/netinet/in_pcb.h ============================================================================== --- head/sys/netinet/in_pcb.h Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet/in_pcb.h Thu Mar 24 07:54:56 2016 (r297225) @@ -42,6 +42,7 @@ #include #include #include +#include #ifdef _KERNEL #include @@ -238,8 +239,14 @@ struct inpcb { #define inp_zero_size offsetof(struct inpcb, inp_gencnt) inp_gen_t inp_gencnt; /* (c) generation count */ struct llentry *inp_lle; /* cached L2 information */ - struct rtentry *inp_rt; /* cached L3 information */ struct rwlock inp_lock; + rt_gen_t inp_rt_cookie; /* generation for route entry */ + union { /* cached L3 information */ + struct route inpu_route; + struct route_in6 inpu_route6; + } inp_rtu; +#define inp_route inp_rtu.inpu_route +#define inp_route6 inp_rtu.inpu_route6 }; #define inp_fport inp_inc.inc_fport #define inp_lport inp_inc.inc_lport @@ -722,6 +729,7 @@ void in_pcbrehash_mbuf(struct inpcb *, s int in_pcbrele(struct inpcb *); int in_pcbrele_rlocked(struct inpcb *); int in_pcbrele_wlocked(struct inpcb *); +void in_losing(struct inpcb *); void in_pcbsetsolabel(struct socket *so); int in_getpeeraddr(struct socket *so, struct sockaddr **nam); int in_getsockaddr(struct socket *so, struct sockaddr **nam); Modified: head/sys/netinet/ip_output.c ============================================================================== --- head/sys/netinet/ip_output.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet/ip_output.c Thu Mar 24 07:54:56 2016 (r297225) @@ -282,17 +282,36 @@ ip_output(struct mbuf *m, struct mbuf *o gw = dst = (struct sockaddr_in *)&ro->ro_dst; fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); rte = ro->ro_rt; - /* - * The address family should also be checked in case of sharing - * the cache with IPv6. - */ - if (rte == NULL || dst->sin_family != AF_INET) { + if (rte == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } again: + /* + * Validate route against routing table additions; + * a better/more specific route might have been added. + */ + if (inp) + RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); + /* + * If there is a cached route, + * check that it is to the same destination + * and is still up. If not, free it and try again. + * The address family should also be checked in case of sharing the + * cache with IPv6. + * Also check whether routing cache needs invalidation. + */ + rte = ro->ro_rt; + if (rte && ((rte->rt_flags & RTF_UP) == 0 || + rte->rt_ifp == NULL || + !RT_LINK_IS_UP(rte->rt_ifp) || + dst->sin_family != AF_INET || + dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { + RTFREE(rte); + rte = ro->ro_rt = (struct rtentry *)NULL; + } ia = NULL; have_ia_ref = 0; /* Modified: head/sys/netinet/tcp_output.c ============================================================================== --- head/sys/netinet/tcp_output.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet/tcp_output.c Thu Mar 24 07:54:56 2016 (r297225) @@ -1379,9 +1379,6 @@ send: #endif #ifdef INET { - struct route ro; - - bzero(&ro, sizeof(ro)); ip->ip_len = htons(m->m_pkthdr.len); #ifdef INET6 if (tp->t_inpcb->inp_vflag & INP_IPV6PROTO) @@ -1412,13 +1409,12 @@ send: tcp_pcap_add(th, m, &(tp->t_outpkts)); #endif - error = ip_output(m, tp->t_inpcb->inp_options, &ro, + error = ip_output(m, tp->t_inpcb->inp_options, &tp->t_inpcb->inp_route, ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0, tp->t_inpcb); - if (error == EMSGSIZE && ro.ro_rt != NULL) - mtu = ro.ro_rt->rt_mtu; - RO_RTFREE(&ro); + if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL) + mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu; } #endif /* INET */ Modified: head/sys/netinet/tcp_subr.c ============================================================================== --- head/sys/netinet/tcp_subr.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet/tcp_subr.c Thu Mar 24 07:54:56 2016 (r297225) @@ -1632,6 +1632,10 @@ tcp_notify(struct inpcb *inp, int error) if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { + if (inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && tp->t_softerror) { @@ -1926,11 +1930,11 @@ tcp_ctlinput(int cmd, struct sockaddr *s else if (V_icmp_may_rst && (cmd == PRC_UNREACH_ADMIN_PROHIB || cmd == PRC_UNREACH_PORT || cmd == PRC_TIMXCEED_INTRANS) && ip) notify = tcp_drop_syn_sent; - /* - * Redirects don't need to be handled up here. - */ - else if (PRC_IS_REDIRECT(cmd)) + else if (PRC_IS_REDIRECT(cmd)) { + /* signal EHOSTDOWN, as it flushes the cached route */ + in_pcbnotifyall(&tcbinfo, faddr, EHOSTDOWN, notify); return; + } /* * Hostdead is ugly because it goes linearly through all PCBs. * XXX: We never get this from ICMP, otherwise it makes an Modified: head/sys/netinet/tcp_timer.c ============================================================================== --- head/sys/netinet/tcp_timer.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet/tcp_timer.c Thu Mar 24 07:54:56 2016 (r297225) @@ -786,7 +786,9 @@ tcp_timer_rexmt(void * xtp) #ifdef INET6 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0) in6_losing(tp->t_inpcb); + else #endif + in_losing(tp->t_inpcb); tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT); tp->t_srtt = 0; } Modified: head/sys/netinet/udp_usrreq.c ============================================================================== --- head/sys/netinet/udp_usrreq.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet/udp_usrreq.c Thu Mar 24 07:54:56 2016 (r297225) @@ -740,6 +740,11 @@ udp_notify(struct inpcb *inp, int errno) * or a write lock, but a read lock is sufficient. */ INP_LOCK_ASSERT(inp); + if ((errno == EHOSTUNREACH || errno == ENETUNREACH || + errno == EHOSTDOWN) && inp->inp_route.ro_rt) { + RTFREE(inp->inp_route.ro_rt); + inp->inp_route.ro_rt = (struct rtentry *)NULL; + } inp->inp_socket->so_error = errno; sorwakeup(inp->inp_socket); @@ -761,11 +766,11 @@ udp_common_ctlinput(int cmd, struct sock if (sa->sa_family != AF_INET || faddr.s_addr == INADDR_ANY) return; - /* - * Redirects don't need to be handled up here. - */ - if (PRC_IS_REDIRECT(cmd)) + if (PRC_IS_REDIRECT(cmd)) { + /* signal EHOSTDOWN, as it flushes the cached route */ + in_pcbnotifyall(&udbinfo, faddr, EHOSTDOWN, udp_notify); return; + } /* * Hostdead is ugly because it goes linearly through all PCBs. @@ -1116,7 +1121,7 @@ udp_output(struct inpcb *inp, struct mbu int error = 0; int ipflags; u_short fport, lport; - int unlock_udbinfo; + int unlock_udbinfo, unlock_inp; u_char tos; uint8_t pr; uint16_t cscov = 0; @@ -1137,7 +1142,15 @@ udp_output(struct inpcb *inp, struct mbu } src.sin_family = 0; - INP_RLOCK(inp); + sin = (struct sockaddr_in *)addr; + if (sin == NULL || + (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { + INP_WLOCK(inp); + unlock_inp = UH_WLOCKED; + } else { + INP_RLOCK(inp); + unlock_inp = UH_RLOCKED; + } tos = inp->inp_ip_tos; if (control != NULL) { /* @@ -1145,7 +1158,10 @@ udp_output(struct inpcb *inp, struct mbu * stored in a single mbuf. */ if (control->m_next) { - INP_RUNLOCK(inp); + if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); m_freem(control); m_freem(m); return (EINVAL); @@ -1220,7 +1236,10 @@ udp_output(struct inpcb *inp, struct mbu m_freem(control); } if (error) { - INP_RUNLOCK(inp); + if (unlock_inp == UH_WLOCKED) + INP_WUNLOCK(inp); + else + INP_RUNLOCK(inp); m_freem(m); return (error); } @@ -1246,8 +1265,6 @@ udp_output(struct inpcb *inp, struct mbu sin = (struct sockaddr_in *)addr; if (sin != NULL && (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0)) { - INP_RUNLOCK(inp); - INP_WLOCK(inp); INP_HASH_WLOCK(pcbinfo); unlock_udbinfo = UH_WLOCKED; } else if ((sin != NULL && ( @@ -1514,9 +1531,10 @@ udp_output(struct inpcb *inp, struct mbu else if (unlock_udbinfo == UH_RLOCKED) INP_HASH_RUNLOCK(pcbinfo); UDP_PROBE(send, NULL, inp, &ui->ui_i, inp, &ui->ui_u); - error = ip_output(m, inp->inp_options, NULL, ipflags, + error = ip_output(m, inp->inp_options, + (unlock_inp == UH_WLOCKED ? &inp->inp_route : NULL), ipflags, inp->inp_moptions, inp); - if (unlock_udbinfo == UH_WLOCKED) + if (unlock_inp == UH_WLOCKED) INP_WUNLOCK(inp); else INP_RUNLOCK(inp); Modified: head/sys/netinet6/in6_pcb.c ============================================================================== --- head/sys/netinet6/in6_pcb.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet6/in6_pcb.c Thu Mar 24 07:54:56 2016 (r297225) @@ -827,9 +827,10 @@ void in6_losing(struct inpcb *in6p) { - /* - * We don't store route pointers in the routing table anymore - */ + if (in6p->inp_route6.ro_rt) { + RTFREE(in6p->inp_route6.ro_rt); + in6p->inp_route6.ro_rt = (struct rtentry *)NULL; + } return; } @@ -840,9 +841,11 @@ in6_losing(struct inpcb *in6p) struct inpcb * in6_rtchange(struct inpcb *inp, int errno) { - /* - * We don't store route pointers in the routing table anymore - */ + + if (inp->inp_route6.ro_rt) { + RTFREE(inp->inp_route6.ro_rt); + inp->inp_route6.ro_rt = (struct rtentry *)NULL; + } return inp; } Modified: head/sys/netinet6/ip6_output.c ============================================================================== --- head/sys/netinet6/ip6_output.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet6/ip6_output.c Thu Mar 24 07:54:56 2016 (r297225) @@ -546,7 +546,18 @@ again: /* adjust pointer */ ip6 = mtod(m, struct ip6_hdr *); - if (ro->ro_rt && fwd_tag == NULL) { + /* + * Validate route against routing table additions; + * a better/more specific route might have been added. + * Make sure address family is set in route. + */ + if (inp) { + ro->ro_dst.sin6_family = AF_INET6; + RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); + } + if (ro->ro_rt && fwd_tag == NULL && (ro->ro_rt->rt_flags & RTF_UP) && + ro->ro_dst.sin6_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { rt = ro->ro_rt; ifp = ro->ro_rt->rt_ifp; } else { @@ -939,7 +950,8 @@ passout: m->m_pkthdr.len); ifa_free(&ia6->ia_ifa); } - error = nd6_output_ifp(ifp, origifp, m, dst, NULL); + error = nd6_output_ifp(ifp, origifp, m, dst, + (struct route *)ro); goto done; } @@ -1038,7 +1050,8 @@ sendorfree: counter_u64_add(ia->ia_ifa.ifa_obytes, m->m_pkthdr.len); } - error = nd6_output_ifp(ifp, origifp, m, dst, NULL); + error = nd6_output_ifp(ifp, origifp, m, dst, + (struct route *)ro); } else m_freem(m); } Modified: head/sys/netinet6/udp6_usrreq.c ============================================================================== --- head/sys/netinet6/udp6_usrreq.c Thu Mar 24 07:24:33 2016 (r297224) +++ head/sys/netinet6/udp6_usrreq.c Thu Mar 24 07:54:56 2016 (r297225) @@ -876,8 +876,8 @@ udp6_output(struct inpcb *inp, struct mb UDP_PROBE(send, NULL, inp, ip6, inp, udp6); UDPSTAT_INC(udps_opackets); - error = ip6_output(m, optp, NULL, flags, inp->in6p_moptions, - NULL, inp); + error = ip6_output(m, optp, &inp->inp_route6, flags, + inp->in6p_moptions, NULL, inp); break; case AF_INET: error = EAFNOSUPPORT;