Date: Wed, 8 May 2019 23:39:24 +0000 (UTC) From: Gleb Smirnoff <glebius@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r347375 - in head/sys: net netinet Message-ID: <201905082339.x48NdOq3067899@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: glebius Date: Wed May 8 23:39:24 2019 New Revision: 347375 URL: https://svnweb.freebsd.org/changeset/base/347375 Log: Existence of PCB route caching doesn't allow us to use the new fast route lookup KPI in ip_output() like it is already used in ip_forward(). However, when there is no PCB provided we can use the fast KPI, gaining a performance advantage. A typical case when ip_output() is called without a PCB pointer is a sendto(2) on an unconnected UDP socket. In practice DNS servers do this. Reviewed by: melifaro Differential Revision: https://reviews.freebsd.org/D19804 Modified: head/sys/net/route.h head/sys/net/route_var.h head/sys/netinet/in_fib.c head/sys/netinet/in_fib.h head/sys/netinet/ip_output.c Modified: head/sys/net/route.h ============================================================================== --- head/sys/net/route.h Wed May 8 23:24:47 2019 (r347374) +++ head/sys/net/route.h Wed May 8 23:39:24 2019 (r347375) @@ -210,6 +210,7 @@ struct rtentry { #define NHF_DEFAULT 0x0080 /* Default route */ #define NHF_BROADCAST 0x0100 /* RTF_BROADCAST */ #define NHF_GATEWAY 0x0200 /* RTF_GATEWAY */ +#define NHF_HOST 0x0400 /* RTF_HOST */ /* Nexthop request flags */ #define NHR_IFAIF 0x01 /* Return ifa_ifp interface */ Modified: head/sys/net/route_var.h ============================================================================== --- head/sys/net/route_var.h Wed May 8 23:24:47 2019 (r347374) +++ head/sys/net/route_var.h Wed May 8 23:39:24 2019 (r347375) @@ -67,6 +67,7 @@ fib_rte_to_nh_flags(int rt_flags) uint16_t res; res = (rt_flags & RTF_REJECT) ? NHF_REJECT : 0; + res |= (rt_flags & RTF_HOST) ? NHF_HOST : 0; res |= (rt_flags & RTF_BLACKHOLE) ? NHF_BLACKHOLE : 0; res |= (rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) ? NHF_REDIRECT : 0; res |= (rt_flags & RTF_BROADCAST) ? 
NHF_BROADCAST : 0; Modified: head/sys/netinet/in_fib.c ============================================================================== --- head/sys/netinet/in_fib.c Wed May 8 23:24:47 2019 (r347374) +++ head/sys/netinet/in_fib.c Wed May 8 23:39:24 2019 (r347375) @@ -96,7 +96,6 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in uint32_t flags, struct nhop4_extended *pnh4) { struct sockaddr_in *gw; - struct in_ifaddr *ia; if ((flags & NHR_IFAIF) != 0) pnh4->nh_ifp = rte->rt_ifa->ifa_ifp; @@ -113,10 +112,8 @@ fib4_rte_to_nh_extended(struct rtentry *rte, struct in gw = (struct sockaddr_in *)rt_key(rte); if (gw->sin_addr.s_addr == 0) pnh4->nh_flags |= NHF_DEFAULT; - /* XXX: Set RTF_BROADCAST if GW address is broadcast */ - - ia = ifatoia(rte->rt_ifa); - pnh4->nh_src = IA_SIN(ia)->sin_addr; + pnh4->nh_ia = ifatoia(rte->rt_ifa); + pnh4->nh_src = IA_SIN(pnh4->nh_ia)->sin_addr; } /* Modified: head/sys/netinet/in_fib.h ============================================================================== --- head/sys/netinet/in_fib.h Wed May 8 23:24:47 2019 (r347374) +++ head/sys/netinet/in_fib.h Wed May 8 23:39:24 2019 (r347375) @@ -43,12 +43,13 @@ struct nhop4_basic { /* Extended nexthop info used for control protocols */ struct nhop4_extended { struct ifnet *nh_ifp; /* Logical egress interface */ + struct in_ifaddr *nh_ia; /* Associated address */ uint16_t nh_mtu; /* nexthop mtu */ uint16_t nh_flags; /* nhop flags */ uint8_t spare[4]; struct in_addr nh_addr; /* GW/DST IPv4 address */ struct in_addr nh_src; /* default source IPv4 address */ - uint64_t spare2[2]; + uint64_t spare2; }; int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, Modified: head/sys/netinet/ip_output.c ============================================================================== --- head/sys/netinet/ip_output.c Wed May 8 23:24:47 2019 (r347374) +++ head/sys/netinet/ip_output.c Wed May 8 23:39:24 2019 (r347375) @@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$"); #include <net/vnet.h> 
#include <netinet/in.h> +#include <netinet/in_fib.h> #include <netinet/in_kdtrace.h> #include <netinet/in_systm.h> #include <netinet/ip.h> @@ -227,13 +228,12 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou int hlen = sizeof (struct ip); int mtu; int error = 0; - struct sockaddr_in *dst; + struct sockaddr_in *dst, sin; const struct sockaddr_in *gw; struct in_ifaddr *ia; + struct in_addr src; int isbroadcast; uint16_t ip_len, ip_off; - struct route iproute; - struct rtentry *rte; /* cache for ro->ro_rt */ uint32_t fibnum; #if defined(IPSEC) || defined(IPSEC_SUPPORT) int no_route_but_check_spd = 0; @@ -252,11 +252,6 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou #endif } - if (ro == NULL) { - ro = &iproute; - bzero(ro, sizeof (*ro)); - } - if (opt) { int len = 0; m = ip_insertoptions(m, opt, &len); @@ -281,26 +276,28 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou /* * dst/gw handling: * - * dst can be rewritten but always points to &ro->ro_dst. * gw is readonly but can point either to dst OR rt_gateway, * therefore we need restore gw if we're redoing lookup. */ - gw = dst = (struct sockaddr_in *)&ro->ro_dst; fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); - rte = ro->ro_rt; - if (rte == NULL) { + if (ro != NULL) + dst = (struct sockaddr_in *)&ro->ro_dst; + else + dst = &sin; + if (ro == NULL || ro->ro_rt == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } + gw = dst; NET_EPOCH_ENTER(et); again: /* * Validate route against routing table additions; * a better/more specific route might have been added. */ - if (inp) + if (inp != NULL && ro != NULL && ro->ro_rt != NULL) RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); /* * If there is a cached route, @@ -310,15 +307,12 @@ again: * cache with IPv6. * Also check whether routing cache needs invalidation. 
*/ - rte = ro->ro_rt; - if (rte && ((rte->rt_flags & RTF_UP) == 0 || - rte->rt_ifp == NULL || - !RT_LINK_IS_UP(rte->rt_ifp) || - dst->sin_family != AF_INET || - dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { + if (ro != NULL && ro->ro_rt != NULL && + ((ro->ro_rt->rt_flags & RTF_UP) == 0 || + ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) || + dst->sin_family != AF_INET || + dst->sin_addr.s_addr != ip->ip_dst.s_addr)) RO_INVALIDATE_CACHE(ro); - rte = NULL; - } ia = NULL; /* * If routing to interface only, short circuit routing lookup. @@ -338,8 +332,10 @@ again: ip->ip_dst.s_addr = INADDR_BROADCAST; dst->sin_addr = ip->ip_dst; ifp = ia->ia_ifp; + mtu = ifp->if_mtu; ip->ip_ttl = 1; isbroadcast = 1; + src = IA_SIN(ia)->sin_addr; } else if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst), M_GETFIB(m)))) == NULL && @@ -350,9 +346,11 @@ again: goto bad; } ifp = ia->ia_ifp; + mtu = ifp->if_mtu; ip->ip_ttl = 1; isbroadcast = ifp->if_flags & IFF_BROADCAST ? in_ifaddr_broadcast(dst->sin_addr, ia) : 0; + src = IA_SIN(ia)->sin_addr; } else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && imo != NULL && imo->imo_multicast_ifp != NULL) { /* @@ -360,15 +358,17 @@ again: * packets if the interface is specified. */ ifp = imo->imo_multicast_ifp; + mtu = ifp->if_mtu; IFP_TO_IA(ifp, ia, &in_ifa_tracker); isbroadcast = 0; /* fool gcc */ - } else { - /* - * We want to do any cloning requested by the link layer, - * as this is probably required in all cases for correct - * operation (as it is for ARP). - */ - if (rte == NULL) { + src = IA_SIN(ia)->sin_addr; + } else if (ro != NULL) { + if (ro->ro_rt == NULL) { + /* + * We want to do any cloning requested by the link + * layer, as this is probably required in all cases + * for correct operation (as it is for ARP). 
+ */ #ifdef RADIX_MPATH rtalloc_mpath_fib(ro, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), @@ -376,12 +376,47 @@ again: #else in_rtalloc_ign(ro, 0, fibnum); #endif - rte = ro->ro_rt; + if (ro->ro_rt == NULL || + (ro->ro_rt->rt_flags & RTF_UP) == 0 || + ro->ro_rt->rt_ifp == NULL || + !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) { +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * There is no route for this packet, but it is + * possible that a matching SPD entry exists. + */ + no_route_but_check_spd = 1; + mtu = 0; /* Silence GCC warning. */ + goto sendit; +#endif + IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; + goto bad; + } } - if (rte == NULL || - (rte->rt_flags & RTF_UP) == 0 || - rte->rt_ifp == NULL || - !RT_LINK_IS_UP(rte->rt_ifp)) { + ia = ifatoia(ro->ro_rt->rt_ifa); + ifp = ro->ro_rt->rt_ifp; + counter_u64_add(ro->ro_rt->rt_pksent, 1); + rt_update_ro_flags(ro); + if (ro->ro_rt->rt_flags & RTF_GATEWAY) + gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway; + if (ro->ro_rt->rt_flags & RTF_HOST) + isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); + else if (ifp->if_flags & IFF_BROADCAST) + isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); + else + isbroadcast = 0; + if (ro->ro_rt->rt_flags & RTF_HOST) + mtu = ro->ro_rt->rt_mtu; + else + mtu = ifp->if_mtu; + src = IA_SIN(ia)->sin_addr; + } else { + struct nhop4_extended nh; + + bzero(&nh, sizeof(nh)); + if (fib4_lookup_nh_ext(M_GETFIB(m), ip->ip_dst, 0, 0, &nh) != + 0) { #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is @@ -395,31 +430,29 @@ again: error = EHOSTUNREACH; goto bad; } - ia = ifatoia(rte->rt_ifa); - ifp = rte->rt_ifp; - counter_u64_add(rte->rt_pksent, 1); - rt_update_ro_flags(ro); - if (rte->rt_flags & RTF_GATEWAY) - gw = (struct sockaddr_in *)rte->rt_gateway; - if (rte->rt_flags & RTF_HOST) - isbroadcast = (rte->rt_flags & RTF_BROADCAST); - else if (ifp->if_flags & IFF_BROADCAST) - isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); - else - 
isbroadcast = 0; + ifp = nh.nh_ifp; + mtu = nh.nh_mtu; + /* + * We are rewriting here dst to be gw actually, contradicting + * comment at the beginning of the function. However, in this + * case we are always dealing with on stack dst. + * In case if pfil(9) sends us back to beginning of the + * function, the dst would be rewritten by ip_output_pfil(). + */ + MPASS(dst == &sin); + dst->sin_addr = nh.nh_addr; + ia = nh.nh_ia; + src = nh.nh_src; + isbroadcast = (((nh.nh_flags & (NHF_HOST | NHF_BROADCAST)) == + (NHF_HOST | NHF_BROADCAST)) || + ((ifp->if_flags & IFF_BROADCAST) && + in_ifaddr_broadcast(dst->sin_addr, ia))); } - /* - * Calculate MTU. If we have a route that is up, use that, - * otherwise use the interface's MTU. - */ - if (rte != NULL && (rte->rt_flags & (RTF_UP|RTF_HOST))) - mtu = rte->rt_mtu; - else - mtu = ifp->if_mtu; /* Catch a possible divide by zero later. */ - KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", - __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); + KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p", + __func__, mtu, ro, + (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp)); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { m->m_flags |= M_MCAST; @@ -455,11 +488,8 @@ again: * If source address not specified yet, use address * of outgoing interface. */ - if (ip->ip_src.s_addr == INADDR_ANY) { - /* Interface may have no addresses. */ - if (ia != NULL) - ip->ip_src = IA_SIN(ia)->sin_addr; - } + if (ip->ip_src.s_addr == INADDR_ANY) + ip->ip_src = src; if ((imo == NULL && in_mcast_loop) || (imo && imo->imo_multicast_loop)) { @@ -522,12 +552,8 @@ again: * If the source address is not specified yet, use the address * of the outoing interface. */ - if (ip->ip_src.s_addr == INADDR_ANY) { - /* Interface may have no addresses. 
*/ - if (ia != NULL) { - ip->ip_src = IA_SIN(ia)->sin_addr; - } - } + if (ip->ip_src.s_addr == INADDR_ANY) + ip->ip_src = src; /* * Look for broadcast address and @@ -587,9 +613,10 @@ sendit: case -1: /* Need to try again */ /* Reset everything for a new round */ - RO_RTFREE(ro); - ro->ro_prepend = NULL; - rte = NULL; + if (ro != NULL) { + RO_RTFREE(ro); + ro->ro_prepend = NULL; + } gw = dst; ip = mtod(m, struct ip *); goto again; @@ -733,15 +760,6 @@ sendit: IPSTAT_INC(ips_fragmented); done: - if (ro == &iproute) - RO_RTFREE(ro); - else if (rte == NULL) - /* - * If the caller supplied a route but somehow the reference - * to it has been released need to prevent the caller - * calling RTFREE on it again. - */ - ro->ro_rt = NULL; NET_EPOCH_EXIT(et); return (error); bad:
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201905082339.x48NdOq3067899>