From owner-svn-src-all@freebsd.org Wed Jul 29 18:04:03 2015 Return-Path: Delivered-To: svn-src-all@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id 08FF19AECE4; Wed, 29 Jul 2015 18:04:03 +0000 (UTC) (envelope-from eri@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id ED5FEFCE; Wed, 29 Jul 2015 18:04:02 +0000 (UTC) (envelope-from eri@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.70]) by repo.freebsd.org (8.14.9/8.14.9) with ESMTP id t6TI428I065404; Wed, 29 Jul 2015 18:04:02 GMT (envelope-from eri@FreeBSD.org) Received: (from eri@localhost) by repo.freebsd.org (8.14.9/8.14.9/Submit) id t6TI42iH065403; Wed, 29 Jul 2015 18:04:02 GMT (envelope-from eri@FreeBSD.org) Message-Id: <201507291804.t6TI42iH065403@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: eri set sender to eri@FreeBSD.org using -f From: Ermal Luçi Date: Wed, 29 Jul 2015 18:04:02 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r286028 - head/sys/netinet X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 29 Jul 2015 18:04:03 -0000 Author: eri Date: Wed Jul 29 18:04:01 2015 New Revision: 286028 URL: https://svnweb.freebsd.org/changeset/base/286028 Log: ip_output normalization and fixes ip_output has a big chunk of code used to handle special cases with pfil consumers which also forces a reloop on it. Gather all this code together to make it readable and properly handle the reloop cases. Some of the issues identified: M_IP_NEXTHOP is not handled properly in existing code. route reference leaking is possible with in FIB number change route flags checking is not consistent in the function Differential Revision: https://reviews.freebsd.org/D3022 Reviewed by: gnn Approved by: gnn(mentor) MFC after: 4 weeks Modified: head/sys/netinet/ip_output.c Modified: head/sys/netinet/ip_output.c ============================================================================== --- head/sys/netinet/ip_output.c Wed Jul 29 17:59:13 2015 (r286027) +++ head/sys/netinet/ip_output.c Wed Jul 29 18:04:01 2015 (r286028) @@ -106,6 +106,94 @@ static void ip_mloopback extern int in_mcast_loop; extern struct protosw inetsw[]; +static inline int +ip_output_pfil(struct mbuf *m, struct ifnet *ifp, struct inpcb *inp, + struct sockaddr_in *dst, int *fibnum, int *error) +{ + struct m_tag *fwd_tag = NULL; + struct in_addr odst; + struct ip *ip; + + ip = mtod(m, struct ip *); + + /* Run through list of hooks for output packets. */ + odst.s_addr = ip->ip_dst.s_addr; + *error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp); + if ((*error) != 0 || m == NULL) + return 1; /* Finished */ + + ip = mtod(m, struct ip *); + + /* See if destination IP address was changed by packet filter. */ + if (odst.s_addr != ip->ip_dst.s_addr) { + m->m_flags |= M_SKIP_FIREWALL; + /* If destination is now ourself drop to ip_input(). */ + if (in_localip(ip->ip_dst)) { + m->m_flags |= M_FASTFWD_OURS; + if (m->m_pkthdr.rcvif == NULL) + m->m_pkthdr.rcvif = V_loif; + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xffff; + } + m->m_pkthdr.csum_flags |= + CSUM_IP_CHECKED | CSUM_IP_VALID; +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP) + m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; +#endif + *error = netisr_queue(NETISR_IP, m); + return 1; /* Finished */ + } + + bzero(dst, sizeof(*dst)); + dst->sin_family = AF_INET; + dst->sin_len = sizeof(*dst); + dst->sin_addr = ip->ip_dst; + + return -1; /* Reloop */ + } + /* See if fib was changed by packet filter. */ + if ((*fibnum) != M_GETFIB(m)) { + m->m_flags |= M_SKIP_FIREWALL; + *fibnum = M_GETFIB(m); + return -1; /* Reloop for FIB change */ + } + + /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ + if (m->m_flags & M_FASTFWD_OURS) { + if (m->m_pkthdr.rcvif == NULL) + m->m_pkthdr.rcvif = V_loif; + if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { + m->m_pkthdr.csum_flags |= + CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xffff; + } +#ifdef SCTP + if (m->m_pkthdr.csum_flags & CSUM_SCTP) + m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; +#endif + m->m_pkthdr.csum_flags |= + CSUM_IP_CHECKED | CSUM_IP_VALID; + + *error = netisr_queue(NETISR_IP, m); + return 1; /* Finished */ + } + /* Or forward to some other address? */ + if ((m->m_flags & M_IP_NEXTHOP) && + ((fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL)) { + bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); + m->m_flags |= M_SKIP_FIREWALL; + m->m_flags &= ~M_IP_NEXTHOP; + m_tag_delete(m, fwd_tag); + + return -1; /* Reloop for CHANGE of dst */ + } + + return 0; +} + /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). @@ -136,11 +224,8 @@ ip_output(struct mbuf *m, struct mbuf *o uint16_t ip_len, ip_off; struct route iproute; struct rtentry *rte; /* cache for ro->ro_rt */ - struct in_addr odst; - struct m_tag *fwd_tag = NULL; uint32_t fibnum; int have_ia_ref; - int needfiblookup; #ifdef IPSEC int no_route_but_check_spd = 0; #endif @@ -194,32 +279,20 @@ ip_output(struct mbuf *m, struct mbuf *o */ gw = dst = (struct sockaddr_in *)&ro->ro_dst; fibnum = (inp != NULL) ? inp->inp_inc.inc_fibnum : M_GETFIB(m); -again: - ia = NULL; - have_ia_ref = 0; + rte = ro->ro_rt; /* - * If there is a cached route, check that it is to the same - * destination and is still up. If not, free it and try again. * The address family should also be checked in case of sharing * the cache with IPv6. */ - rte = ro->ro_rt; - if (rte && ((rte->rt_flags & RTF_UP) == 0 || - rte->rt_ifp == NULL || - !RT_LINK_IS_UP(rte->rt_ifp) || - dst->sin_family != AF_INET || - dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { - RO_RTFREE(ro); - ro->ro_lle = NULL; - rte = NULL; - gw = dst; - } - if (rte == NULL && fwd_tag == NULL) { + if (rte == NULL || dst->sin_family != AF_INET) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } +again: + ia = NULL; + have_ia_ref = 0; /* * If routing to interface only, short circuit routing lookup. * The use of an all-ones broadcast address implies this; an @@ -282,6 +355,7 @@ again: rte = ro->ro_rt; } if (rte == NULL || + (rte->rt_flags & RTF_UP) == 0 || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) { #ifdef IPSEC @@ -307,6 +381,7 @@ again: else isbroadcast = in_broadcast(gw->sin_addr, ifp); } + /* * Calculate MTU. If we have a route that is up, use that, * otherwise use the interface's MTU. @@ -318,6 +393,7 @@ again: /* Catch a possible divide by zero later. */ KASSERT(mtu > 0, ("%s: mtu %d <= 0, rte=%p (rt_flags=0x%08x) ifp=%p", __func__, mtu, rte, (rte != NULL) ? rte->rt_flags : 0, ifp)); + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { m->m_flags |= M_MCAST; /* @@ -475,87 +551,29 @@ sendit: #endif /* IPSEC */ /* Jump over all PFIL processing if hooks are not active. */ - if (!PFIL_HOOKED(&V_inet_pfil_hook)) - goto passout; - - /* Run through list of hooks for output packets. */ - odst.s_addr = ip->ip_dst.s_addr; - error = pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_OUT, inp); - if (error != 0 || m == NULL) - goto done; + if (PFIL_HOOKED(&V_inet_pfil_hook)) { + switch (ip_output_pfil(m, ifp, inp, dst, &fibnum, &error)) { + case 1: /* Finished */ + goto done; - ip = mtod(m, struct ip *); - needfiblookup = 0; + case 0: /* Continue normally */ + ip = mtod(m, struct ip *); + break; - /* See if destination IP address was changed by packet filter. */ - if (odst.s_addr != ip->ip_dst.s_addr) { - m->m_flags |= M_SKIP_FIREWALL; - /* If destination is now ourself drop to ip_input(). */ - if (in_localip(ip->ip_dst)) { - m->m_flags |= M_FASTFWD_OURS; - if (m->m_pkthdr.rcvif == NULL) - m->m_pkthdr.rcvif = V_loif; - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; - } - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; -#ifdef SCTP - if (m->m_pkthdr.csum_flags & CSUM_SCTP) - m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; -#endif - error = netisr_queue(NETISR_IP, m); - goto done; - } else { + case -1: /* Need to try again */ + /* Reset everything for a new round */ + RO_RTFREE(ro); if (have_ia_ref) ifa_free(&ia->ia_ifa); - needfiblookup = 1; /* Redo the routing table lookup. */ - } - } - /* See if fib was changed by packet filter. */ - if (fibnum != M_GETFIB(m)) { - m->m_flags |= M_SKIP_FIREWALL; - fibnum = M_GETFIB(m); - RO_RTFREE(ro); - needfiblookup = 1; - } - if (needfiblookup) - goto again; + ro->ro_lle = NULL; + rte = NULL; + gw = dst; + ip = mtod(m, struct ip *); + goto again; - /* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */ - if (m->m_flags & M_FASTFWD_OURS) { - if (m->m_pkthdr.rcvif == NULL) - m->m_pkthdr.rcvif = V_loif; - if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) { - m->m_pkthdr.csum_flags |= - CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xffff; } -#ifdef SCTP - if (m->m_pkthdr.csum_flags & CSUM_SCTP) - m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; -#endif - m->m_pkthdr.csum_flags |= - CSUM_IP_CHECKED | CSUM_IP_VALID; - - error = netisr_queue(NETISR_IP, m); - goto done; - } - /* Or forward to some other address? */ - if ((m->m_flags & M_IP_NEXTHOP) && - (fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL)) != NULL) { - bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in)); - m->m_flags |= M_SKIP_FIREWALL; - m->m_flags &= ~M_IP_NEXTHOP; - m_tag_delete(m, fwd_tag); - if (have_ia_ref) - ifa_free(&ia->ia_ifa); - goto again; } -passout: /* 127/8 must not appear on wire - RFC1122. */ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {