Date: Tue, 17 Apr 2012 14:26:56 +0000 (UTC) From: Gleb Smirnoff <glebius@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r234382 - projects/pf/head/sys/contrib/pf/net Message-ID: <201204171426.q3HEQu0I085966@svn.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: glebius Date: Tue Apr 17 14:26:55 2012 New Revision: 234382 URL: http://svn.freebsd.org/changeset/base/234382 Log: Fixes to pf_route(), pf_route6(). These functions are very special, they may call into send side of IP stack, for example they call ifp->if_output(), icmp_error(), icmp6_error(), ip6_output(), and they also can perform route lookup. Even more, they can call pf_test() or pf_test6() recursively! Hence, they need dropping any pf(4) locks before. Fortunately, pf_route() and pf_route6() are called only at the very end of packet processing, so we can drop the pf(4) locks with no need re-acquire them, thus: o Drop locks in pf_route(), pf_route6() in all error cases. o Drop locks in pf_route(), pf_route6() as soon as we get information from the state, or if there is no state, or if we don't need state. More cleanups and fixes to pf_route(): - We don't need to check for m_len, pf_test() already did this. - To do route lookup we don't need to allocate 'struct route' on stack, we can just have a sockaddr and rtentry pointer. - We need rtentry for a quite short time, so it is better to avoid its unlocking and refcounting. - In case if new interface (after routing rule applied) is a loopback one, then put M_SKIP_FIREWALL on mbuf, otherwise we may end up with infinite processing. [1] - Update the copy-paste from ip_output() bringing in TSO and SCTP support. - Reduce number of byte order swaps. More cleanups and fixes to pf_route6(): - We don't need to check for m_len, pf_test() already did this. - We don't need struct route_in6 on stack, a sockaddr_in6 would be enough. - In case if new interface (after routing rule applied) is a loopback one, then put M_SKIP_FIREWALL on mbuf, otherwise we may end up with infinite processing. [1] Discussed with: eri [1] Modified: projects/pf/head/sys/contrib/pf/net/pf.c Modified: projects/pf/head/sys/contrib/pf/net/pf.c ============================================================================== --- projects/pf/head/sys/contrib/pf/net/pf.c Tue Apr 17 13:44:40 2012 (r234381) +++ projects/pf/head/sys/contrib/pf/net/pf.c Tue Apr 17 14:26:55 2012 (r234382) @@ -5112,18 +5112,13 @@ pf_route(struct mbuf **m, struct pf_rule struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0, *m1; - struct route iproute; - struct route *ro = NULL; - struct sockaddr_in *dst; + struct sockaddr_in dst; struct ip *ip; struct ifnet *ifp = NULL; struct pf_addr naddr; struct pf_src_node *sn = NULL; int error = 0; int sw_csum; -#ifdef IPSEC - struct m_tag *mtag; -#endif /* IPSEC */ KASSERT(m && *m && r && oifp, ("%s: invalid parameters", __func__)); KASSERT(dir == PF_IN || dir == PF_OUT, ("%s: invalid direction", @@ -5132,78 +5127,84 @@ pf_route(struct mbuf **m, struct pf_rule if (pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; - goto bad; + goto bad_locked; } if (r->rt == PF_DUPTO) { - if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); return; + } } else { - if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); return; + } m0 = *m; } - if (m0->m_len < sizeof(struct ip)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); - goto bad; - } - ip = mtod(m0, struct ip *); - ro = &iproute; - bzero((caddr_t)ro, sizeof(*ro)); - dst = satosin(&ro->ro_dst); - dst->sin_family = AF_INET; - dst->sin_len = sizeof(*dst); - dst->sin_addr = ip->ip_dst; + bzero(&dst, sizeof(dst)); + dst.sin_family = AF_INET; + dst.sin_len = sizeof(dst); + dst.sin_addr = ip->ip_dst; if (r->rt == PF_FASTROUTE) { - in_rtalloc_ign(ro, 0, M_GETFIB(m0)); - if (ro->ro_rt == 0) { + struct rtentry *rt; + + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); + rt = rtalloc1_fib(sintosa(&dst), 0, 0, M_GETFIB(m0)); + if (rt == NULL) { + RTFREE_LOCKED(rt); KMOD_IPSTAT_INC(ips_noroute); + error = EHOSTUNREACH; goto bad; } - ifp = ro->ro_rt->rt_ifp; - ro->ro_rt->rt_use++; + ifp = rt->rt_ifp; + rt->rt_rmx.rmx_pksent++; - if (ro->ro_rt->rt_flags & RTF_GATEWAY) - dst = satosin(ro->ro_rt->rt_gateway); + if (rt->rt_flags & RTF_GATEWAY) + bcopy(satosin(rt->rt_gateway), &dst, sizeof(dst)); + RTFREE_LOCKED(rt); } else { if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); - goto bad; + goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET, r, (struct pf_addr *)&ip->ip_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET)) - dst->sin_addr.s_addr = naddr.v4.s_addr; + dst.sin_addr.s_addr = naddr.v4.s_addr; ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; + PF_UNLOCK(); } else { if (!PF_AZERO(&s->rt_addr, AF_INET)) - dst->sin_addr.s_addr = + dst.sin_addr.s_addr = s->rt_addr.v4.s_addr; ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; + PF_STATE_UNLOCK(s); + PF_UNLOCK(); } } if (ifp == NULL) goto bad; if (oifp != ifp) { - PF_UNLOCK(); - if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) { - PF_LOCK(); + if (pf_test(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; - } else if (m0 == NULL) { - PF_LOCK(); + else if (m0 == NULL) goto done; - } - PF_LOCK(); if (m0->m_len < sizeof(struct ip)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip)\n", __func__)); @@ -5212,82 +5213,67 @@ pf_route(struct mbuf **m, struct pf_rule ip = mtod(m0, struct ip *); } - /* Copied from FreeBSD 5.1-CURRENT ip_output. */ + if (ifp->if_flags & IFF_LOOPBACK) + m0->m_flags |= M_SKIP_FIREWALL; + + /* Back to host byte order. */ + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + + /* Copied from FreeBSD 10.0-CURRENT ip_output. */ m0->m_pkthdr.csum_flags |= CSUM_IP; sw_csum = m0->m_pkthdr.csum_flags & ~ifp->if_hwassist; if (sw_csum & CSUM_DELAY_DATA) { - /* - * XXX: in_delayed_cksum assumes HBO for ip->ip_len (at least) - */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); /* XXX: needed? */ in_delayed_cksum(m0); - HTONS(ip->ip_len); - HTONS(ip->ip_off); sw_csum &= ~CSUM_DELAY_DATA; } +#ifdef SCTP + if (sw_csum & CSUM_SCTP) { + sctp_delayed_cksum(m, (uint32_t)(ip->ip_hl << 2)); + sw_csum &= ~CSUM_SCTP; + } +#endif m0->m_pkthdr.csum_flags &= ifp->if_hwassist; - if (ntohs(ip->ip_len) <= ifp->if_mtu || - (ifp->if_hwassist & CSUM_FRAGMENT && - ((ip->ip_off & htons(IP_DF)) == 0))) { - /* - * ip->ip_len = htons(ip->ip_len); - * ip->ip_off = htons(ip->ip_off); - */ + /* + * If small enough for interface, or the interface will take + * care of the fragmentation for us, we can just send directly. + */ + if (ip->ip_len <= ifp->if_mtu || + (m0->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || + ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; - if (sw_csum & CSUM_DELAY_IP) { - /* From KAME */ - if (ip->ip_v == IPVERSION && - (ip->ip_hl << 2) == sizeof(*ip)) { - ip->ip_sum = in_cksum_hdr(ip); - } else { - ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); - } - } - PF_UNLOCK(); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), ro); - PF_LOCK(); + if (sw_csum & CSUM_DELAY_IP) + ip->ip_sum = in_cksum(m0, ip->ip_hl << 2); + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); goto done; } - /* - * Too large for interface; fragment if possible. - * Must be able to put at least 8 bytes per fragment. - */ - if (ip->ip_off & htons(IP_DF)) { + /* Balk when DF bit is set or the interface didn't support TSO. */ + if ((ip->ip_off & IP_DF) || (m0->m_pkthdr.csum_flags & CSUM_TSO)) { + error = EMSGSIZE; KMOD_IPSTAT_INC(ips_cantfrag); if (r->rt != PF_DUPTO) { - /* icmp_error() expects host byte ordering */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); - PF_UNLOCK(); icmp_error(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, ifp->if_mtu); - PF_LOCK(); goto done; } else goto bad; } - m1 = m0; - /* - * XXX: is cheaper + less error prone than own function - */ - NTOHS(ip->ip_len); - NTOHS(ip->ip_off); error = ip_fragment(ip, &m0, ifp->if_mtu, ifp->if_hwassist, sw_csum); if (error) goto bad; - for (m0 = m1; m0; m0 = m1) { + for (; m0; m0 = m1) { m1 = m0->m_nextpkt; - m0->m_nextpkt = 0; + m0->m_nextpkt = NULL; if (error == 0) { - PF_UNLOCK(); - error = (*ifp->if_output)(ifp, m0, sintosa(dst), - NULL); - PF_LOCK(); + m0->m_flags &= ~(M_PROTOFLAGS); + error = (*ifp->if_output)(ifp, m0, sintosa(&dst), NULL); } else m_freem(m0); } @@ -5298,10 +5284,12 @@ pf_route(struct mbuf **m, struct pf_rule done: if (r->rt != PF_DUPTO) *m = NULL; - if (ro == &iproute && ro->ro_rt) - RTFREE(ro->ro_rt); return; +bad_locked: + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); bad: m_freem(m0); goto done; @@ -5314,9 +5302,7 @@ pf_route6(struct mbuf **m, struct pf_rul struct pf_state *s, struct pf_pdesc *pd) { struct mbuf *m0; - struct route_in6 ip6route; - struct route_in6 *ro; - struct sockaddr_in6 *dst; + struct sockaddr_in6 dst; struct ip6_hdr *ip6; struct ifnet *ifp = NULL; struct pf_addr naddr; @@ -5329,36 +5315,39 @@ pf_route6(struct mbuf **m, struct pf_rul if (pd->pf_mtag->routed++ > 3) { m0 = *m; *m = NULL; - goto bad; + goto bad_locked; } if (r->rt == PF_DUPTO) { - if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) + if ((m0 = m_dup(*m, M_NOWAIT)) == NULL) { + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); return; + } } else { - if ((r->rt == PF_REPLYTO) == (r->direction == dir)) + if ((r->rt == PF_REPLYTO) == (r->direction == dir)) { + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); return; + } m0 = *m; } - if (m0->m_len < sizeof(struct ip6_hdr)) { - DPFPRINTF(PF_DEBUG_URGENT, - ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", __func__)); - goto bad; - } ip6 = mtod(m0, struct ip6_hdr *); - ro = &ip6route; - bzero((caddr_t)ro, sizeof(*ro)); - dst = (struct sockaddr_in6 *)&ro->ro_dst; - dst->sin6_family = AF_INET6; - dst->sin6_len = sizeof(*dst); - dst->sin6_addr = ip6->ip6_dst; + bzero(&dst, sizeof(dst)); + dst.sin6_family = AF_INET6; + dst.sin6_len = sizeof(dst); + dst.sin6_addr = ip6->ip6_dst; /* Cheat. XXX why only in the v6 case??? */ if (r->rt == PF_FASTROUTE) { - m0->m_flags |= M_SKIP_FIREWALL; + if (s) + PF_STATE_UNLOCK(s); PF_UNLOCK(); + m0->m_flags |= M_SKIP_FIREWALL; ip6_output(m0, NULL, NULL, 0, NULL, NULL, NULL); return; } @@ -5366,34 +5355,34 @@ pf_route6(struct mbuf **m, struct pf_rul if (TAILQ_EMPTY(&r->rpool.list)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: TAILQ_EMPTY(&r->rpool.list)\n", __func__)); - goto bad; + goto bad_locked; } if (s == NULL) { pf_map_addr(AF_INET6, r, (struct pf_addr *)&ip6->ip6_src, &naddr, NULL, &sn); if (!PF_AZERO(&naddr, AF_INET6)) - PF_ACPY((struct pf_addr *)&dst->sin6_addr, + PF_ACPY((struct pf_addr *)&dst.sin6_addr, &naddr, AF_INET6); ifp = r->rpool.cur->kif ? r->rpool.cur->kif->pfik_ifp : NULL; } else { if (!PF_AZERO(&s->rt_addr, AF_INET6)) - PF_ACPY((struct pf_addr *)&dst->sin6_addr, + PF_ACPY((struct pf_addr *)&dst.sin6_addr, &s->rt_addr, AF_INET6); ifp = s->rt_kif ? s->rt_kif->pfik_ifp : NULL; } + + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); + if (ifp == NULL) goto bad; if (oifp != ifp) { - PF_UNLOCK(); - if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) { - PF_LOCK(); + if (pf_test6(PF_OUT, ifp, &m0, NULL) != PF_PASS) goto bad; - } else if (m0 == NULL) { - PF_LOCK(); + else if (m0 == NULL) goto done; - } - PF_LOCK(); if (m0->m_len < sizeof(struct ip6_hdr)) { DPFPRINTF(PF_DEBUG_URGENT, ("%s: m0->m_len < sizeof(struct ip6_hdr)\n", @@ -5403,23 +5392,22 @@ pf_route6(struct mbuf **m, struct pf_rul ip6 = mtod(m0, struct ip6_hdr *); } + if (ifp->if_flags & IFF_LOOPBACK) + m0->m_flags |= M_SKIP_FIREWALL; + /* * If the packet is too large for the outgoing interface, * send back an icmp6 error. */ - if (IN6_IS_SCOPE_EMBED(&dst->sin6_addr)) - dst->sin6_addr.s6_addr16[1] = htons(ifp->if_index); - if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) { - PF_UNLOCK(); - nd6_output(ifp, ifp, m0, dst, NULL); - PF_LOCK(); - } else { + if (IN6_IS_SCOPE_EMBED(&dst.sin6_addr)) + dst.sin6_addr.s6_addr16[1] = htons(ifp->if_index); + if ((u_long)m0->m_pkthdr.len <= ifp->if_mtu) + nd6_output(ifp, ifp, m0, &dst, NULL); + else { in6_ifstat_inc(ifp, ifs6_in_toobig); - if (r->rt != PF_DUPTO) { - PF_UNLOCK(); + if (r->rt != PF_DUPTO) icmp6_error(m0, ICMP6_PACKET_TOO_BIG, 0, ifp->if_mtu); - PF_LOCK(); - } else + else goto bad; } @@ -5428,6 +5416,10 @@ done: *m = NULL; return; +bad_locked: + if (s) + PF_STATE_UNLOCK(s); + PF_UNLOCK(); bad: m_freem(m0); goto done; @@ -5916,9 +5908,11 @@ done: action = PF_PASS; break; default: - /* pf_route can free the mbuf causing *m0 to become NULL */ - if (r->rt) + /* pf_route() returns unlocked. */ + if (r->rt) { pf_route(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } break; } if (s) @@ -6297,9 +6291,11 @@ done: action = PF_PASS; break; default: - /* pf_route6 can free the mbuf causing *m0 to become NULL */ - if (r->rt) + /* pf_route6() returns unlocked. */ + if (r->rt) { pf_route6(m0, r, dir, kif->pfik_ifp, s, &pd); + return (action); + } break; }
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201204171426.q3HEQu0I085966>