Date: Fri, 5 Oct 2012 15:47:16 +0400 From: Gleb Smirnoff <glebius@FreeBSD.org> To: net@FreeBSD.org Subject: [PATCH] resolve byte order mess in ip_input/ip_output/pfil(9) Message-ID: <20121005114716.GP34622@FreeBSD.org>
next in thread | raw e-mail | index | archive | help
--r5Pyd7+fXNt84Ff3 Content-Type: text/plain; charset=koi8-r Content-Disposition: inline Hello, once the pfil(9) API was introduced in FreeBSD, our main packet filter, the ipfw(4) worked in host byte order, that's why the pfil(9) API was violated: the AF_INET hooks were entered with packet in host byte order. If you look into pfil(9) manpage you'll see that it still declares opposite :) Today, pf(4) and ipfw(4) both are working with net byte order. But pfil(9) still supplies packet to them in host byte order, contrary to what API manual says. Right now, we have tons of places where byte order is swapped there and back to resolve this mess: - when entering pf - when entering ipfw - when calling pfil hooks from enc(4) - when calling pfil hooks from if_bridge(4) - in ip_fastfwd.c Also, we've got ip_fragment() that accepts packet in host byte order and emits new ones in net byte order. Moreover, when we put packets into the NETISR_IP queue, we put them in different byte order: those that have M_FASTFWD_OURS flag are in host byte order, while all others are in net. Attached patch does the following: - all packets in NETISR_IP queue are in net byte order - ip_input() is entered in net byte order and converts packet to host byte order right _after_ processing pfil(9) hooks - ip_output() is entered in host byte order and converts packet to net byte order right _before_ processing pfil(9) hooks - ip_fragment() accepts and emits packet in net byte order - ip_forward(), ip_mloopback() use host byte order (untouched actually) - ip_fastforward() no longer modifies packet at all (except ip_ttl) - swapping of byte order there and back removed from the following modules: pf(4), ipfw(4), enc(4), if_bridge(4) - swapping of byte order added to ipfilter(4), based on __FreeBSD_version - __FreeBSD_version bumped - manual page updated -- Totus tuus, Glebius. --r5Pyd7+fXNt84Ff3 Content-Type: text/x-diff; charset=koi8-r Content-Disposition: attachment; filename="net_byte_order.diff" Index: netinet/ip_output.c =================================================================== --- netinet/ip_output.c (revision 241220) +++ netinet/ip_output.c (working copy) @@ -125,7 +125,8 @@ int error = 0; struct sockaddr_in *dst; struct in_ifaddr *ia; - int isbroadcast, sw_csum; + int isbroadcast; + uint16_t ip_len, ip_off, sw_csum; struct route iproute; struct rtentry *rte; /* cache for ro->ro_rt */ struct in_addr odst; @@ -501,6 +502,12 @@ hlen = ip->ip_hl << 2; #endif /* IPSEC */ + /* + * To network byte order. pfil(9) hooks and ip_fragment() expect this. + */ + ip->ip_len = htons(ip->ip_len); + ip->ip_off = htons(ip->ip_off); + /* Jump over all PFIL processing if hooks are not active. */ if (!PFIL_HOOKED(&V_inet_pfil_hook)) goto passout; @@ -537,6 +544,8 @@ } else { if (ia != NULL) ifa_free(&ia->ia_ifa); + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); goto again; /* Redo the routing table lookup. */ } } @@ -570,11 +579,16 @@ m_tag_delete(m, fwd_tag); if (ia != NULL) ifa_free(&ia->ia_ifa); + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); goto again; } #endif /* IPFIREWALL_FORWARD */ passout: + ip_len = ntohs(ip->ip_len); + ip_off = ntohs(ip->ip_off); + /* 127/8 must not appear on wire - RFC1122. */ if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { @@ -603,11 +617,9 @@ * If small enough for interface, or the interface will take * care of the fragmentation for us, we can just send directly. */ - if (ip->ip_len <= mtu || + if (ip_len <= mtu || (m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 || - ((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); + ((ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) { ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) ip->ip_sum = in_cksum(m, hlen); @@ -641,7 +653,7 @@ } /* Balk when DF bit is set or the interface didn't support TSO. */ - if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { + if ((ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) { error = EMSGSIZE; IPSTAT_INC(ips_cantfrag); goto bad; @@ -710,8 +722,12 @@ int firstlen; struct mbuf **mnext; int nfrags; + uint16_t ip_len, ip_off; - if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */ + ip_len = ntohs(ip->ip_len); + ip_off = ntohs(ip->ip_off); + + if (ip_off & IP_DF) { /* Fragmentation not allowed */ IPSTAT_INC(ips_cantfrag); return EMSGSIZE; } @@ -785,7 +801,7 @@ * The fragments are linked off the m_nextpkt of the original * packet, which after processing serves as the first fragment. */ - for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) { + for (nfrags = 1; off < ip_len; off += len, nfrags++) { struct ip *mhip; /* ip header on the fragment */ struct mbuf *m; int mhlen = sizeof (struct ip); @@ -811,10 +827,10 @@ mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; - /* XXX do we need to add ip->ip_off below ? */ - mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off; - if (off + len >= ip->ip_len) { /* last fragment */ - len = ip->ip_len - off; + /* XXX do we need to add ip_off below ? */ + mhip->ip_off = ((off - hlen) >> 3) + ip_off; + if (off + len >= ip_len) { /* last fragment */ + len = ip_len - off; m->m_flags |= M_LASTFRAG; } else mhip->ip_off |= IP_MF; @@ -849,11 +865,10 @@ * Update first fragment by trimming what's been copied out * and updating header. */ - m_adj(m0, hlen + firstlen - ip->ip_len); + m_adj(m0, hlen + firstlen - ip_len); m0->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m0->m_pkthdr.len); - ip->ip_off |= IP_MF; - ip->ip_off = htons(ip->ip_off); + ip->ip_off = htons(ip_off | IP_MF); ip->ip_sum = 0; if (sw_csum & CSUM_DELAY_IP) ip->ip_sum = in_cksum(m0, hlen); @@ -1279,6 +1294,8 @@ * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be a loopback interface -- evil, but easier than * replicating that code here. + * + * IP header in host byte order. */ static void ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst, Index: netinet/ip_input.c =================================================================== --- netinet/ip_input.c (revision 241220) +++ netinet/ip_input.c (working copy) @@ -380,20 +380,18 @@ struct ifaddr *ifa; struct ifnet *ifp; int checkif, hlen = 0; - u_short sum; + uint16_t sum, ip_len; int dchg = 0; /* dest changed after fw */ struct in_addr odst; /* original dst address */ M_ASSERTPKTHDR(m); if (m->m_flags & M_FASTFWD_OURS) { - /* - * Firewall or NAT changed destination to local. - * We expect ip_len and ip_off to be in host byte order. - */ m->m_flags &= ~M_FASTFWD_OURS; /* Set up some basics that will be used later. */ ip = mtod(m, struct ip *); + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); hlen = ip->ip_hl << 2; goto ours; } @@ -458,15 +456,11 @@ return; #endif - /* - * Convert fields to host representation. - */ - ip->ip_len = ntohs(ip->ip_len); - if (ip->ip_len < hlen) { + ip_len = ntohs(ip->ip_len); + if (ip_len < hlen) { IPSTAT_INC(ips_badlen); goto bad; } - ip->ip_off = ntohs(ip->ip_off); /* * Check that the amount of data in the buffers @@ -474,17 +468,17 @@ * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ - if (m->m_pkthdr.len < ip->ip_len) { + if (m->m_pkthdr.len < ip_len) { tooshort: IPSTAT_INC(ips_tooshort); goto bad; } - if (m->m_pkthdr.len > ip->ip_len) { + if (m->m_pkthdr.len > ip_len) { if (m->m_len == m->m_pkthdr.len) { - m->m_len = ip->ip_len; - m->m_pkthdr.len = ip->ip_len; + m->m_len = ip_len; + m->m_pkthdr.len = ip_len; } else - m_adj(m, ip->ip_len - m->m_pkthdr.len); + m_adj(m, ip_len - m->m_pkthdr.len); } #ifdef IPSEC /* @@ -519,6 +513,8 @@ #ifdef IPFIREWALL_FORWARD if (m->m_flags & M_FASTFWD_OURS) { m->m_flags &= ~M_FASTFWD_OURS; + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); goto ours; } if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) { @@ -527,6 +523,8 @@ * packets originally destined to us to some other directly * connected host. */ + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); ip_forward(m, dchg); return; } @@ -534,6 +532,13 @@ passin: /* + * From now and up to output pfil(9) processing in ip_output() + * the header is in host byte order. + */ + ip->ip_len = ntohs(ip->ip_len); + ip->ip_off = ntohs(ip->ip_off); + + /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message @@ -1360,6 +1365,8 @@ * * The srcrt parameter indicates whether the packet is being forwarded * via a source route. + * + * IP header in host byte order. */ void ip_forward(struct mbuf *m, int srcrt) Index: netinet/ip_fastfwd.c =================================================================== --- netinet/ip_fastfwd.c (revision 241220) +++ netinet/ip_fastfwd.c (working copy) @@ -164,7 +164,7 @@ struct sockaddr_in *dst = NULL; struct ifnet *ifp; struct in_addr odest, dest; - u_short sum, ip_len; + uint16_t sum, ip_len, ip_off; int error = 0; int hlen, mtu; #ifdef IPFIREWALL_FORWARD @@ -340,12 +340,6 @@ * Step 3: incoming packet firewall processing */ - /* - * Convert to host representation - */ - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - odest.s_addr = dest.s_addr = ip->ip_dst.s_addr; /* @@ -472,8 +466,6 @@ forwardlocal: /* * Return packet for processing by ip_input(). - * Keep host byte order as expected at ip_input's - * "ours"-label. */ m->m_flags |= M_FASTFWD_OURS; if (ro.ro_rt) @@ -500,6 +492,8 @@ /* * Step 6: send off the packet */ + ip_len = ntohs(ip->ip_len); + ip_off = ntohs(ip->ip_off); /* * Check if route is dampned (when ARP is unable to resolve) @@ -515,7 +509,7 @@ /* * Check if there is enough space in the interface queue */ - if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= + if ((ifp->if_snd.ifq_len + ip_len / ifp->if_mtu + 1) >= ifp->if_snd.ifq_maxlen) { IPSTAT_INC(ips_odropped); /* would send source quench here but that is depreciated */ @@ -539,14 +533,9 @@ else mtu = ifp->if_mtu; - if (ip->ip_len <= mtu || - (ifp->if_hwassist & CSUM_FRAGMENT && (ip->ip_off & IP_DF) == 0)) { + if (ip_len <= mtu || + (ifp->if_hwassist & CSUM_FRAGMENT && (ip_off & IP_DF) == 0)) { /* - * Restore packet header fields to original values - */ - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); - /* * Send off the packet via outgoing interface */ error = (*ifp->if_output)(ifp, m, @@ -555,7 +544,7 @@ /* * Handle EMSGSIZE with icmp reply needfrag for TCP MTU discovery */ - if (ip->ip_off & IP_DF) { + if (ip_off & IP_DF) { IPSTAT_INC(ips_cantfrag); icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 0, mtu); @@ -565,10 +554,6 @@ * We have to fragment the packet */ m->m_pkthdr.csum_flags |= CSUM_IP; - /* - * ip_fragment expects ip_len and ip_off in host byte - * order but returns all packets in network byte order - */ if (ip_fragment(ip, &m, mtu, ifp->if_hwassist, (~ifp->if_hwassist & CSUM_DELAY_IP))) { goto drop; Index: contrib/ipfilter/netinet/fil.c =================================================================== --- contrib/ipfilter/netinet/fil.c (revision 241220) +++ contrib/ipfilter/netinet/fil.c (working copy) @@ -2513,7 +2513,7 @@ } else #endif { -#if (defined(OpenBSD) && (OpenBSD >= 200311)) && defined(_KERNEL) +#if ((defined(OpenBSD) && (OpenBSD >= 200311)) || (defined(FreeBSD) && (__FreeBSD_version >= 1000019))) && defined(_KERNEL) ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); #endif @@ -2777,7 +2777,7 @@ RWLOCK_EXIT(&ipf_global); #ifdef _KERNEL -# if (defined(OpenBSD) && (OpenBSD >= 200311)) +# if (defined(OpenBSD) && (OpenBSD >= 200311)) || (defined(FreeBSD) && (__FreeBSD_version >= 1000019)) if (FR_ISPASS(pass) && (v == 4)) { ip = fin->fin_ip; ip->ip_len = ntohs(ip->ip_len); Index: netpfil/pf/pf_ioctl.c =================================================================== --- netpfil/pf/pf_ioctl.c (revision 241220) +++ netpfil/pf/pf_ioctl.c (working copy) @@ -3473,52 +3473,22 @@ pf_check_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct inpcb *inp) { - /* - * XXX Wed Jul 9 22:03:16 2003 UTC - * OpenBSD has changed its byte ordering convention on ip_len/ip_off - * in network stack. OpenBSD's network stack have converted - * ip_len/ip_off to host byte order frist as FreeBSD. - * Now this is not true anymore , so we should convert back to network - * byte order. - */ - struct ip *h = NULL; int chk; - if ((*m)->m_pkthdr.len >= (int)sizeof(struct ip)) { - /* if m_pkthdr.len is less than ip header, pf will handle. */ - h = mtod(*m, struct ip *); - HTONS(h->ip_len); - HTONS(h->ip_off); - } - CURVNET_SET(ifp->if_vnet); chk = pf_test(PF_IN, ifp, m, inp); - CURVNET_RESTORE(); + if (chk && *m) { m_freem(*m); *m = NULL; } - if (*m != NULL) { - /* pf_test can change ip header location */ - h = mtod(*m, struct ip *); - NTOHS(h->ip_len); - NTOHS(h->ip_off); - } - return chk; + + return (chk); } static int pf_check_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct inpcb *inp) { - /* - * XXX Wed Jul 9 22:03:16 2003 UTC - * OpenBSD has changed its byte ordering convention on ip_len/ip_off - * in network stack. OpenBSD's network stack have converted - * ip_len/ip_off to host byte order frist as FreeBSD. - * Now this is not true anymore , so we should convert back to network - * byte order. - */ - struct ip *h = NULL; int chk; /* We need a proper CSUM befor we start (s. OpenBSD ip_output) */ @@ -3526,26 +3496,14 @@ in_delayed_cksum(*m); (*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA; } - if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) { - /* if m_pkthdr.len is less than ip header, pf will handle. */ - h = mtod(*m, struct ip *); - HTONS(h->ip_len); - HTONS(h->ip_off); - } - CURVNET_SET(ifp->if_vnet); + chk = pf_test(PF_OUT, ifp, m, inp); - CURVNET_RESTORE(); if (chk && *m) { m_freem(*m); *m = NULL; } - if (*m != NULL) { - /* pf_test can change ip header location */ - h = mtod(*m, struct ip *); - NTOHS(h->ip_len); - NTOHS(h->ip_off); - } - return chk; + + return (chk); } #endif @@ -3554,10 +3512,6 @@ pf_check6_in(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct inpcb *inp) { - - /* - * IPv6 is not affected by ip_len/ip_off byte order changes. - */ int chk; /* @@ -3579,9 +3533,6 @@ pf_check6_out(void *arg, struct mbuf **m, struct ifnet *ifp, int dir, struct inpcb *inp) { - /* - * IPv6 does not affected ip_len/ip_off byte order changes. - */ int chk; /* We need a proper CSUM before we start (s. OpenBSD ip_output) */ Index: netpfil/ipfw/ip_fw_pfil.c =================================================================== --- netpfil/ipfw/ip_fw_pfil.c (revision 241220) +++ netpfil/ipfw/ip_fw_pfil.c (working copy) @@ -125,10 +125,6 @@ int ipfw; int ret; - /* all the processing now uses ip_len in net format */ - if (mtod(*m0, struct ip *)->ip_v == 4) - SET_NET_IPLEN(mtod(*m0, struct ip *)); - /* convert dir to IPFW values */ dir = (dir == PFIL_IN) ? DIR_IN : DIR_OUT; bzero(&args, sizeof(args)); @@ -288,8 +284,7 @@ FREE_PKT(*m0); *m0 = NULL; } - if (*m0 && mtod(*m0, struct ip *)->ip_v == 4) - SET_HOST_IPLEN(mtod(*m0, struct ip *)); + return ret; } Index: sys/param.h =================================================================== --- sys/param.h (revision 241220) +++ sys/param.h (working copy) @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1000018 /* Master, propagated to newvers */ +#define __FreeBSD_version 1000019 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, Index: net/if_bridge.c =================================================================== --- net/if_bridge.c (revision 241220) +++ net/if_bridge.c (working copy) @@ -3093,15 +3093,6 @@ switch (ether_type) { case ETHERTYPE_IP: /* - * before calling the firewall, swap fields the same as - * IP does. here we assume the header is contiguous - */ - ip = mtod(*mp, struct ip *); - - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - - /* * Run pfil on the member interface and the bridge, both can * be skipped by clearing pfil_member or pfil_bridge. * @@ -3139,7 +3130,7 @@ } } - /* Recalculate the ip checksum and restore byte ordering */ + /* Recalculate the ip checksum. */ ip = mtod(*mp, struct ip *); hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) @@ -3151,8 +3142,6 @@ if (ip == NULL) goto bad; } - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); ip->ip_sum = 0; if (hlen == sizeof(struct ip)) ip->ip_sum = in_cksum_hdr(ip); Index: net/if_enc.c =================================================================== --- net/if_enc.c (revision 241220) +++ net/if_enc.c (working copy) @@ -270,23 +270,8 @@ switch (ip->ip_v) { #ifdef INET case 4: - /* - * before calling the firewall, swap fields the same as - * IP does. here we assume the header is contiguous - */ - ip->ip_len = ntohs(ip->ip_len); - ip->ip_off = ntohs(ip->ip_off); - error = pfil_run_hooks(&V_inet_pfil_hook, mp, encif, dir, NULL); - - if (*mp == NULL || error != 0) - break; - - /* restore byte ordering */ - ip = mtod(*mp, struct ip *); - ip->ip_len = htons(ip->ip_len); - ip->ip_off = htons(ip->ip_off); break; #endif #ifdef INET6 --r5Pyd7+fXNt84Ff3--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20121005114716.GP34622>