Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 25 Dec 2015 15:12:12 +0000 (UTC)
From:      Kristof Provost <kp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r292731 - stable/9/sys/contrib/pf/net
Message-ID:  <201512251512.tBPFCCfv027548@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kp
Date: Fri Dec 25 15:12:11 2015
New Revision: 292731
URL: https://svnweb.freebsd.org/changeset/base/292731

Log:
  pf: Fix TSO issues
  
  In certain configurations (mostly but not exclusively as a VM on Xen) pf
  produced packets with an invalid TCP checksum.
  
  The problem was that pf could only handle packets with a full checksum. The
  FreeBSD IP stack produces TCP packets with a pseudo-header checksum (only
  addresses, length and protocol).
  Certain network interfaces expect to see the pseudo-header checksum, so they
  end up producing packets with invalid checksums.
  
  To fix this stop calculating the full checksum and teach pf to only update TCP
  checksums if TSO is disabled or the change affects the pseudo-header checksum.
  
  PR:             154428, 193579, 198868
  Sponsored by:   RootBSD

Modified:
  stable/9/sys/contrib/pf/net/pf.c
  stable/9/sys/contrib/pf/net/pf_ioctl.c
  stable/9/sys/contrib/pf/net/pf_norm.c
  stable/9/sys/contrib/pf/net/pfvar.h

Modified: stable/9/sys/contrib/pf/net/pf.c
==============================================================================
--- stable/9/sys/contrib/pf/net/pf.c	Fri Dec 25 14:51:36 2015	(r292730)
+++ stable/9/sys/contrib/pf/net/pf.c	Fri Dec 25 15:12:11 2015	(r292731)
@@ -239,7 +239,7 @@ void			 pf_init_threshold(struct pf_thre
 void			 pf_add_threshold(struct pf_threshold *);
 int			 pf_check_threshold(struct pf_threshold *);
 
-void			 pf_change_ap(struct pf_addr *, u_int16_t *,
+void			 pf_change_ap(struct mbuf *, struct pf_addr *, u_int16_t *,
 			    u_int16_t *, u_int16_t *, struct pf_addr *,
 			    u_int16_t, u_int8_t, sa_family_t);
 int			 pf_modulate_sack(struct mbuf *, int, struct pf_pdesc *,
@@ -2007,6 +2007,22 @@ pf_addr_wrap_neq(struct pf_addr_wrap *aw
 	}
 }
 
+/**
+ * Checksum updates are a little complicated because the checksum in the TCP/UDP
+ * header isn't always a full checksum. In some cases (i.e. output) it's a
+ * pseudo-header checksum, which is a partial checksum over src/dst IP
+ * addresses, protocol number and length.
+ *
+ * That means we have the following cases:
+ *  * Input or forwarding: we don't have TSO, the checksum fields are full
+ *  	checksums, we need to update the checksum whenever we change anything.
+ *  * Output (i.e. the checksum is a pseudo-header checksum):
+ *  	x The field being updated is src/dst address or affects the length of
+ *  	the packet. We need to update the pseudo-header checksum (note that this
+ *  	checksum is not ones' complement).
+ *  	x Some other field is being modified (e.g. src/dst port numbers): We
+ *  	don't have to update anything.
+ **/
 u_int16_t
 pf_cksum_fixup(u_int16_t cksum, u_int16_t old, u_int16_t new, u_int8_t udp)
 {
@@ -2022,9 +2038,20 @@ pf_cksum_fixup(u_int16_t cksum, u_int16_
 	return (l);
 }
 
+u_int16_t
+pf_proto_cksum_fixup(struct mbuf *m, u_int16_t cksum, u_int16_t old,
+        u_int16_t new, u_int8_t udp)
+{
+	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
+		return (cksum);
+
+	return (pf_cksum_fixup(cksum, old, new, udp));
+}
+
 void
-pf_change_ap(struct pf_addr *a, u_int16_t *p, u_int16_t *ic, u_int16_t *pc,
-    struct pf_addr *an, u_int16_t pn, u_int8_t u, sa_family_t af)
+pf_change_ap(struct mbuf *m, struct pf_addr *a, u_int16_t *p, u_int16_t *ic,
+        u_int16_t *pc, struct pf_addr *an, u_int16_t pn, u_int8_t u,
+        sa_family_t af)
 {
 	struct pf_addr	ao;
 	u_int16_t	po = *p;
@@ -2032,6 +2059,9 @@ pf_change_ap(struct pf_addr *a, u_int16_
 	PF_ACPY(&ao, a, af);
 	PF_ACPY(a, an, af);
 
+	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | CSUM_DELAY_DATA_IPV6))
+		*pc = ~*pc;
+
 	*p = pn;
 
 	switch (af) {
@@ -2041,17 +2071,19 @@ pf_change_ap(struct pf_addr *a, u_int16_
 		    ao.addr16[0], an->addr16[0], 0),
 		    ao.addr16[1], an->addr16[1], 0);
 		*p = pn;
-		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+
+		*pc = pf_cksum_fixup(pf_cksum_fixup(*pc,
 		    ao.addr16[0], an->addr16[0], u),
-		    ao.addr16[1], an->addr16[1], u),
-		    po, pn, u);
+		    ao.addr16[1], an->addr16[1], u);
+
+		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
 		break;
 #endif /* INET */
 #ifdef INET6
 	case AF_INET6:
 		*pc = pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
 		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(
-		    pf_cksum_fixup(pf_cksum_fixup(pf_cksum_fixup(*pc,
+		    pf_cksum_fixup(pf_cksum_fixup(*pc,
 		    ao.addr16[0], an->addr16[0], u),
 		    ao.addr16[1], an->addr16[1], u),
 		    ao.addr16[2], an->addr16[2], u),
@@ -2059,13 +2091,20 @@ pf_change_ap(struct pf_addr *a, u_int16_
 		    ao.addr16[4], an->addr16[4], u),
 		    ao.addr16[5], an->addr16[5], u),
 		    ao.addr16[6], an->addr16[6], u),
-		    ao.addr16[7], an->addr16[7], u),
-		    po, pn, u);
+		    ao.addr16[7], an->addr16[7], u);
+
+		*pc = pf_proto_cksum_fixup(m, *pc, po, pn, u);
 		break;
 #endif /* INET6 */
 	}
-}
 
+	if (m->m_pkthdr.csum_flags & (CSUM_DELAY_DATA | 
+	    CSUM_DELAY_DATA_IPV6)) {
+		*pc = ~*pc;
+		if (! *pc)
+			*pc = 0xffff;
+	}
+}
 
 /* Changes a u_int32_t.  Uses a void * so there are no align restrictions */
 void
@@ -2079,6 +2118,19 @@ pf_change_a(void *a, u_int16_t *c, u_int
 	    ao % 65536, an % 65536, u);
 }
 
+void
+pf_change_proto_a(struct mbuf *m, void *a, u_int16_t *c, u_int32_t an, u_int8_t udp)
+{
+	u_int32_t	ao;
+
+	memcpy(&ao, a, sizeof(ao));
+	memcpy(a, &an, sizeof(u_int32_t));
+
+	*c = pf_proto_cksum_fixup(m,
+	    pf_proto_cksum_fixup(m, *c, ao / 65536, an / 65536, udp),
+	    ao % 65536, an % 65536, udp);
+}
+
 #ifdef INET6
 void
 pf_change_a6(struct pf_addr *a, u_int16_t *c, struct pf_addr *an, u_int8_t u)
@@ -2228,12 +2280,10 @@ pf_modulate_sack(struct mbuf *m, int off
 				for (i = 2; i + TCPOLEN_SACK <= olen;
 				    i += TCPOLEN_SACK) {
 					memcpy(&sack, &opt[i], sizeof(sack));
-					pf_change_a(&sack.start, &th->th_sum,
-					    htonl(ntohl(sack.start) -
-					    dst->seqdiff), 0);
-					pf_change_a(&sack.end, &th->th_sum,
-					    htonl(ntohl(sack.end) -
-					    dst->seqdiff), 0);
+					pf_change_proto_a(m, &sack.start, &th->th_sum,
+					    htonl(ntohl(sack.start) - dst->seqdiff), 0);
+					pf_change_proto_a(m, &sack.end, &th->th_sum,
+					    htonl(ntohl(sack.end) - dst->seqdiff), 0);
 					memcpy(&opt[i], &sack, sizeof(sack));
 				}
 				copyback = 1;
@@ -3400,7 +3450,7 @@ pf_test_rule(struct pf_rule **rm, struct
 
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
 			    nk->port[pd->sidx] != sport) {
-				pf_change_ap(saddr, &th->th_sport, pd->ip_sum,
+				pf_change_ap(m, saddr, &th->th_sport, pd->ip_sum,
 				    &th->th_sum, &nk->addr[pd->sidx],
 				    nk->port[pd->sidx], 0, af);
 				pd->sport = &th->th_sport;
@@ -3409,7 +3459,7 @@ pf_test_rule(struct pf_rule **rm, struct
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
 			    nk->port[pd->didx] != dport) {
-				pf_change_ap(daddr, &th->th_dport, pd->ip_sum,
+				pf_change_ap(m, daddr, &th->th_dport, pd->ip_sum,
 				    &th->th_sum, &nk->addr[pd->didx],
 				    nk->port[pd->didx], 0, af);
 				dport = th->th_dport;
@@ -3423,7 +3473,7 @@ pf_test_rule(struct pf_rule **rm, struct
 
 			if (PF_ANEQ(saddr, &nk->addr[pd->sidx], af) ||
 			    nk->port[pd->sidx] != sport) {
-				pf_change_ap(saddr, &pd->hdr.udp->uh_sport,
+				pf_change_ap(m, saddr, &pd->hdr.udp->uh_sport,
 				    pd->ip_sum, &pd->hdr.udp->uh_sum,
 				    &nk->addr[pd->sidx],
 				    nk->port[pd->sidx], 1, af);
@@ -3433,7 +3483,7 @@ pf_test_rule(struct pf_rule **rm, struct
 
 			if (PF_ANEQ(daddr, &nk->addr[pd->didx], af) ||
 			    nk->port[pd->didx] != dport) {
-				pf_change_ap(daddr, &pd->hdr.udp->uh_dport,
+				pf_change_ap(m, daddr, &pd->hdr.udp->uh_dport,
 				    pd->ip_sum, &pd->hdr.udp->uh_sum,
 				    &nk->addr[pd->didx],
 				    nk->port[pd->didx], 1, af);
@@ -3845,7 +3895,7 @@ pf_create_state(struct pf_rule *r, struc
 			if ((s->src.seqdiff = pf_tcp_iss(pd) - s->src.seqlo) ==
 			    0)
 				s->src.seqdiff = 1;
-			pf_change_a(&th->th_seq, &th->th_sum,
+			pf_change_proto_a(m, &th->th_seq, &th->th_sum,
 			    htonl(s->src.seqlo + s->src.seqdiff), 0);
 			*rewrite = 1;
 		} else
@@ -4175,9 +4225,9 @@ pf_tcp_track_full(struct pf_state_peer *
 			while ((src->seqdiff = arc4random() - seq) == 0)
 				;
 			ack = ntohl(th->th_ack) - dst->seqdiff;
-			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
+			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
-			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
+			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
 			*copyback = 1;
 		} else {
 			ack = ntohl(th->th_ack);
@@ -4227,9 +4277,9 @@ pf_tcp_track_full(struct pf_state_peer *
 		ack = ntohl(th->th_ack) - dst->seqdiff;
 		if (src->seqdiff) {
 			/* Modulate sequence numbers */
-			pf_change_a(&th->th_seq, &th->th_sum, htonl(seq +
+			pf_change_proto_a(m, &th->th_seq, &th->th_sum, htonl(seq +
 			    src->seqdiff), 0);
-			pf_change_a(&th->th_ack, &th->th_sum, htonl(ack), 0);
+			pf_change_proto_a(m, &th->th_ack, &th->th_sum, htonl(ack), 0);
 			*copyback = 1;
 		}
 		end = seq + pd->p_len;
@@ -4729,14 +4779,14 @@ pf_test_state_tcp(struct pf_state **stat
 
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
 		    nk->port[pd->sidx] != th->th_sport)
-			pf_change_ap(pd->src, &th->th_sport, pd->ip_sum,
-			    &th->th_sum, &nk->addr[pd->sidx],
+			pf_change_ap(m, pd->src, &th->th_sport,
+			    pd->ip_sum, &th->th_sum, &nk->addr[pd->sidx],
 			    nk->port[pd->sidx], 0, pd->af);
 
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
 		    nk->port[pd->didx] != th->th_dport)
-			pf_change_ap(pd->dst, &th->th_dport, pd->ip_sum,
-			    &th->th_sum, &nk->addr[pd->didx],
+			pf_change_ap(m, pd->dst, &th->th_dport,
+			    pd->ip_sum, &th->th_sum, &nk->addr[pd->didx],
 			    nk->port[pd->didx], 0, pd->af);
 		copyback = 1;
 	}
@@ -4807,13 +4857,13 @@ pf_test_state_udp(struct pf_state **stat
 
 		if (PF_ANEQ(pd->src, &nk->addr[pd->sidx], pd->af) ||
 		    nk->port[pd->sidx] != uh->uh_sport)
-			pf_change_ap(pd->src, &uh->uh_sport, pd->ip_sum,
+			pf_change_ap(m, pd->src, &uh->uh_sport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->sidx],
 			    nk->port[pd->sidx], 1, pd->af);
 
 		if (PF_ANEQ(pd->dst, &nk->addr[pd->didx], pd->af) ||
 		    nk->port[pd->didx] != uh->uh_dport)
-			pf_change_ap(pd->dst, &uh->uh_dport, pd->ip_sum,
+			pf_change_ap(m, pd->dst, &uh->uh_dport, pd->ip_sum,
 			    &uh->uh_sum, &nk->addr[pd->didx],
 			    nk->port[pd->didx], 1, pd->af);
 #ifdef __FreeBSD__

Modified: stable/9/sys/contrib/pf/net/pf_ioctl.c
==============================================================================
--- stable/9/sys/contrib/pf/net/pf_ioctl.c	Fri Dec 25 14:51:36 2015	(r292730)
+++ stable/9/sys/contrib/pf/net/pf_ioctl.c	Fri Dec 25 15:12:11 2015	(r292731)
@@ -4158,11 +4158,6 @@ pf_check_out(void *arg, struct mbuf **m,
 	struct ip *h = NULL;
 	int chk;
 
-	/* We need a proper CSUM befor we start (s. OpenBSD ip_output) */
-	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
-		in_delayed_cksum(*m);
-		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
-	}
 	if ((*m)->m_pkthdr.len >= (int)sizeof(*h)) {
 		/* if m_pkthdr.len is less than ip header, pf will handle. */
 		h = mtod(*m, struct ip *);
@@ -4222,14 +4217,6 @@ pf_check6_out(void *arg, struct mbuf **m
 	 */
 	int chk;
 
-	/* We need a proper CSUM before we start (s. OpenBSD ip_output) */
-	if ((*m)->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
-#ifdef INET
-		/* XXX-BZ copy&paste error from r126261? */
-		in_delayed_cksum(*m);
-#endif
-		(*m)->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
-	}
 	CURVNET_SET(ifp->if_vnet);
 	chk = pf_test6(PF_OUT, ifp, m, NULL, inp);
 	CURVNET_RESTORE();

Modified: stable/9/sys/contrib/pf/net/pf_norm.c
==============================================================================
--- stable/9/sys/contrib/pf/net/pf_norm.c	Fri Dec 25 14:51:36 2015	(r292730)
+++ stable/9/sys/contrib/pf/net/pf_norm.c	Fri Dec 25 15:12:11 2015	(r292731)
@@ -1657,13 +1657,14 @@ pf_normalize_tcp(int dir, struct pfi_kif
 		th->th_x2 = 0;
 		nv = *(u_int16_t *)(&th->th_ack + 1);
 
-		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
+		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, ov, nv, 0);
 		rewrite = 1;
 	}
 
 	/* Remove urgent pointer, if TH_URG is not set */
 	if (!(flags & TH_URG) && th->th_urp) {
-		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
+		th->th_sum = pf_proto_cksum_fixup(m, th->th_sum, th->th_urp,
+		    0, 0);
 		th->th_urp = 0;
 		rewrite = 1;
 	}
@@ -1889,7 +1890,7 @@ pf_normalize_tcp_stateful(struct mbuf *m
 					    (src->scrub->pfss_flags &
 					    PFSS_TIMESTAMP)) {
 						tsval = ntohl(tsval);
-						pf_change_a(&opt[2],
+						pf_change_proto_a(m, &opt[2],
 						    &th->th_sum,
 						    htonl(tsval +
 						    src->scrub->pfss_ts_mod),
@@ -1905,7 +1906,7 @@ pf_normalize_tcp_stateful(struct mbuf *m
 					    PFSS_TIMESTAMP)) {
 						tsecr = ntohl(tsecr)
 						    - dst->scrub->pfss_ts_mod;
-						pf_change_a(&opt[6],
+						pf_change_proto_a(m, &opt[6],
 						    &th->th_sum, htonl(tsecr),
 						    0);
 						copyback = 1;
@@ -2286,8 +2287,8 @@ pf_normalize_tcpopt(struct pf_rule *r, s
 		case TCPOPT_MAXSEG:
 			mss = (u_int16_t *)(optp + 2);
 			if ((ntohs(*mss)) > r->max_mss) {
-				th->th_sum = pf_cksum_fixup(th->th_sum,
-				    *mss, htons(r->max_mss), 0);
+				th->th_sum = pf_proto_cksum_fixup(m,
+				    th->th_sum, *mss, htons(r->max_mss), 0);
 				*mss = htons(r->max_mss);
 				rewrite = 1;
 			}

Modified: stable/9/sys/contrib/pf/net/pfvar.h
==============================================================================
--- stable/9/sys/contrib/pf/net/pfvar.h	Fri Dec 25 14:51:36 2015	(r292730)
+++ stable/9/sys/contrib/pf/net/pfvar.h	Fri Dec 25 15:12:11 2015	(r292731)
@@ -1909,6 +1909,8 @@ extern void			 pf_print_state(struct pf_
 extern void			 pf_print_flags(u_int8_t);
 extern u_int16_t		 pf_cksum_fixup(u_int16_t, u_int16_t, u_int16_t,
 				    u_int8_t);
+extern u_int16_t		 pf_proto_cksum_fixup(struct mbuf *, u_int16_t,
+				    u_int16_t, u_int16_t, u_int8_t);
 
 #ifdef __FreeBSD__
 VNET_DECLARE(struct ifnet *,		 sync_ifp);
@@ -1954,6 +1956,9 @@ u_int32_t	pf_new_isn(struct pf_state *);
 void   *pf_pull_hdr(struct mbuf *, int, void *, int, u_short *, u_short *,
 	    sa_family_t);
 void	pf_change_a(void *, u_int16_t *, u_int32_t, u_int8_t);
+void	pf_change_proto_a(struct mbuf *, void *, u_int16_t *, u_int32_t,
+	    u_int8_t);
+void	pf_change_tcp_a(struct mbuf *, void *, u_int16_t *, u_int32_t);
 int	pflog_packet(struct pfi_kif *, struct mbuf *, sa_family_t, u_int8_t,
 	    u_int8_t, struct pf_rule *, struct pf_rule *, struct pf_ruleset *,
 	    struct pf_pdesc *);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201512251512.tBPFCCfv027548>