Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 18 Jun 2015 20:28:53 +0000 (UTC)
From:      Kristof Provost <kp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-10@freebsd.org
Subject:   svn commit: r284569 - in stable/10/sys: net netpfil/pf
Message-ID:  <201506182028.t5IKSrxo022901@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kp
Date: Thu Jun 18 20:28:52 2015
New Revision: 284569
URL: https://svnweb.freebsd.org/changeset/base/284569

Log:
  Merge r278831, r278834
  
  Update the pf fragment handling code to closer match recent OpenBSD.
  That partially fixes IPv6 fragment handling.
  
  Differential Revision:	https://reviews.freebsd.org/D2814
  Reviewed by:	gnn

Modified:
  stable/10/sys/net/pfvar.h
  stable/10/sys/netpfil/pf/pf.c
  stable/10/sys/netpfil/pf/pf_norm.c
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/sys/net/pfvar.h
==============================================================================
--- stable/10/sys/net/pfvar.h	Thu Jun 18 20:21:02 2015	(r284568)
+++ stable/10/sys/net/pfvar.h	Thu Jun 18 20:28:52 2015	(r284569)
@@ -1668,6 +1668,8 @@ int		 pfi_clear_flags(const char *, int)
 
 int		 pf_match_tag(struct mbuf *, struct pf_rule *, int *, int);
 int		 pf_tag_packet(struct mbuf *, struct pf_pdesc *, int);
+int		 pf_addr_cmp(struct pf_addr *, struct pf_addr *,
+		    sa_family_t);
 void		 pf_qid2qname(u_int32_t, char *);
 
 VNET_DECLARE(struct pf_kstatus, pf_status);

Modified: stable/10/sys/netpfil/pf/pf.c
==============================================================================
--- stable/10/sys/netpfil/pf/pf.c	Thu Jun 18 20:21:02 2015	(r284568)
+++ stable/10/sys/netpfil/pf/pf.c	Thu Jun 18 20:28:52 2015	(r284569)
@@ -368,6 +368,45 @@ VNET_DEFINE(void *, pf_swi_cookie);
 VNET_DEFINE(uint32_t, pf_hashseed);
 #define	V_pf_hashseed	VNET(pf_hashseed)
 
+int
+pf_addr_cmp(struct pf_addr *a, struct pf_addr *b, sa_family_t af)
+{
+
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		if (a->addr32[0] > b->addr32[0])
+			return (1);
+		if (a->addr32[0] < b->addr32[0])
+			return (-1);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		if (a->addr32[3] > b->addr32[3])
+			return (1);
+		if (a->addr32[3] < b->addr32[3])
+			return (-1);
+		if (a->addr32[2] > b->addr32[2])
+			return (1);
+		if (a->addr32[2] < b->addr32[2])
+			return (-1);
+		if (a->addr32[1] > b->addr32[1])
+			return (1);
+		if (a->addr32[1] < b->addr32[1])
+			return (-1);
+		if (a->addr32[0] > b->addr32[0])
+			return (1);
+		if (a->addr32[0] < b->addr32[0])
+			return (-1);
+		break;
+#endif /* INET6 */
+	default:
+		panic("%s: unknown address family %u", __func__, af);
+	}
+	return (0);
+}
+
 static __inline uint32_t
 pf_hashkey(struct pf_state_key *sk)
 {

Modified: stable/10/sys/netpfil/pf/pf_norm.c
==============================================================================
--- stable/10/sys/netpfil/pf/pf_norm.c	Thu Jun 18 20:21:02 2015	(r284568)
+++ stable/10/sys/netpfil/pf/pf_norm.c	Thu Jun 18 20:28:52 2015	(r284569)
@@ -1,5 +1,6 @@
 /*-
  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
+ * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -48,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
+#include <netinet6/ip6_var.h>
 #include <netinet/tcp.h>
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_seq.h>
@@ -57,38 +59,51 @@ __FBSDID("$FreeBSD$");
 #endif /* INET6 */
 
 struct pf_frent {
-	LIST_ENTRY(pf_frent) fr_next;
-	union {
-		struct {
-			struct ip *_fr_ip;
-			struct mbuf *_fr_m;
-		} _frag;
-		struct {
-			uint16_t _fr_off;
-			uint16_t _fr_end;
-		} _cache;
-	} _u;
+	TAILQ_ENTRY(pf_frent)	fr_next;
+	struct mbuf	*fe_m;
+	uint16_t	fe_hdrlen;	/* ipv4 header length with ip options
+					   ipv6, extension, fragment header */
+	uint16_t	fe_extoff;	/* last extension header offset or 0 */
+	uint16_t	fe_len;		/* fragment length */
+	uint16_t	fe_off;		/* fragment offset */
+	uint16_t	fe_mff;		/* more fragment flag */
+};
+
+struct pf_fragment_cmp {
+	struct pf_addr	frc_src;
+	struct pf_addr	frc_dst;
+	uint32_t	frc_id;
+	sa_family_t	frc_af;
+	uint8_t		frc_proto;
+	uint8_t		frc_direction;
 };
-#define	fr_ip	_u._frag._fr_ip
-#define	fr_m	_u._frag._fr_m
-#define	fr_off	_u._cache._fr_off
-#define	fr_end	_u._cache._fr_end
 
 struct pf_fragment {
+	struct pf_fragment_cmp	fr_key;
+#define fr_src	fr_key.frc_src
+#define fr_dst	fr_key.frc_dst
+#define fr_id	fr_key.frc_id
+#define fr_af	fr_key.frc_af
+#define fr_proto	fr_key.frc_proto
+#define fr_direction	fr_key.frc_direction
+
 	RB_ENTRY(pf_fragment) fr_entry;
 	TAILQ_ENTRY(pf_fragment) frag_next;
-	struct in_addr	fr_src;
-	struct in_addr	fr_dst;
-	u_int8_t	fr_p;		/* protocol of this fragment */
-	u_int8_t	fr_flags;	/* status flags */
-#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
-#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
-#define PFFRAG_DROP	0x0004		/* Drop all fragments */
+	uint8_t		fr_flags;	/* status flags */
+#define PFFRAG_SEENLAST		0x0001	/* Seen the last fragment for this */
+#define PFFRAG_NOBUFFER		0x0002	/* Non-buffering fragment cache */
+#define PFFRAG_DROP		0x0004	/* Drop all fragments */
 #define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))
-	u_int16_t	fr_id;		/* fragment id for reassemble */
-	u_int16_t	fr_max;		/* fragment data max */
-	u_int32_t	fr_timeout;
-	LIST_HEAD(, pf_frent) fr_queue;
+	uint16_t	fr_max;		/* fragment data max */
+	uint32_t	fr_timeout;
+	uint16_t	fr_maxlen;	/* maximum length of single fragment */
+	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
+};
+
+struct pf_fragment_tag {
+	uint16_t	ft_hdrlen;	/* header length of reassembled pkt */
+	uint16_t	ft_extoff;	/* last extension header offset or 0 */
+	uint16_t	ft_maxlen;	/* maximum fragment payload length */
 };
 
 static struct mtx pf_frag_mtx;
@@ -125,15 +140,25 @@ static void		 pf_remove_fragment(struct 
 static int		 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
 			    struct tcphdr *, int, sa_family_t);
 #ifdef INET
-static void		 pf_ip2key(struct pf_fragment *, struct ip *);
 static void		 pf_scrub_ip(struct mbuf **, u_int32_t, u_int8_t,
 			    u_int8_t);
 static void		 pf_flush_fragments(void);
-static struct pf_fragment *pf_find_fragment(struct ip *, struct pf_frag_tree *);
-static struct mbuf	*pf_reassemble(struct mbuf **, struct pf_fragment **,
-			    struct pf_frent *, int);
+static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
+			struct pf_frag_tree *tree);
+struct pf_frent		*pf_create_fragment(u_short *);
+static int		pf_reassemble(struct mbuf **, struct ip *, int,
+			    u_short *);
+int			pf_reassemble6(struct mbuf **, struct ip6_hdr *,
+			    struct ip6_frag *, uint16_t, uint16_t, int,
+			    u_short *);
 static struct mbuf	*pf_fragcache(struct mbuf **, struct ip*,
 			    struct pf_fragment **, int, int, int *);
+static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
+			    struct pf_frent *, u_short *);
+int			pf_isfull_fragment(struct pf_fragment *);
+struct mbuf		*pf_join_fragment(struct pf_fragment *);
+
+
 #endif /* INET */
 #ifdef INET6
 static void		 pf_scrub_ip6(struct mbuf **, u_int8_t);
@@ -145,6 +170,18 @@ static void		 pf_scrub_ip6(struct mbuf *
 	}						\
 } while(0)
 
+static void
+pf_ip2key(struct ip *ip, int dir, struct pf_fragment_cmp *key)
+{
+
+	key->frc_src.v4 = ip->ip_src;
+	key->frc_dst.v4 = ip->ip_dst;
+	key->frc_af = AF_INET;
+	key->frc_proto = ip->ip_p;
+	key->frc_id = ip->ip_id;
+	key->frc_direction = dir;
+}
+
 void
 pf_normalize_init(void)
 {
@@ -184,18 +221,16 @@ pf_frag_compare(struct pf_fragment *a, s
 {
 	int	diff;
 
-	if ((diff = a->fr_id - b->fr_id))
+	if ((diff = a->fr_id - b->fr_id) != 0)
 		return (diff);
-	else if ((diff = a->fr_p - b->fr_p))
+	if ((diff = a->fr_proto - b->fr_proto) != 0)
+		return (diff);
+	if ((diff = a->fr_af - b->fr_af) != 0)
+		return (diff);
+	if ((diff = pf_addr_cmp(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
+		return (diff);
+	if ((diff = pf_addr_cmp(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
 		return (diff);
-	else if (a->fr_src.s_addr < b->fr_src.s_addr)
-		return (-1);
-	else if (a->fr_src.s_addr > b->fr_src.s_addr)
-		return (1);
-	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
-		return (-1);
-	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
-		return (1);
 	return (0);
 }
 
@@ -270,23 +305,23 @@ pf_free_fragment(struct pf_fragment *fra
 
 	/* Free all fragments */
 	if (BUFFER_FRAGMENTS(frag)) {
-		for (frent = LIST_FIRST(&frag->fr_queue); frent;
-		    frent = LIST_FIRST(&frag->fr_queue)) {
-			LIST_REMOVE(frent, fr_next);
+		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
+		    frent = TAILQ_FIRST(&frag->fr_queue)) {
+			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
 
-			m_freem(frent->fr_m);
+			m_freem(frent->fe_m);
 			uma_zfree(V_pf_frent_z, frent);
 		}
 	} else {
-		for (frent = LIST_FIRST(&frag->fr_queue); frent;
-		    frent = LIST_FIRST(&frag->fr_queue)) {
-			LIST_REMOVE(frent, fr_next);
-
-			KASSERT((LIST_EMPTY(&frag->fr_queue) ||
-			    LIST_FIRST(&frag->fr_queue)->fr_off >
-			    frent->fr_end),
-			    ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
-			    " frent->fr_end): %s", __func__));
+		for (frent = TAILQ_FIRST(&frag->fr_queue); frent;
+		    frent = TAILQ_FIRST(&frag->fr_queue)) {
+			TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+
+			KASSERT((TAILQ_EMPTY(&frag->fr_queue) ||
+			    TAILQ_FIRST(&frag->fr_queue)->fe_off >
+			    frent->fe_len),
+			    ("! (TAILQ_EMPTY() || TAILQ_FIRST()->fe_off >"
+			    " frent->fe_len): %s", __func__));
 
 			uma_zfree(V_pf_frent_z, frent);
 		}
@@ -296,26 +331,14 @@ pf_free_fragment(struct pf_fragment *fra
 }
 
 #ifdef INET
-static void
-pf_ip2key(struct pf_fragment *key, struct ip *ip)
-{
-	key->fr_p = ip->ip_p;
-	key->fr_id = ip->ip_id;
-	key->fr_src.s_addr = ip->ip_src.s_addr;
-	key->fr_dst.s_addr = ip->ip_dst.s_addr;
-}
-
 static struct pf_fragment *
-pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
+pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
 {
-	struct pf_fragment	 key;
 	struct pf_fragment	*frag;
 
 	PF_FRAG_ASSERT();
 
-	pf_ip2key(&key, ip);
-
-	frag = RB_FIND(pf_frag_tree, tree, &key);
+	frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
 	if (frag != NULL) {
 		/* XXX Are we sure we want to update the timeout? */
 		frag->fr_timeout = time_uptime;
@@ -352,210 +375,412 @@ pf_remove_fragment(struct pf_fragment *f
 }
 
 #ifdef INET
-#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
-static struct mbuf *
-pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
-    struct pf_frent *frent, int mff)
+struct pf_frent *
+pf_create_fragment(u_short *reason)
 {
-	struct mbuf	*m = *m0, *m2;
-	struct pf_frent	*frea, *next;
-	struct pf_frent	*frep = NULL;
-	struct ip	*ip = frent->fr_ip;
-	int		 hlen = ip->ip_hl << 2;
-	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
-	u_int16_t	 ip_len = ntohs(ip->ip_len) - ip->ip_hl * 4;
-	u_int16_t	 max = ip_len + off;
+	struct pf_frent *frent;
 
 	PF_FRAG_ASSERT();
-	KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
-	    ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __FUNCTION__));
 
-	/* Strip off ip header */
-	m->m_data += hlen;
-	m->m_len -= hlen;
+	frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
+	if (frent == NULL) {
+		pf_flush_fragments();
+		frent = uma_zalloc(V_pf_frent_z, M_NOWAIT);
+		if (frent == NULL) {
+			REASON_SET(reason, PFRES_MEMORY);
+			return (NULL);
+		}
+	}
+
+	return (frent);
+}
 
-	/* Create a new reassembly queue for this packet */
-	if (*frag == NULL) {
-		*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
-		if (*frag == NULL) {
+struct pf_fragment *
+pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
+		u_short *reason)
+{
+	struct pf_frent		*after, *next, *prev;
+	struct pf_fragment	*frag;
+	uint16_t		total;
+
+	PF_FRAG_ASSERT();
+
+	/* No empty fragments. */
+	if (frent->fe_len == 0) {
+		DPFPRINTF(("bad fragment: len 0"));
+		goto bad_fragment;
+	}
+
+	/* All fragments are 8 byte aligned. */
+	if (frent->fe_mff && (frent->fe_len & 0x7)) {
+		DPFPRINTF(("bad fragment: mff and len %d", frent->fe_len));
+		goto bad_fragment;
+	}
+
+	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET. */
+	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
+		DPFPRINTF(("bad fragment: max packet %d",
+		    frent->fe_off + frent->fe_len));
+		goto bad_fragment;
+	}
+
+	DPFPRINTF((key->frc_af == AF_INET ?
+	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
+	    key->frc_id, frent->fe_off, frent->fe_off + frent->fe_len));
+
+	/* Fully buffer all of the fragments in this fragment queue. */
+	frag = pf_find_fragment(key, &V_pf_frag_tree);
+
+	/* Create a new reassembly queue for this packet. */
+	if (frag == NULL) {
+		frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
+		if (frag == NULL) {
 			pf_flush_fragments();
-			*frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
-			if (*frag == NULL)
+			frag = uma_zalloc(V_pf_frag_z, M_NOWAIT);
+			if (frag == NULL) {
+				REASON_SET(reason, PFRES_MEMORY);
 				goto drop_fragment;
+			}
 		}
 
-		(*frag)->fr_flags = 0;
-		(*frag)->fr_max = 0;
-		(*frag)->fr_src = frent->fr_ip->ip_src;
-		(*frag)->fr_dst = frent->fr_ip->ip_dst;
-		(*frag)->fr_p = frent->fr_ip->ip_p;
-		(*frag)->fr_id = frent->fr_ip->ip_id;
-		(*frag)->fr_timeout = time_uptime;
-		LIST_INIT(&(*frag)->fr_queue);
+		*(struct pf_fragment_cmp *)frag = *key;
+		frag->fr_timeout = time_second;
+		frag->fr_maxlen = frent->fe_len;
+		TAILQ_INIT(&frag->fr_queue);
+
+		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag);
+		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
 
-		RB_INSERT(pf_frag_tree, &V_pf_frag_tree, *frag);
-		TAILQ_INSERT_HEAD(&V_pf_fragqueue, *frag, frag_next);
+		/* We do not have a previous fragment. */
+		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
 
-		/* We do not have a previous fragment */
-		frep = NULL;
-		goto insert;
+		return (frag);
 	}
 
-	/*
-	 * Find a fragment after the current one:
-	 *  - off contains the real shifted offset.
-	 */
-	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
-		if (FR_IP_OFF(frea) > off)
+	KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue"));
+
+	/* Remember maximum fragment len for refragmentation. */
+	if (frent->fe_len > frag->fr_maxlen)
+		frag->fr_maxlen = frent->fe_len;
+
+	/* Maximum data we have seen already. */
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+
+	/* Non terminal fragments must have more fragments flag. */
+	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
+		goto bad_fragment;
+
+	/* Check if we saw the last fragment already. */
+	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
+		if (frent->fe_off + frent->fe_len > total ||
+		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
+			goto bad_fragment;
+	} else {
+		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
+			goto bad_fragment;
+	}
+
+	/* Find a fragment after the current one. */
+	prev = NULL;
+	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
+		if (after->fe_off > frent->fe_off)
 			break;
-		frep = frea;
+		prev = after;
 	}
 
-	KASSERT((frep != NULL || frea != NULL),
-	    ("!(frep != NULL || frea != NULL): %s", __FUNCTION__));;
+	KASSERT(prev != NULL || after != NULL,
+	    ("prev != NULL || after != NULL"));
 
-	if (frep != NULL &&
-	    FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl *
-	    4 > off)
-	{
-		u_int16_t	precut;
+	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
+		uint16_t precut;
 
-		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
-		    frep->fr_ip->ip_hl * 4 - off;
-		if (precut >= ip_len)
-			goto drop_fragment;
-		m_adj(frent->fr_m, precut);
-		DPFPRINTF(("overlap -%d\n", precut));
-		/* Enforce 8 byte boundaries */
-		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
-		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
-		ip_len -= precut;
-		ip->ip_len = htons(ip_len);
-	}
-
-	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
-	    frea = next)
-	{
-		u_int16_t	aftercut;
-
-		aftercut = ip_len + off - FR_IP_OFF(frea);
-		DPFPRINTF(("adjust overlap %d\n", aftercut));
-		if (aftercut < ntohs(frea->fr_ip->ip_len) - frea->fr_ip->ip_hl
-		    * 4)
-		{
-			frea->fr_ip->ip_len =
-			    htons(ntohs(frea->fr_ip->ip_len) - aftercut);
-			frea->fr_ip->ip_off = htons(ntohs(frea->fr_ip->ip_off) +
-			    (aftercut >> 3));
-			m_adj(frea->fr_m, aftercut);
+		precut = prev->fe_off + prev->fe_len - frent->fe_off;
+		if (precut >= frent->fe_len)
+			goto bad_fragment;
+		DPFPRINTF(("overlap -%d", precut));
+		m_adj(frent->fe_m, precut);
+		frent->fe_off += precut;
+		frent->fe_len -= precut;
+	}
+
+	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
+	    after = next) {
+		uint16_t aftercut;
+
+		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
+		DPFPRINTF(("adjust overlap %d", aftercut));
+		if (aftercut < after->fe_len) {
+			m_adj(after->fe_m, aftercut);
+			after->fe_off += aftercut;
+			after->fe_len -= aftercut;
 			break;
 		}
 
-		/* This fragment is completely overlapped, lose it */
-		next = LIST_NEXT(frea, fr_next);
-		m_freem(frea->fr_m);
-		LIST_REMOVE(frea, fr_next);
-		uma_zfree(V_pf_frent_z, frea);
+		/* This fragment is completely overlapped, lose it. */
+		next = TAILQ_NEXT(after, fr_next);
+		m_freem(after->fe_m);
+		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
+		uma_zfree(V_pf_frent_z, after);
 	}
 
- insert:
-	/* Update maximum data size */
-	if ((*frag)->fr_max < max)
-		(*frag)->fr_max = max;
-	/* This is the last segment */
-	if (!mff)
-		(*frag)->fr_flags |= PFFRAG_SEENLAST;
-
-	if (frep == NULL)
-		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
+	if (prev == NULL)
+		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
 	else
-		LIST_INSERT_AFTER(frep, frent, fr_next);
+		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+
+	return (frag);
+
+bad_fragment:
+	REASON_SET(reason, PFRES_FRAG);
+drop_fragment:
+	uma_zfree(V_pf_frent_z, frent);
+	return (NULL);
+}
+
+int
+pf_isfull_fragment(struct pf_fragment *frag)
+{
+	struct pf_frent	*frent, *next;
+	uint16_t off, total;
 
 	/* Check if we are completely reassembled */
-	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
-		return (NULL);
+	if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
+		return (0);
+
+	/* Maximum data we have seen already */
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
 
 	/* Check if we have all the data */
 	off = 0;
-	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
-		next = LIST_NEXT(frep, fr_next);
+	for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
+		next = TAILQ_NEXT(frent, fr_next);
 
-		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
-		if (off < (*frag)->fr_max &&
-		    (next == NULL || FR_IP_OFF(next) != off))
-		{
-			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
-			    off, next == NULL ? -1 : FR_IP_OFF(next),
-			    (*frag)->fr_max));
-			return (NULL);
+		off += frent->fe_len;
+		if (off < total && (next == NULL || next->fe_off != off)) {
+			DPFPRINTF(("missing fragment at %d, next %d, total %d",
+			    off, next == NULL ? -1 : next->fe_off, total));
+			return (0);
 		}
 	}
-	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
-	if (off < (*frag)->fr_max)
-		return (NULL);
+	DPFPRINTF(("%d < %d?", off, total));
+	if (off < total)
+		return (0);
+	KASSERT(off == total, ("off == total"));
 
-	/* We have all the data */
-	frent = LIST_FIRST(&(*frag)->fr_queue);
-	KASSERT((frent != NULL), ("frent == NULL: %s", __FUNCTION__));
-	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
-		DPFPRINTF(("drop: too big: %d\n", off));
-		pf_free_fragment(*frag);
-		*frag = NULL;
-		return (NULL);
-	}
-	next = LIST_NEXT(frent, fr_next);
+	return (1);
+}
 
-	/* Magic from ip_input */
-	ip = frent->fr_ip;
-	m = frent->fr_m;
+struct mbuf *
+pf_join_fragment(struct pf_fragment *frag)
+{
+	struct mbuf *m, *m2;
+	struct pf_frent	*frent, *next;
+
+	frent = TAILQ_FIRST(&frag->fr_queue);
+	next = TAILQ_NEXT(frent, fr_next);
+
+	/* Magic from ip_input. */
+	m = frent->fe_m;
 	m2 = m->m_next;
 	m->m_next = NULL;
 	m_cat(m, m2);
 	uma_zfree(V_pf_frent_z, frent);
 	for (frent = next; frent != NULL; frent = next) {
-		next = LIST_NEXT(frent, fr_next);
+		next = TAILQ_NEXT(frent, fr_next);
 
-		m2 = frent->fr_m;
+		m2 = frent->fe_m;
+		/* Strip off ip header. */
+		m_adj(m2, frent->fe_hdrlen);
 		uma_zfree(V_pf_frent_z, frent);
-		m->m_pkthdr.csum_flags &= m2->m_pkthdr.csum_flags;
-		m->m_pkthdr.csum_data += m2->m_pkthdr.csum_data;
 		m_cat(m, m2);
 	}
 
-	while (m->m_pkthdr.csum_data & 0xffff0000)
-		m->m_pkthdr.csum_data = (m->m_pkthdr.csum_data & 0xffff) +
-		    (m->m_pkthdr.csum_data >> 16);
-	ip->ip_src = (*frag)->fr_src;
-	ip->ip_dst = (*frag)->fr_dst;
-
-	/* Remove from fragment queue */
-	pf_remove_fragment(*frag);
-	*frag = NULL;
-
-	hlen = ip->ip_hl << 2;
-	ip->ip_len = htons(off + hlen);
-	m->m_len += hlen;
-	m->m_data -= hlen;
+	/* Remove from fragment queue. */
+	pf_remove_fragment(frag);
+
+	return (m);
+}
+
+#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)
+static int
+pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason)
+{
+	struct mbuf		*m = *m0;
+	struct pf_frent		*frent;
+	struct pf_fragment	*frag;
+	struct pf_fragment_cmp	key;
+	uint16_t		total, hdrlen;
+
+	/* Get an entry for the fragment queue */
+	if ((frent = pf_create_fragment(reason)) == NULL)
+		return (PF_DROP);
+
+	frent->fe_m = m;
+	frent->fe_hdrlen = ip->ip_hl << 2;
+	frent->fe_extoff = 0;
+	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
+	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
+	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;
+
+	pf_ip2key(ip, dir, &key);
+
+	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+		return (PF_DROP);
+
+	/* The mbuf is part of the fragment entry, no direct free or access */
+	m = *m0 = NULL;
+
+	if (!pf_isfull_fragment(frag))
+		return (PF_PASS);  /* drop because *m0 is NULL, no error */
+
+	/* We have all the data */
+	frent = TAILQ_FIRST(&frag->fr_queue);
+	KASSERT(frent != NULL, ("frent != NULL"));
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+	hdrlen = frent->fe_hdrlen;
+
+	m = *m0 = pf_join_fragment(frag);
+	frag = NULL;
 
-	/* some debugging cruft by sklower, below, will go away soon */
-	/* XXX this should be done elsewhere */
 	if (m->m_flags & M_PKTHDR) {
 		int plen = 0;
-		for (m2 = m; m2; m2 = m2->m_next)
-			plen += m2->m_len;
+		for (m = *m0; m; m = m->m_next)
+			plen += m->m_len;
+		m = *m0;
 		m->m_pkthdr.len = plen;
 	}
 
+	ip = mtod(m, struct ip *);
+	ip->ip_len = htons(hdrlen + total);
+	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
+
+	if (hdrlen + total > IP_MAXPACKET) {
+		DPFPRINTF(("drop: too big: %d", total));
+		ip->ip_len = 0;
+		REASON_SET(reason, PFRES_SHORT);
+		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
+		return (PF_DROP);
+	}
+
 	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
-	return (m);
+	return (PF_PASS);
+}
 
- drop_fragment:
-	/* Oops - fail safe - drop packet */
-	uma_zfree(V_pf_frent_z, frent);
-	m_freem(m);
-	return (NULL);
+#ifdef INET6
+int
+pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr,
+		uint16_t hdrlen, uint16_t extoff, int dir, u_short *reason)
+{
+	struct mbuf		*m = *m0;
+	struct pf_frent		*frent;
+	struct pf_fragment	*frag;
+	struct pf_fragment_cmp	 key;
+	int			 off;
+	uint16_t		 total, maxlen;
+	uint8_t			 proto;
+
+	PF_FRAG_LOCK();
+
+	/* Get an entry for the fragment queue. */
+	if ((frent = pf_create_fragment(reason)) == NULL) {
+		PF_FRAG_UNLOCK();
+		return (PF_DROP);
+	}
+
+	frent->fe_m = m;
+	frent->fe_hdrlen = hdrlen;
+	frent->fe_extoff = extoff;
+	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
+	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
+	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;
+
+	key.frc_src.v6 = ip6->ip6_src;
+	key.frc_dst.v6 = ip6->ip6_dst;
+	key.frc_af = AF_INET6;
+	/* Only the first fragment's protocol is relevant. */
+	key.frc_proto = 0;
+	key.frc_id = fraghdr->ip6f_ident;
+	key.frc_direction = dir;
+
+	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+		PF_FRAG_UNLOCK();
+		return (PF_DROP);
+	}
+
+	/* The mbuf is part of the fragment entry, no direct free or access. */
+	m = *m0 = NULL;
+
+	if (!pf_isfull_fragment(frag)) {
+		PF_FRAG_UNLOCK();
+		return (PF_PASS);  /* Drop because *m0 is NULL, no error. */
+	}
+
+	/* We have all the data. */
+	extoff = frent->fe_extoff;
+	maxlen = frag->fr_maxlen;
+	frent = TAILQ_FIRST(&frag->fr_queue);
+	KASSERT(frent != NULL, ("frent != NULL"));
+	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
+		TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
+	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
+
+	m = *m0 = pf_join_fragment(frag);
+	frag = NULL;
+
+	PF_FRAG_UNLOCK();
+
+	/* Take protocol from first fragment header. */
+	m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), &off);
+	KASSERT(m, ("%s: short mbuf chain", __func__));
+	proto = *(mtod(m, caddr_t) + off);
+	m = *m0;
+
+	/* Delete frag6 header */
+	if (ip6_deletefraghdr(m, hdrlen, M_NOWAIT) != 0)
+		goto fail;
+
+	if (m->m_flags & M_PKTHDR) {
+		int plen = 0;
+		for (m = *m0; m; m = m->m_next)
+			plen += m->m_len;
+		m = *m0;
+		m->m_pkthdr.len = plen;
+	}
+
+	ip6 = mtod(m, struct ip6_hdr *);
+	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
+	if (extoff) {
+		/* Write protocol into next field of last extension header. */
+		m = m_getptr(m, extoff + offsetof(struct ip6_ext, ip6e_nxt),
+		    &off);
+		KASSERT(m, ("%s: short mbuf chain", __func__));
+		*(mtod(m, char *) + off) = proto;
+		m = *m0;
+	} else
+		ip6->ip6_nxt = proto;
+
+	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
+		DPFPRINTF(("drop: too big: %d", total));
+		ip6->ip6_plen = 0;
+		REASON_SET(reason, PFRES_SHORT);
+		/* PF_DROP requires a valid mbuf *m0 in pf_test6(). */
+		return (PF_DROP);
+	}
+
+	DPFPRINTF(("complete: %p(%d)", m, ntohs(ip6->ip6_plen)));
+	return (PF_PASS);
+
+fail:
+	REASON_SET(reason, PFRES_MEMORY);
+	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later. */
+	return (PF_DROP);
 }
 
+#endif
+
 static struct mbuf *
 pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag, int mff,
     int drop, int *nomem)
@@ -591,16 +816,15 @@ pf_fragcache(struct mbuf **m0, struct ip
 
 		(*frag)->fr_flags = PFFRAG_NOBUFFER;
 		(*frag)->fr_max = 0;
-		(*frag)->fr_src = h->ip_src;
-		(*frag)->fr_dst = h->ip_dst;
-		(*frag)->fr_p = h->ip_p;
+		(*frag)->fr_src.v4 = h->ip_src;
+		(*frag)->fr_dst.v4 = h->ip_dst;
 		(*frag)->fr_id = h->ip_id;
 		(*frag)->fr_timeout = time_uptime;
 
-		cur->fr_off = off;
-		cur->fr_end = max;
-		LIST_INIT(&(*frag)->fr_queue);
-		LIST_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
+		cur->fe_off = off;
+		cur->fe_len = max; /* TODO: fe_len = max - off ? */
+		TAILQ_INIT(&(*frag)->fr_queue);
+		TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
 
 		RB_INSERT(pf_frag_tree, &V_pf_cache_tree, *frag);
 		TAILQ_INSERT_HEAD(&V_pf_cachequeue, *frag, frag_next);
@@ -615,8 +839,8 @@ pf_fragcache(struct mbuf **m0, struct ip
 	 *  - off contains the real shifted offset.
 	 */
 	frp = NULL;
-	LIST_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
-		if (fra->fr_off > off)
+	TAILQ_FOREACH(fra, &(*frag)->fr_queue, fr_next) {
+		if (fra->fe_off > off)
 			break;
 		frp = fra;
 	}
@@ -627,18 +851,18 @@ pf_fragcache(struct mbuf **m0, struct ip
 	if (frp != NULL) {
 		int	precut;
 
-		precut = frp->fr_end - off;
+		precut = frp->fe_len - off;
 		if (precut >= ip_len) {
 			/* Fragment is entirely a duplicate */
 			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
-			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
+			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
 			goto drop_fragment;
 		}
 		if (precut == 0) {
 			/* They are adjacent.  Fixup cache entry */
 			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
-			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
-			frp->fr_end = max;
+			    h->ip_id, frp->fe_off, frp->fe_len, off, max));
+			frp->fe_len = max;
 		} else if (precut > 0) {
 			/* The first part of this payload overlaps with a
 			 * fragment that has already been passed.
@@ -648,13 +872,13 @@ pf_fragcache(struct mbuf **m0, struct ip
 			 */
 
 			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
-			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
+			    h->ip_id, precut, frp->fe_off, frp->fe_len, off,
 			    max));
 
 			off += precut;
 			max -= precut;
 			/* Update the previous frag to encompass this one */
-			frp->fr_end = max;
+			frp->fe_len = max;
 
 			if (!drop) {
 				/* XXX Optimization opportunity
@@ -702,16 +926,16 @@ pf_fragcache(struct mbuf **m0, struct ip
 			/* There is a gap between fragments */
 
 			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
-			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
+			    h->ip_id, -precut, frp->fe_off, frp->fe_len, off,
 			    max));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
-			cur->fr_off = off;
-			cur->fr_end = max;
-			LIST_INSERT_AFTER(frp, cur, fr_next);
+			cur->fe_off = off;
+			cur->fe_len = max;
+			TAILQ_INSERT_AFTER(&(*frag)->fr_queue, frp, cur, fr_next);
 		}
 	}
 
@@ -719,19 +943,19 @@ pf_fragcache(struct mbuf **m0, struct ip
 		int	aftercut;
 		int	merge = 0;
 
-		aftercut = max - fra->fr_off;
+		aftercut = max - fra->fe_off;
 		if (aftercut == 0) {
 			/* Adjacent fragments */
 			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
-			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
-			fra->fr_off = off;
+			    h->ip_id, off, max, fra->fe_off, fra->fe_len));
+			fra->fe_off = off;
 			merge = 1;
 		} else if (aftercut > 0) {
 			/* Need to chop off the tail of this fragment */
 			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
-			    h->ip_id, aftercut, off, max, fra->fr_off,
-			    fra->fr_end));
-			fra->fr_off = off;
+			    h->ip_id, aftercut, off, max, fra->fe_off,
+			    fra->fe_len));
+			fra->fe_off = off;
 			max -= aftercut;
 
 			merge = 1;
@@ -756,42 +980,42 @@ pf_fragcache(struct mbuf **m0, struct ip
 		} else if (frp == NULL) {
 			/* There is a gap between fragments */
 			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
-			    h->ip_id, -aftercut, off, max, fra->fr_off,
-			    fra->fr_end));
+			    h->ip_id, -aftercut, off, max, fra->fe_off,
+			    fra->fe_len));
 
 			cur = uma_zalloc(V_pf_frent_z, M_NOWAIT);
 			if (cur == NULL)
 				goto no_mem;
 
-			cur->fr_off = off;
-			cur->fr_end = max;
-			LIST_INSERT_BEFORE(fra, cur, fr_next);
+			cur->fe_off = off;
+			cur->fe_len = max;
+			TAILQ_INSERT_HEAD(&(*frag)->fr_queue, cur, fr_next);
 		}
 

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201506182028.t5IKSrxo022901>