Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 2 Nov 2018 15:26:51 +0000 (UTC)
From:      Kristof Provost <kp@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r340061 - in head/sys: net netpfil/pf
Message-ID:  <201811021526.wA2FQpqs083337@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: kp
Date: Fri Nov  2 15:26:51 2018
New Revision: 340061
URL: https://svnweb.freebsd.org/changeset/base/340061

Log:
  pf: Split the fragment reassembly queue into smaller parts
  
  Remember 16 entry points based on the fragment offset.  Instead of
  a worst case of 8196 list traversals we now check a maximum of 512
  list entries or 16 array elements.
  
  Obtained from:	OpenBSD
  Differential Revision:	https://reviews.freebsd.org/D17733

Modified:
  head/sys/net/pfvar.h
  head/sys/netpfil/pf/pf_norm.c

Modified: head/sys/net/pfvar.h
==============================================================================
--- head/sys/net/pfvar.h	Fri Nov  2 15:23:57 2018	(r340060)
+++ head/sys/net/pfvar.h	Fri Nov  2 15:26:51 2018	(r340061)
@@ -1205,6 +1205,12 @@ struct pf_divert {
 #define PFR_KENTRY_HIWAT	200000	/* Number of table entries */
 
 /*
+ * Limit the length of the fragment queue traversal.  Remember
+ * search entry points based on the fragment offset.
+ */
+#define PF_FRAG_ENTRY_POINTS		16
+
+/*
  * ioctl parameter structures
  */
 

Modified: head/sys/netpfil/pf/pf_norm.c
==============================================================================
--- head/sys/netpfil/pf/pf_norm.c	Fri Nov  2 15:23:57 2018	(r340060)
+++ head/sys/netpfil/pf/pf_norm.c	Fri Nov  2 15:26:51 2018	(r340061)
@@ -87,6 +87,7 @@ struct pf_fragment {
 #define fr_af	fr_key.frc_af
 #define fr_proto	fr_key.frc_proto
 
+	struct pf_frent	*fr_firstoff[PF_FRAG_ENTRY_POINTS];
 	RB_ENTRY(pf_fragment) fr_entry;
 	TAILQ_ENTRY(pf_fragment) frag_next;
 	uint32_t	fr_timeout;
@@ -136,6 +137,13 @@ static struct pf_frent *pf_create_fragment(u_short *);
 static int	pf_frent_holes(struct pf_frent *frent);
 static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key,
 		    struct pf_frag_tree *tree);
+static inline int	pf_frent_index(struct pf_frent *);
+static void	pf_frent_insert(struct pf_fragment *,
+			    struct pf_frent *, struct pf_frent *);
+void			pf_frent_remove(struct pf_fragment *,
+			    struct pf_frent *);
+struct pf_frent		*pf_frent_previous(struct pf_fragment *,
+			    struct pf_frent *);
 static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *,
 		    struct pf_frent *, u_short *);
 static struct mbuf *pf_join_fragment(struct pf_fragment *);
@@ -308,6 +316,7 @@ pf_remove_fragment(struct pf_fragment *frag)
 {
 
 	PF_FRAG_ASSERT();
+	KASSERT(frag, ("frag != NULL"));
 
 	RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag);
 	TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next);
@@ -367,9 +376,150 @@ pf_frent_holes(struct pf_frent *frent)
 	return holes;
 }
 
+static inline int
+pf_frent_index(struct pf_frent *frent)
+{
+	/*
+	 * We have an array of 16 entry points to the queue.  A full size
+	 * 65535 octet IP packet can have 8192 fragments.  So the queue
+	 * traversal length is at most 512 and at most 16 entry points are
+	 * checked.  We need 128 additional bytes on a 64 bit architecture.
+	 */
+	CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) ==
+	    16 - 1);
+	CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1);
+
+	return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS);
+}
+
+static void
+pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent,
+    struct pf_frent *prev)
+{
+	int index;
+
+	if (prev == NULL) {
+		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
+	} else {
+		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
+		    ("overlapping fragment"));
+		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
+	}
+
+	index = pf_frent_index(frent);
+	if (frag->fr_firstoff[index] == NULL) {
+		KASSERT(prev == NULL || pf_frent_index(prev) < index,
+		    ("prev == NULL || pf_frent_index(pref) < index"));
+		frag->fr_firstoff[index] = frent;
+	} else {
+		if (frent->fe_off < frag->fr_firstoff[index]->fe_off) {
+			KASSERT(prev == NULL || pf_frent_index(prev) < index,
+			    ("prev == NULL || pf_frent_index(pref) < index"));
+			frag->fr_firstoff[index] = frent;
+		} else {
+			KASSERT(prev != NULL, ("prev != NULL"));
+			KASSERT(pf_frent_index(prev) == index,
+			    ("pf_frent_index(prev) == index"));
+		}
+	}
+
+	frag->fr_holes += pf_frent_holes(frent);
+}
+
+void
+pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent)
+{
+	struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next);
+	struct pf_frent *next = TAILQ_NEXT(frent, fr_next);
+	int index;
+
+	frag->fr_holes -= pf_frent_holes(frent);
+
+	index = pf_frent_index(frent);
+	KASSERT(frag->fr_firstoff[index] != NULL, ("frent not found"));
+	if (frag->fr_firstoff[index]->fe_off == frent->fe_off) {
+		if (next == NULL) {
+			frag->fr_firstoff[index] = NULL;
+		} else {
+			KASSERT(frent->fe_off + frent->fe_len <= next->fe_off,
+			    ("overlapping fragment"));
+			if (pf_frent_index(next) == index) {
+				frag->fr_firstoff[index] = next;
+			} else {
+				frag->fr_firstoff[index] = NULL;
+			}
+		}
+	} else {
+		KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off,
+		    ("frag->fr_firstoff[index]->fe_off < frent->fe_off"));
+		KASSERT(prev != NULL, ("prev != NULL"));
+		KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off,
+		    ("overlapping fragment"));
+		KASSERT(pf_frent_index(prev) == index,
+		    ("pf_frent_index(prev) == index"));
+	}
+
+	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
+}
+
+struct pf_frent *
+pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent)
+{
+	struct pf_frent *prev, *next;
+	int index;
+
+	/*
+	 * If there are no fragments after frag, take the final one.  Assume
+	 * that the global queue is not empty.
+	 */
+	prev = TAILQ_LAST(&frag->fr_queue, pf_fragq);
+	KASSERT(prev != NULL, ("prev != NULL"));
+	if (prev->fe_off <= frent->fe_off)
+		return prev;
+	/*
+	 * We want to find a fragment entry that is before frag, but still
+	 * close to it.  Find the first fragment entry that is in the same
+	 * entry point or in the first entry point after that.  As we have
+	 * already checked that there are entries behind frag, this will
+	 * succeed.
+	 */
+	for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS;
+	    index++) {
+		prev = frag->fr_firstoff[index];
+		if (prev != NULL)
+			break;
+	}
+	KASSERT(prev != NULL, ("prev != NULL"));
+	/*
+	 * In prev we may have a fragment from the same entry point that is
+	 * before frent, or one that is just one position behind frent.
+	 * In the latter case, we go back one step and have the predecessor.
+	 * There may be none if the new fragment will be the first one.
+	 */
+	if (prev->fe_off > frent->fe_off) {
+		prev = TAILQ_PREV(prev, pf_fragq, fr_next);
+		if (prev == NULL)
+			return NULL;
+		KASSERT(prev->fe_off <= frent->fe_off,
+		    ("prev->fe_off <= frent->fe_off"));
+		return prev;
+	}
+	/*
+	 * In prev is the first fragment of the entry point.  The offset
+	 * of frag is behind it.  Find the closest previous fragment.
+	 */
+	for (next = TAILQ_NEXT(prev, fr_next); next != NULL;
+	    next = TAILQ_NEXT(next, fr_next)) {
+		if (next->fe_off > frent->fe_off)
+			break;
+		prev = next;
+	}
+	return prev;
+}
+
 static struct pf_fragment *
 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
-		u_short *reason)
+    u_short *reason)
 {
 	struct pf_frent		*after, *next, *prev;
 	struct pf_fragment	*frag;
@@ -416,6 +566,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
 		}
 
 		*(struct pf_fragment_cmp *)frag = *key;
+		memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff));
 		frag->fr_timeout = time_uptime;
 		frag->fr_maxlen = frent->fe_len;
 		frag->fr_holes = 1;
@@ -425,8 +576,7 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
 		TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next);
 
 		/* We do not have a previous fragment. */
-		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-		frag->fr_holes += pf_frent_holes(frent);
+		pf_frent_insert(frag, frent, NULL);
 
 		return (frag);
 	}
@@ -455,17 +605,15 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
 			goto bad_fragment;
 	}
 
-	/* Find a fragment after the current one. */
-	prev = NULL;
-	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
-		if (after->fe_off > frent->fe_off)
-			break;
-		prev = after;
+	/* Find neighbors for newly inserted fragment */
+	prev = pf_frent_previous(frag, frent);
+	if (prev == NULL) {
+		after = TAILQ_FIRST(&frag->fr_queue);
+		KASSERT(after != NULL, ("after != NULL"));
+	} else {
+		after = TAILQ_NEXT(prev, fr_next);
 	}
 
-	KASSERT(prev != NULL || after != NULL,
-	    ("prev != NULL || after != NULL"));
-
 	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
 		uint16_t precut;
 
@@ -493,17 +641,12 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct
 
 		/* This fragment is completely overlapped, lose it. */
 		next = TAILQ_NEXT(after, fr_next);
-		frag->fr_holes -= pf_frent_holes(after);
+		pf_frent_remove(frag, after);
 		m_freem(after->fe_m);
-		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
 		uma_zfree(V_pf_frent_z, after);
 	}
 
-	if (prev == NULL)
-		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
-	else
-		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);
-	frag->fr_holes += pf_frent_holes(frent);
+	pf_frent_insert(frag, frent, prev);
 
 	return (frag);
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201811021526.wA2FQpqs083337>