Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 7 Jul 2023 16:11:02 GMT
From:      Doug Moore <dougm@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 8df38859d0f9 - main - radix_trie: replace node count with popmap
Message-ID:  <202307071611.367GB2EM040820@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by dougm:

URL: https://cgit.FreeBSD.org/src/commit/?id=8df38859d0f92025540bcbe99c9a291a584327f2

commit 8df38859d0f92025540bcbe99c9a291a584327f2
Author:     Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2023-07-07 16:09:36 +0000
Commit:     Doug Moore <dougm@FreeBSD.org>
CommitDate: 2023-07-07 16:09:36 +0000

    radix_trie: replace node count with popmap
    
    Replace the 'count' field in a trie node with a bitmap that
    identifies non-NULL children. Drop the 'last' field, and use the
    last bit set in the bitmap instead.  In lookup_le, lookup_ge,
    remove, and reclaim_all, use the bitmap to find the
    previous/next/only/every non-null child in constant time by
    examining the bitmask instead of looping across array elements
    and null-checking them one-by-one.
    
    A buildworld test suggests that this reduces the cycle count on
    those functions that eliminate some null-checks by 4.9%, 1.5%,
    0.0% and 13.3%.
    Reviewed by:    alc
    Tested by:      pho
    Differential Revision:  https://reviews.freebsd.org/D40775
---
 sys/kern/subr_pctrie.c | 192 +++++++++++++++++++++++--------------------------
 sys/vm/vm_radix.c      | 191 +++++++++++++++++++++++-------------------------
 2 files changed, 179 insertions(+), 204 deletions(-)

diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c
index 0f28e5ebb2f1..cf09903556ec 100644
--- a/sys/kern/subr_pctrie.c
+++ b/sys/kern/subr_pctrie.c
@@ -67,6 +67,18 @@ __FBSDID("$FreeBSD$");
 #define	PCTRIE_MASK	(PCTRIE_COUNT - 1)
 #define	PCTRIE_LIMIT	(howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) - 1)
 
+#if PCTRIE_WIDTH == 3
+typedef uint8_t pn_popmap_t;
+#elif PCTRIE_WIDTH == 4
+typedef uint16_t pn_popmap_t;
+#elif PCTRIE_WIDTH == 5
+typedef uint32_t pn_popmap_t;
+#else
+#error Unsupported width
+#endif
+_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
+    "pn_popmap_t too wide");
+
 /* Flag bits stored in node pointers. */
 #define	PCTRIE_ISLEAF	0x1
 #define	PCTRIE_FLAGS	0x1
@@ -81,9 +93,8 @@ typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
 
 struct pctrie_node {
 	uint64_t	pn_owner;			/* Owner of record. */
-	uint16_t	pn_count;			/* Valid children. */
+	pn_popmap_t	pn_popmap;			/* Valid children. */
 	uint8_t		pn_clev;			/* Current level. */
-	int8_t		pn_last;			/* Zero last ptr. */
 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
 };
 
@@ -127,13 +138,12 @@ pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index,
 	 * has exited so lookup can not return false negatives.  It is done
 	 * here because it will be cache-cold in the dtor callback.
 	 */
-	if (node->pn_last != 0) {
-		pctrie_node_store(&node->pn_child[node->pn_last - 1], NULL,
-		    PCTRIE_UNSERIALIZED);
-		node->pn_last = 0;
+	if (node->pn_popmap != 0) {
+		pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1],
+		    NULL, PCTRIE_UNSERIALIZED);
+		node->pn_popmap = 0;
 	}
 	node->pn_owner = pctrie_trimkey(index, clevel + 1);
-	node->pn_count = 2;
 	node->pn_clev = clevel;
 	return (node);
 }
@@ -143,22 +153,21 @@ pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index,
  */
 static __inline void
 pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
-    pctrie_free_t freefn, int8_t last)
+    pctrie_free_t freefn)
 {
 #ifdef INVARIANTS
 	int slot;
 
-	KASSERT(node->pn_count == 0,
-	    ("pctrie_node_put: node %p has %d children", node,
-	    node->pn_count));
+	KASSERT(powerof2(node->pn_popmap),
+	    ("pctrie_node_put: node %p has too many children %04x", node,
+	    node->pn_popmap));
 	for (slot = 0; slot < PCTRIE_COUNT; slot++) {
-		if (slot == last)
+		if ((node->pn_popmap & (1 << slot)) != 0)
 			continue;
 		KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
 		    NULL, ("pctrie_node_put: node %p has a child", node));
 	}
 #endif
-	node->pn_last = last + 1;
 	freefn(ptree, node);
 }
 
@@ -258,6 +267,9 @@ pctrie_addval(struct pctrie_node *node, uint64_t index, uint16_t clev,
 	slot = pctrie_slot(index, clev);
 	pctrie_node_store(&node->pn_child[slot],
 	    pctrie_toleaf(val), access);
+	node->pn_popmap ^= 1 << slot;
+	KASSERT((node->pn_popmap & (1 << slot)) != 0,
+	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
 }
 
 /*
@@ -305,20 +317,19 @@ pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct pctrie_node *node,
 	struct pctrie_node *child;
 	int slot;
 
-	KASSERT(node->pn_count <= PCTRIE_COUNT,
-	    ("pctrie_reclaim_allnodes_int: bad count in node %p", node));
-	for (slot = 0; node->pn_count != 0; slot++) {
+	while (node->pn_popmap != 0) {
+		slot = ffs(node->pn_popmap) - 1;
 		child = pctrie_node_load(&node->pn_child[slot], NULL,
 		    PCTRIE_UNSERIALIZED);
-		if (child == NULL)
-			continue;
+		KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
+		    __func__, slot, node));
 		if (!pctrie_isleaf(child))
 			pctrie_reclaim_allnodes_int(ptree, child, freefn);
+		node->pn_popmap ^= 1 << slot;
 		pctrie_node_store(&node->pn_child[slot], NULL,
 		    PCTRIE_UNSERIALIZED);
-		node->pn_count--;
 	}
-	pctrie_node_put(ptree, node, freefn, -1);
+	pctrie_node_put(ptree, node, freefn);
 }
 
 /*
@@ -330,7 +341,7 @@ pctrie_zone_init(void *mem, int size __unused, int flags __unused)
 	struct pctrie_node *node;
 
 	node = mem;
-	node->pn_last = 0;
+	node->pn_popmap = 0;
 	memset(node->pn_child, 0, sizeof(node->pn_child));
 	return (0);
 }
@@ -391,7 +402,6 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
 		parentp = &node->pn_child[slot];
 		tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED);
 		if (tmp == NULL) {
-			node->pn_count++;
 			pctrie_addval(node, index, node->pn_clev, val,
 			    PCTRIE_LOCKED);
 			return (0);
@@ -413,6 +423,7 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
 	/* These writes are not yet visible due to ordering. */
 	pctrie_addval(tmp, index, clev, val, PCTRIE_UNSERIALIZED);
 	pctrie_node_store(&tmp->pn_child[slot], node, PCTRIE_UNSERIALIZED);
+	tmp->pn_popmap ^= 1 << slot;
 	/* Synchronize to make the above visible. */
 	pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
 
@@ -483,7 +494,6 @@ uint64_t *
 pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
 {
 	struct pctrie_node *stack[PCTRIE_LIMIT];
-	uint64_t inc;
 	uint64_t *m;
 	struct pctrie_node *child, *node;
 #ifdef INVARIANTS
@@ -552,35 +562,24 @@ ascend:
 		} else if (child != NULL)
 			goto descend;
 
-		/*
-		 * Look for an available edge or val within the current
-		 * bisection node.
-		 */
-                if (slot < (PCTRIE_COUNT - 1)) {
-			inc = PCTRIE_UNITLEVEL(node->pn_clev);
-			index = pctrie_trimkey(index, node->pn_clev);
-			do {
-				index += inc;
-				slot++;
-				child = pctrie_node_load(&node->pn_child[slot],
-				    NULL, PCTRIE_LOCKED);
-				if (pctrie_isleaf(child)) {
-					m = pctrie_toval(child);
-					KASSERT(*m >= index,
-					    ("pctrie_lookup_ge: leaf < index"));
-					return (m);
-				} else if (child != NULL)
-					goto descend;
-			} while (slot < (PCTRIE_COUNT - 1));
+		/* Find the first set bit beyond the first slot+1 bits. */
+		slot = ffs(node->pn_popmap & (-2 << slot)) - 1;
+		if (slot < 0) {
+			/*
+			 * A value or edge greater than the search slot is not
+			 * found in the current node; ascend to the next
+			 * higher-level node.
+			 */
+			goto ascend;
 		}
-		KASSERT(child == NULL || pctrie_isleaf(child),
-		    ("pctrie_lookup_ge: child is radix node"));
-
-		/*
-		 * If a value or edge greater than the search slot is not found
-		 * in the current node, ascend to the next higher-level node.
-		 */
-		goto ascend;
+		child = pctrie_node_load(&node->pn_child[slot],
+		    NULL, PCTRIE_LOCKED);
+		KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
+		    __func__, slot, node));
+		if (pctrie_isleaf(child))
+			return (pctrie_toval(child));
+		index = pctrie_trimkey(index, node->pn_clev + 1) +
+		    slot * PCTRIE_UNITLEVEL(node->pn_clev);
 descend:
 		KASSERT(node->pn_clev > 0,
 		    ("pctrie_lookup_ge: pushing leaf's parent"));
@@ -599,7 +598,6 @@ uint64_t *
 pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
 {
 	struct pctrie_node *stack[PCTRIE_LIMIT];
-	uint64_t inc;
 	uint64_t *m;
 	struct pctrie_node *child, *node;
 #ifdef INVARIANTS
@@ -670,35 +668,22 @@ ascend:
 		} else if (child != NULL)
 			goto descend;
 
-		/*
-		 * Look for an available edge or value within the current
-		 * bisection node.
-		 */
-		if (slot > 0) {
-			inc = PCTRIE_UNITLEVEL(node->pn_clev);
-			index |= inc - 1;
-			do {
-				index -= inc;
-				slot--;
-				child = pctrie_node_load(&node->pn_child[slot],
-				    NULL, PCTRIE_LOCKED);
-				if (pctrie_isleaf(child)) {
-					m = pctrie_toval(child);
-					KASSERT(*m <= index,
-					    ("pctrie_lookup_le: leaf > index"));
-					return (m);
-				} else if (child != NULL)
-					goto descend;
-			} while (slot > 0);
+		/* Find the last set bit among the first slot bits. */
+		slot = fls(node->pn_popmap & ((1 << slot) - 1)) - 1;
+		if (slot < 0) {
+			/*
+			 * A value or edge smaller than the search slot is not
+			 * found in the current node; ascend to the next
+			 * higher-level node.
+			 */
+			goto ascend;
 		}
-		KASSERT(child == NULL || pctrie_isleaf(child),
-		    ("pctrie_lookup_le: child is radix node"));
-
-		/*
-		 * If a value or edge smaller than the search slot is not found
-		 * in the current node, ascend to the next higher-level node.
-		 */
-		goto ascend;
+		child = pctrie_node_load(&node->pn_child[slot],
+		    NULL, PCTRIE_LOCKED);
+		if (pctrie_isleaf(child))
+			return (pctrie_toval(child));
+		index = pctrie_trimkey(index, node->pn_clev + 1) +
+		    (slot + 1) * PCTRIE_UNITLEVEL(node->pn_clev) - 1;
 descend:
 		KASSERT(node->pn_clev > 0,
 		    ("pctrie_lookup_le: pushing leaf's parent"));
@@ -718,7 +703,7 @@ pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
 {
 	struct pctrie_node *node, *parent, *tmp;
 	uint64_t *m;
-	int i, slot;
+	int slot;
 
 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
 	if (pctrie_isleaf(node)) {
@@ -739,19 +724,22 @@ pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
 			m = pctrie_toval(tmp);
 			if (*m != index)
 				panic("%s: invalid key found", __func__);
+			KASSERT((node->pn_popmap & (1 << slot)) != 0,
+			    ("%s: bad popmap slot %d in node %p",
+			    __func__, slot, node));
+			node->pn_popmap ^= 1 << slot;
 			pctrie_node_store(&node->pn_child[slot], NULL,
 			    PCTRIE_LOCKED);
-			node->pn_count--;
-			if (node->pn_count > 1)
+			if (!powerof2(node->pn_popmap))
 				break;
-			for (i = 0; i < PCTRIE_COUNT; i++) {
-				tmp = pctrie_node_load(&node->pn_child[i],
-				    NULL, PCTRIE_LOCKED);
-				if (tmp != NULL)
-					break;
-			}
+			KASSERT(node->pn_popmap != 0,
+			    ("%s: bad popmap all zeroes", __func__));
+			slot = ffs(node->pn_popmap) - 1;
+			tmp = pctrie_node_load(&node->pn_child[slot],
+			    NULL, PCTRIE_LOCKED);
 			KASSERT(tmp != NULL,
-			    ("%s: invalid node configuration", __func__));
+			    ("%s: bad popmap slot %d in node %p",
+			    __func__, slot, node));
 			if (parent == NULL)
 				pctrie_root_store(ptree, tmp, PCTRIE_LOCKED);
 			else {
@@ -767,8 +755,7 @@ pctrie_remove(struct pctrie *ptree, uint64_t index, pctrie_free_t freefn)
 			 * The child is still valid and we can not zero the
 			 * pointer until all SMR references are gone.
 			 */
-			node->pn_count--;
-			pctrie_node_put(ptree, node, freefn, i);
+			pctrie_node_put(ptree, node, freefn);
 			break;
 		}
 		parent = node;
@@ -801,22 +788,23 @@ pctrie_reclaim_allnodes(struct pctrie *ptree, pctrie_free_t freefn)
 DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
 {
 	struct pctrie_node *node, *tmp;
-	int i;
+	int slot;
+	pn_popmap_t popmap;
 
         if (!have_addr)
                 return;
 	node = (struct pctrie_node *)addr;
-	db_printf("node %p, owner %jx, children count %u, level %u:\n",
-	    (void *)node, (uintmax_t)node->pn_owner, node->pn_count,
+	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
+	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
 	    node->pn_clev);
-	for (i = 0; i < PCTRIE_COUNT; i++) {
-		tmp = pctrie_node_load(&node->pn_child[i], NULL,
+	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
+		slot = ffs(popmap) - 1;
+		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
 		    PCTRIE_UNSERIALIZED);
-		if (tmp != NULL)
-			db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
-			    i, (void *)tmp,
-			    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
-			    node->pn_clev);
+		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
+		    slot, (void *)tmp,
+		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
+		    node->pn_clev);
 	}
 }
 #endif /* DDB */
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
index b3d0d92f9969..d2cd2c2536fd 100644
--- a/sys/vm/vm_radix.c
+++ b/sys/vm/vm_radix.c
@@ -91,6 +91,18 @@ __FBSDID("$FreeBSD$");
 #define	VM_RADIX_LIMIT							\
 	(howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)
 
+#if VM_RADIX_WIDTH == 3
+typedef uint8_t rn_popmap_t;
+#elif VM_RADIX_WIDTH == 4
+typedef uint16_t rn_popmap_t;
+#elif VM_RADIX_WIDTH == 5
+typedef uint32_t rn_popmap_t;
+#else
+#error Unsupported width
+#endif
+_Static_assert(sizeof(rn_popmap_t) <= sizeof(int),
+    "rn_popmap_t too wide");
+
 /* Flag bits stored in node pointers. */
 #define	VM_RADIX_ISLEAF	0x1
 #define	VM_RADIX_FLAGS	0x1
@@ -107,9 +119,8 @@ typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;
 
 struct vm_radix_node {
 	vm_pindex_t	rn_owner;			/* Owner of record. */
-	uint16_t	rn_count;			/* Valid children. */
+	rn_popmap_t	rn_popmap;			/* Valid children. */
 	uint8_t		rn_clev;			/* Current level. */
-	int8_t		rn_last;			/* zero last ptr. */
 	smrnode_t	rn_child[VM_RADIX_COUNT];	/* Child nodes. */
 };
 
@@ -152,13 +163,12 @@ vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
 	 * has exited so lookup can not return false negatives.  It is done
 	 * here because it will be cache-cold in the dtor callback.
 	 */
-	if (rnode->rn_last != 0) {
-		vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1],
+	if (rnode->rn_popmap != 0) {
+		vm_radix_node_store(&rnode->rn_child[ffs(rnode->rn_popmap) - 1],
 		    NULL, UNSERIALIZED);
-		rnode->rn_last = 0;
+		rnode->rn_popmap = 0;
 	}
 	rnode->rn_owner = vm_radix_trimkey(index, clevel + 1);
-	rnode->rn_count = 2;
 	rnode->rn_clev = clevel;
 	return (rnode);
 }
@@ -167,23 +177,21 @@ vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
  * Free radix node.
  */
 static __inline void
-vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
+vm_radix_node_put(struct vm_radix_node *rnode)
 {
 #ifdef INVARIANTS
 	int slot;
 
-	KASSERT(rnode->rn_count == 0,
-	    ("vm_radix_node_put: rnode %p has %d children", rnode,
-	    rnode->rn_count));
+	KASSERT(powerof2(rnode->rn_popmap),
+	    ("vm_radix_node_put: rnode %p has too many children %04x", rnode,
+	    rnode->rn_popmap));
 	for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
-		if (slot == last)
+		if ((rnode->rn_popmap & (1 << slot)) != 0)
 			continue;
 		KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
 		    NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
 	}
 #endif
-	/* Off by one so a freshly zero'd node is not assigned to. */
-	rnode->rn_last = last + 1;
 	uma_zfree_smr(vm_radix_node_zone, rnode);
 }
 
@@ -284,6 +292,9 @@ vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
 	slot = vm_radix_slot(index, clev);
 	vm_radix_node_store(&rnode->rn_child[slot],
 	    vm_radix_toleaf(page), access);
+	rnode->rn_popmap ^= 1 << slot;
+	KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
+	    ("%s: bad popmap slot %d in rnode %p", __func__, slot, rnode));
 }
 
 /*
@@ -330,19 +341,19 @@ vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
 	struct vm_radix_node *child;
 	int slot;
 
-	KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
-	    ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
-	for (slot = 0; rnode->rn_count != 0; slot++) {
+	while (rnode->rn_popmap != 0) {
+		slot = ffs(rnode->rn_popmap) - 1;
 		child = vm_radix_node_load(&rnode->rn_child[slot],
 		    UNSERIALIZED);
-		if (child == NULL)
-			continue;
+		KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
+		    __func__, slot, rnode));
 		if (!vm_radix_isleaf(child))
 			vm_radix_reclaim_allnodes_int(child);
-		vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
-		rnode->rn_count--;
+		rnode->rn_popmap ^= 1 << slot;
+		vm_radix_node_store(&rnode->rn_child[slot], NULL,
+		    UNSERIALIZED);
 	}
-	vm_radix_node_put(rnode, -1);
+	vm_radix_node_put(rnode);
 }
 
 #ifndef UMA_MD_SMALL_ALLOC
@@ -430,7 +441,6 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
 		parentp = &rnode->rn_child[slot];
 		tmp = vm_radix_node_load(parentp, LOCKED);
 		if (tmp == NULL) {
-			rnode->rn_count++;
 			vm_radix_addpage(rnode, index, rnode->rn_clev, page,
 			    LOCKED);
 			return (0);
@@ -452,6 +462,7 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
 	/* These writes are not yet visible due to ordering. */
 	vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
 	vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
+	tmp->rn_popmap ^= 1 << slot;
 	/* Serializing write to make the above visible. */
 	vm_radix_node_store(parentp, tmp, LOCKED);
 
@@ -522,7 +533,6 @@ vm_page_t
 vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
 {
 	struct vm_radix_node *stack[VM_RADIX_LIMIT];
-	vm_pindex_t inc;
 	vm_page_t m;
 	struct vm_radix_node *child, *rnode;
 #ifdef INVARIANTS
@@ -589,35 +599,23 @@ ascend:
 		} else if (child != NULL)
 			goto descend;
 
-		/*
-		 * Look for an available edge or page within the current
-		 * bisection node.
-		 */
-                if (slot < (VM_RADIX_COUNT - 1)) {
-			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
-			index = vm_radix_trimkey(index, rnode->rn_clev);
-			do {
-				index += inc;
-				slot++;
-				child = vm_radix_node_load(&rnode->rn_child[slot],
-				    LOCKED);
-				if (vm_radix_isleaf(child)) {
-					m = vm_radix_topage(child);
-					KASSERT(m->pindex >= index,
-					    ("vm_radix_lookup_ge: leaf<index"));
-					return (m);
-				} else if (child != NULL)
-					goto descend;
-			} while (slot < (VM_RADIX_COUNT - 1));
+		/* Find the first set bit beyond the first slot+1 bits. */
+		slot = ffs(rnode->rn_popmap & (-2 << slot)) - 1;
+		if (slot < 0) {
+			/*
+			 * A page or edge greater than the search slot is not
+			 * found in the current node; ascend to the next
+			 * higher-level node.
+			 */
+			goto ascend;
 		}
-		KASSERT(child == NULL || vm_radix_isleaf(child),
-		    ("vm_radix_lookup_ge: child is radix node"));
-
-		/*
-		 * If a page or edge greater than the search slot is not found
-		 * in the current node, ascend to the next higher-level node.
-		 */
-		goto ascend;
+		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+		KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
+		    __func__, slot, rnode));
+		if (vm_radix_isleaf(child))
+			return (vm_radix_topage(child));
+		index = vm_radix_trimkey(index, rnode->rn_clev + 1) +
+		    slot * VM_RADIX_UNITLEVEL(rnode->rn_clev);
 descend:
 		KASSERT(rnode->rn_clev > 0,
 		    ("vm_radix_lookup_ge: pushing leaf's parent"));
@@ -635,7 +633,6 @@ vm_page_t
 vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
 {
 	struct vm_radix_node *stack[VM_RADIX_LIMIT];
-	vm_pindex_t inc;
 	vm_page_t m;
 	struct vm_radix_node *child, *rnode;
 #ifdef INVARIANTS
@@ -704,35 +701,23 @@ ascend:
 		} else if (child != NULL)
 			goto descend;
 
-		/*
-		 * Look for an available edge or page within the current
-		 * bisection node.
-		 */
-		if (slot > 0) {
-			inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
-			index |= inc - 1;
-			do {
-				index -= inc;
-				slot--;
-				child = vm_radix_node_load(&rnode->rn_child[slot],
-				    LOCKED);
-				if (vm_radix_isleaf(child)) {
-					m = vm_radix_topage(child);
-					KASSERT(m->pindex <= index,
-					    ("vm_radix_lookup_le: leaf>index"));
-					return (m);
-				} else if (child != NULL)
-					goto descend;
-			} while (slot > 0);
+		/* Find the last set bit among the first slot bits. */
+		slot = fls(rnode->rn_popmap & ((1 << slot) - 1)) - 1;
+		if (slot < 0) {
+			/*
+			 * A page or edge smaller than the search slot is not
+			 * found in the current node; ascend to the next
+			 * higher-level node.
+			 */
+			goto ascend;
 		}
-		KASSERT(child == NULL || vm_radix_isleaf(child),
-		    ("vm_radix_lookup_le: child is radix node"));
-
-		/*
-		 * If a page or edge smaller than the search slot is not found
-		 * in the current node, ascend to the next higher-level node.
-		 */
-		goto ascend;
+		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+		KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
+		    __func__, slot, rnode));
+		if (vm_radix_isleaf(child))
+			return (vm_radix_topage(child));
+		index = vm_radix_trimkey(index, rnode->rn_clev + 1) +
+		    (slot + 1) * VM_RADIX_UNITLEVEL(rnode->rn_clev) - 1;
 descend:
 		KASSERT(rnode->rn_clev > 0,
 		    ("vm_radix_lookup_le: pushing leaf's parent"));
@@ -752,7 +737,7 @@ vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
 {
 	struct vm_radix_node *rnode, *parent, *tmp;
 	vm_page_t m;
-	int i, slot;
+	int slot;
 
 	rnode = vm_radix_root_load(rtree, LOCKED);
 	if (vm_radix_isleaf(rnode)) {
@@ -772,19 +757,21 @@ vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
 			m = vm_radix_topage(tmp);
 			if (m->pindex != index)
 				return (NULL);
+			KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
+			    ("%s: bad popmap slot %d in rnode %p",
+			    __func__, slot, rnode));
+			rnode->rn_popmap ^= 1 << slot;
 			vm_radix_node_store(
 			    &rnode->rn_child[slot], NULL, LOCKED);
-			rnode->rn_count--;
-			if (rnode->rn_count > 1)
+			if (!powerof2(rnode->rn_popmap))
 				return (m);
-			for (i = 0; i < VM_RADIX_COUNT; i++) {
-				tmp = vm_radix_node_load(&rnode->rn_child[i],
-				    LOCKED);
-				if (tmp != NULL)
-					break;
-			}
+			KASSERT(rnode->rn_popmap != 0,
+			    ("%s: bad popmap all zeroes", __func__));
+			slot = ffs(rnode->rn_popmap) - 1;
+			tmp = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
 			KASSERT(tmp != NULL,
-			    ("%s: invalid node configuration", __func__));
+			    ("%s: bad popmap slot %d in rnode %p",
+			    __func__, slot, rnode));
 			if (parent == NULL)
 				vm_radix_root_store(rtree, tmp, LOCKED);
 			else {
@@ -799,8 +786,7 @@ vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
 			 * The child is still valid and we can not zero the
 			 * pointer until all smr references are gone.
 			 */
-			rnode->rn_count--;
-			vm_radix_node_put(rnode, i);
+			vm_radix_node_put(rnode);
 			return (m);
 		}
 		parent = rnode;
@@ -880,21 +866,22 @@ vm_radix_wait(void)
 DB_SHOW_COMMAND(radixnode, db_show_radixnode)
 {
 	struct vm_radix_node *rnode, *tmp;
-	int i;
+	int slot;
+	rn_popmap_t popmap;
 
         if (!have_addr)
                 return;
 	rnode = (struct vm_radix_node *)addr;
-	db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
-	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
+	db_printf("radixnode %p, owner %jx, children popmap %04x, level %u:\n",
+	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_popmap,
 	    rnode->rn_clev);
-	for (i = 0; i < VM_RADIX_COUNT; i++) {
-		tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
-		if (tmp != NULL)
-			db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
-			    i, (void *)tmp,
-			    vm_radix_isleaf(tmp) ?  vm_radix_topage(tmp) : NULL,
-			    rnode->rn_clev);
+	for (popmap = rnode->rn_popmap; popmap != 0; popmap ^= 1 << slot) {
+		slot = ffs(popmap) - 1;
+		tmp = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
+		db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
+		    slot, (void *)tmp,
+		    vm_radix_isleaf(tmp) ?  vm_radix_topage(tmp) : NULL,
+		    rnode->rn_clev);
 	}
 }
 #endif /* DDB */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202307071611.367GB2EM040820>