Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 19 Jul 2023 14:45:21 GMT
From:      Doug Moore <dougm@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 6f251ef228e6 - main - radix_trie: simplify ge, le lookups
Message-ID:  <202307191445.36JEjLsT049967@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by dougm:

URL: https://cgit.FreeBSD.org/src/commit/?id=6f251ef228e6ea3891cd7364c1e6d161297a2f90

commit 6f251ef228e6ea3891cd7364c1e6d161297a2f90
Author:     Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2023-07-19 14:43:31 +0000
Commit:     Doug Moore <dougm@FreeBSD.org>
CommitDate: 2023-07-19 14:43:31 +0000

    radix_trie: simplify ge, le lookups
    
    Replace the implementations of lookup_le and lookup_ge with ones
    that do not use a stack or climb back up the tree, and instead
    exploit the popmap field to quickly identify the place to resume
    searching if the straightforward indexed search fails.
    
    The code size of the original functions shrinks by a combined 160
    bytes on amd64, and the cumulative cycle count per invocation of
    the two functions together is reduced 20% in a buildworld test.
    
    Reviewed by:    alc, markj
    Tested by:      pho
    Differential Revision:  https://reviews.freebsd.org/D40936
---
 sys/kern/subr_pctrie.c | 290 ++++++++++++++++++++-----------------------------
 sys/vm/vm_radix.c      | 285 +++++++++++++++++++-----------------------------
 2 files changed, 228 insertions(+), 347 deletions(-)

diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c
index ae8408a6e1ef..4cd7f4b085ba 100644
--- a/sys/kern/subr_pctrie.c
+++ b/sys/kern/subr_pctrie.c
@@ -472,211 +472,151 @@ pctrie_lookup_unlocked(struct pctrie *ptree, uint64_t index, smr_t smr)
 }
 
 /*
- * Look up the nearest entry at a position bigger than or equal to index,
- * assuming access is externally synchronized by a lock.
+ * Returns the value with the least index that is greater than or equal to the
+ * specified index, or NULL if there are no such values.
+ *
+ * Requires that access be externally synchronized by a lock.
  */
 uint64_t *
 pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
 {
-	struct pctrie_node *stack[PCTRIE_LIMIT];
+	struct pctrie_node *node, *succ;
 	uint64_t *m;
-	struct pctrie_node *child, *node;
-#ifdef INVARIANTS
-	int loops = 0;
-#endif
-	unsigned tos;
 	int slot;
 
+	/*
+	 * Descend the trie as if performing an ordinary lookup for the
+	 * specified value.  However, unlike an ordinary lookup, as we descend
+	 * the trie, we use "succ" to remember the last branching-off point,
+	 * that is, the interior node under which the least value that is both
+	 * outside our current path down the trie and greater than the specified
+	 * index resides.  (The node's popmap makes it fast and easy to
+	 * recognize a branching-off point.)  If our ordinary lookup fails to
+	 * yield a value that is greater than or equal to the specified index,
+	 * then we will exit this loop and perform a lookup starting from
+	 * "succ".  If "succ" is not NULL, then that lookup is guaranteed to
+	 * succeed.
+	 */
 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
-	if (node == NULL)
-		return (NULL);
-	else if (pctrie_isleaf(node)) {
-		m = pctrie_toval(node);
-		if (*m >= index)
-			return (m);
-		else
-			return (NULL);
-	}
-	tos = 0;
-	for (;;) {
-		/*
-		 * If the keys differ before the current bisection node,
-		 * then the search key might rollback to the earliest
-		 * available bisection node or to the smallest key
-		 * in the current node (if the owner is greater than the
-		 * search key).
-		 */
-		if (pctrie_keybarr(node, index)) {
-			if (index > node->pn_owner) {
-ascend:
-				KASSERT(++loops < 1000,
-				    ("pctrie_lookup_ge: too many loops"));
-
-				/*
-				 * Pop nodes from the stack until either the
-				 * stack is empty or a node that could have a
-				 * matching descendant is found.
-				 */
-				do {
-					if (tos == 0)
-						return (NULL);
-					node = stack[--tos];
-				} while (pctrie_slot(index,
-				    node->pn_clev) == (PCTRIE_COUNT - 1));
-
-				/*
-				 * The following computation cannot overflow
-				 * because index's slot at the current level
-				 * is less than PCTRIE_COUNT - 1.
-				 */
-				index = pctrie_trimkey(index,
-				    node->pn_clev);
-				index += PCTRIE_UNITLEVEL(node->pn_clev);
-			} else
-				index = node->pn_owner;
-			KASSERT(!pctrie_keybarr(node, index),
-			    ("pctrie_lookup_ge: keybarr failed"));
-		}
-		slot = pctrie_slot(index, node->pn_clev);
-		child = pctrie_node_load(&node->pn_child[slot], NULL,
-		    PCTRIE_LOCKED);
-		if (pctrie_isleaf(child)) {
-			m = pctrie_toval(child);
+	succ = NULL;
+	while (node != NULL) {
+		if (pctrie_isleaf(node)) {
+			m = pctrie_toval(node);
 			if (*m >= index)
 				return (m);
-		} else if (child != NULL)
-			goto descend;
-
-		/* Find the first set bit beyond the first slot+1 bits. */
-		slot = ffs(node->pn_popmap & (-2 << slot)) - 1;
-		if (slot < 0) {
+			break;
+		}
+		if (pctrie_keybarr(node, index)) {
 			/*
-			 * A value or edge greater than the search slot is not
-			 * found in the current node; ascend to the next
-			 * higher-level node.
+			 * If all values in this subtree are > index, then the
+			 * least value in this subtree is the answer.
 			 */
-			goto ascend;
+			if (node->pn_owner > index)
+				succ = node;
+			break;
 		}
-		child = pctrie_node_load(&node->pn_child[slot],
-		    NULL, PCTRIE_LOCKED);
-		KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
-		    __func__, slot, node));
-		if (pctrie_isleaf(child))
-			return (pctrie_toval(child));
-		index = pctrie_trimkey(index, node->pn_clev + 1) +
-		    slot * PCTRIE_UNITLEVEL(node->pn_clev);
-descend:
-		KASSERT(node->pn_clev > 0,
-		    ("pctrie_lookup_ge: pushing leaf's parent"));
-		KASSERT(tos < PCTRIE_LIMIT,
-		    ("pctrie_lookup_ge: stack overflow"));
-		stack[tos++] = node;
-		node = child;
+		slot = pctrie_slot(index, node->pn_clev);
+
+		/*
+		 * Just in case the next search step leads to a subtree of all
+		 * values < index, check popmap to see if a next bigger step, to
+		 * a subtree of all pages with values > index, is available.  If
+		 * so, remember to restart the search here.
+		 */
+		if ((node->pn_popmap >> slot) > 1)
+			succ = node;
+		node = pctrie_node_load(&node->pn_child[slot], NULL,
+		    PCTRIE_LOCKED);
 	}
+
+	/*
+	 * Restart the search from the last place visited in the subtree that
+	 * included some values > index, if there was such a place.
+	 */
+	if (succ == NULL)
+		return (NULL);
+	if (succ != node) {
+		/*
+		 * Take a step to the next bigger sibling of the node chosen
+		 * last time.  In that subtree, all values > index.
+		 */
+		slot = pctrie_slot(index, succ->pn_clev) + 1;
+		KASSERT((succ->pn_popmap >> slot) != 0,
+		    ("%s: no popmap siblings past slot %d in node %p",
+		    __func__, slot, succ));
+		slot += ffs(succ->pn_popmap >> slot) - 1;
+		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
+		    PCTRIE_LOCKED);
+	}
+
+	/* 
+	 * Find the value in the subtree rooted at "succ" with the least index.
+	 */
+	while (!pctrie_isleaf(succ)) {
+		KASSERT(succ->pn_popmap != 0,
+		    ("%s: no popmap children in node %p",  __func__, succ));
+		slot = ffs(succ->pn_popmap) - 1;
+		succ = pctrie_node_load(&succ->pn_child[slot], NULL,
+		    PCTRIE_LOCKED);
+	}
+	return (pctrie_toval(succ));
 }
 
 /*
- * Look up the nearest entry at a position less than or equal to index,
- * assuming access is externally synchronized by a lock.
+ * Returns the value with the greatest index that is less than or equal to the
+ * specified index, or NULL if there are no such values.
+ *
+ * Requires that access be externally synchronized by a lock.
  */
 uint64_t *
 pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
 {
-	struct pctrie_node *stack[PCTRIE_LIMIT];
+	struct pctrie_node *node, *pred;
 	uint64_t *m;
-	struct pctrie_node *child, *node;
-#ifdef INVARIANTS
-	int loops = 0;
-#endif
-	unsigned tos;
 	int slot;
 
+	/*
+	 * Mirror the implementation of pctrie_lookup_ge, described above.
+	 */
 	node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
-	if (node == NULL)
-		return (NULL);
-	else if (pctrie_isleaf(node)) {
-		m = pctrie_toval(node);
-		if (*m <= index)
-			return (m);
-		else
-			return (NULL);
-	}
-	tos = 0;
-	for (;;) {
-		/*
-		 * If the keys differ before the current bisection node,
-		 * then the search key might rollback to the earliest
-		 * available bisection node or to the largest key
-		 * in the current node (if the owner is smaller than the
-		 * search key).
-		 */
+	pred = NULL;
+	while (node != NULL) {
+		if (pctrie_isleaf(node)) {
+			m = pctrie_toval(node);
+			if (*m <= index)
+				return (m);
+			break;
+		}
 		if (pctrie_keybarr(node, index)) {
-			if (index > node->pn_owner) {
-				index = node->pn_owner + PCTRIE_COUNT *
-				    PCTRIE_UNITLEVEL(node->pn_clev);
-			} else {
-ascend:
-				KASSERT(++loops < 1000,
-				    ("pctrie_lookup_le: too many loops"));
-
-				/*
-				 * Pop nodes from the stack until either the
-				 * stack is empty or a node that could have a
-				 * matching descendant is found.
-				 */
-				do {
-					if (tos == 0)
-						return (NULL);
-					node = stack[--tos];
-				} while (pctrie_slot(index,
-				    node->pn_clev) == 0);
-
-				/*
-				 * The following computation cannot overflow
-				 * because index's slot at the current level
-				 * is greater than 0.
-				 */
-				index = pctrie_trimkey(index,
-				    node->pn_clev);
-			}
-			index--;
-			KASSERT(!pctrie_keybarr(node, index),
-			    ("pctrie_lookup_le: keybarr failed"));
+			if (node->pn_owner < index)
+				pred = node;
+			break;
 		}
 		slot = pctrie_slot(index, node->pn_clev);
-		child = pctrie_node_load(&node->pn_child[slot], NULL,
+		if ((node->pn_popmap & ((1 << slot) - 1)) != 0)
+			pred = node;
+		node = pctrie_node_load(&node->pn_child[slot], NULL,
+		    PCTRIE_LOCKED);
+	}
+	if (pred == NULL)
+		return (NULL);
+	if (pred != node) {
+		slot = pctrie_slot(index, pred->pn_clev);
+		KASSERT((pred->pn_popmap & ((1 << slot) - 1)) != 0,
+		    ("%s: no popmap siblings before slot %d in node %p",
+		    __func__, slot, pred));
+		slot = fls(pred->pn_popmap & ((1 << slot) - 1)) - 1;
+		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
+		    PCTRIE_LOCKED);
+	}
+	while (!pctrie_isleaf(pred)) {
+		KASSERT(pred->pn_popmap != 0,
+		    ("%s: no popmap children in node %p",  __func__, pred));
+		slot = fls(pred->pn_popmap) - 1;
+		pred = pctrie_node_load(&pred->pn_child[slot], NULL,
 		    PCTRIE_LOCKED);
-		if (pctrie_isleaf(child)) {
-			m = pctrie_toval(child);
-			if (*m <= index)
-				return (m);
-		} else if (child != NULL)
-			goto descend;
-
-		/* Find the last set bit among the first slot bits. */
-		slot = fls(node->pn_popmap & ((1 << slot) - 1)) - 1;
-		if (slot < 0) {
-			/*
-			 * A value or edge smaller than the search slot is not
-			 * found in the current node; ascend to the next
-			 * higher-level node.
-			 */
-			goto ascend;
-		}
-		child = pctrie_node_load(&node->pn_child[slot],
-		    NULL, PCTRIE_LOCKED);
-		if (pctrie_isleaf(child))
-			return (pctrie_toval(child));
-		index = pctrie_trimkey(index, node->pn_clev + 1) +
-		    (slot + 1) * PCTRIE_UNITLEVEL(node->pn_clev) - 1;
-descend:
-		KASSERT(node->pn_clev > 0,
-		    ("pctrie_lookup_le: pushing leaf's parent"));
-		KASSERT(tos < PCTRIE_LIMIT,
-		    ("pctrie_lookup_le: stack overflow"));
-		stack[tos++] = node;
-		node = child;
 	}
+	return (pctrie_toval(pred));
 }
 
 /*
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
index f6bdda70539b..6504d5031460 100644
--- a/sys/vm/vm_radix.c
+++ b/sys/vm/vm_radix.c
@@ -513,205 +513,146 @@ vm_radix_lookup_unlocked(struct vm_radix *rtree, vm_pindex_t index)
 }
 
 /*
- * Look up the nearest entry at a position greater than or equal to index.
+ * Returns the page with the least pindex that is greater than or equal to the
+ * specified pindex, or NULL if there are no such pages.
+ *
+ * Requires that access be externally synchronized by a lock.
  */
 vm_page_t
 vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
 {
-	struct vm_radix_node *stack[VM_RADIX_LIMIT];
+	struct vm_radix_node *rnode, *succ;
 	vm_page_t m;
-	struct vm_radix_node *child, *rnode;
-#ifdef INVARIANTS
-	int loops = 0;
-#endif
-	int slot, tos;
+	int slot;
 
+	/*
+	 * Descend the trie as if performing an ordinary lookup for the page
+	 * with the specified pindex.  However, unlike an ordinary lookup, as we
+	 * descend the trie, we use "succ" to remember the last branching-off
+	 * point, that is, the interior node under which the page with the least
+	 * pindex that is both outside our current path down the trie and more
+	 * than the specified pindex resides.  (The node's popmap makes it fast
+	 * and easy to recognize a branching-off point.)  If our ordinary lookup
+	 * fails to yield a page with a pindex that is greater than or equal to
+	 * the specified pindex, then we will exit this loop and perform a
+	 * lookup starting from "succ".  If "succ" is not NULL, then that lookup
+	 * is guaranteed to succeed.
+	 */
 	rnode = vm_radix_root_load(rtree, LOCKED);
-	if (rnode == NULL)
-		return (NULL);
-	else if (vm_radix_isleaf(rnode)) {
-		m = vm_radix_topage(rnode);
-		if (m->pindex >= index)
-			return (m);
-		else
-			return (NULL);
-	}
-	tos = 0;
-	for (;;) {
-		/*
-		 * If the keys differ before the current bisection node,
-		 * then the search key might rollback to the earliest
-		 * available bisection node or to the smallest key
-		 * in the current node (if the owner is greater than the
-		 * search key).
-		 */
-		if (vm_radix_keybarr(rnode, index)) {
-			if (index > rnode->rn_owner) {
-ascend:
-				KASSERT(++loops < 1000,
-				    ("vm_radix_lookup_ge: too many loops"));
-
-				/*
-				 * Pop nodes from the stack until either the
-				 * stack is empty or a node that could have a
-				 * matching descendant is found.
-				 */
-				do {
-					if (tos == 0)
-						return (NULL);
-					rnode = stack[--tos];
-				} while (vm_radix_slot(index,
-				    rnode->rn_clev) == (VM_RADIX_COUNT - 1));
-
-				/*
-				 * The following computation cannot overflow
-				 * because index's slot at the current level
-				 * is less than VM_RADIX_COUNT - 1.
-				 */
-				index = vm_radix_trimkey(index,
-				    rnode->rn_clev);
-				index += VM_RADIX_UNITLEVEL(rnode->rn_clev);
-			} else
-				index = rnode->rn_owner;
-			KASSERT(!vm_radix_keybarr(rnode, index),
-			    ("vm_radix_lookup_ge: keybarr failed"));
-		}
-		slot = vm_radix_slot(index, rnode->rn_clev);
-		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
-		if (vm_radix_isleaf(child)) {
-			m = vm_radix_topage(child);
+	succ = NULL;
+	while (rnode != NULL) {
+		if (vm_radix_isleaf(rnode)) {
+			m = vm_radix_topage(rnode);
 			if (m->pindex >= index)
 				return (m);
-		} else if (child != NULL)
-			goto descend;
-
-		/* Find the first set bit beyond the first slot+1 bits. */
-		slot = ffs(rnode->rn_popmap & (-2 << slot)) - 1;
-		if (slot < 0) {
+			break;
+		}
+		if (vm_radix_keybarr(rnode, index)) {
 			/*
-			 * A page or edge greater than the search slot is not
-			 * found in the current node; ascend to the next
-			 * higher-level node.
+			 * If all pages in this subtree have pindex > index,
+			 * then the page in this subtree with the least pindex
+			 * is the answer.
 			 */
-			goto ascend;
+			if (rnode->rn_owner > index)
+				succ = rnode;
+			break;
 		}
-		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
-		KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
-		    __func__, slot, rnode));
-		if (vm_radix_isleaf(child))
-			return (vm_radix_topage(child));
-		index = vm_radix_trimkey(index, rnode->rn_clev + 1) +
-		    slot * VM_RADIX_UNITLEVEL(rnode->rn_clev);
-descend:
-		KASSERT(rnode->rn_clev > 0,
-		    ("vm_radix_lookup_ge: pushing leaf's parent"));
-		KASSERT(tos < VM_RADIX_LIMIT,
-		    ("vm_radix_lookup_ge: stack overflow"));
-		stack[tos++] = rnode;
-		rnode = child;
+		slot = vm_radix_slot(index, rnode->rn_clev);
+
+		/*
+		 * Just in case the next search step leads to a subtree of all
+		 * pages with pindex < index, check popmap to see if a next
+		 * bigger step, to a subtree of all pages with pindex > index,
+		 * is available.  If so, remember to restart the search here.
+		 */
+		if ((rnode->rn_popmap >> slot) > 1)
+			succ = rnode;
+		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+	}
+
+	/*
+	 * Restart the search from the last place visited in the subtree that
+	 * included some pages with pindex > index, if there was such a place.
+	 */
+	if (succ == NULL)
+		return (NULL);
+	if (succ != rnode) {
+		/*
+		 * Take a step to the next bigger sibling of the node chosen
+		 * last time.  In that subtree, all pages have pindex > index.
+		 */
+		slot = vm_radix_slot(index, succ->rn_clev) + 1;
+		KASSERT((succ->rn_popmap >> slot) != 0,
+		    ("%s: no popmap siblings past slot %d in node %p",
+		    __func__, slot, succ));
+		slot += ffs(succ->rn_popmap >> slot) - 1;
+		succ = vm_radix_node_load(&succ->rn_child[slot], LOCKED);
 	}
+
+	/*
+	 * Find the page in the subtree rooted at "succ" with the least pindex.
+	 */
+	while (!vm_radix_isleaf(succ)) {
+		KASSERT(succ->rn_popmap != 0,
+		    ("%s: no popmap children in node %p",  __func__, succ));
+		slot = ffs(succ->rn_popmap) - 1;
+		succ = vm_radix_node_load(&succ->rn_child[slot], LOCKED);
+	}
+	return (vm_radix_topage(succ));
 }
 
 /*
- * Look up the nearest entry at a position less than or equal to index.
+ * Returns the page with the greatest pindex that is less than or equal to the
+ * specified pindex, or NULL if there are no such pages.
+ *
+ * Requires that access be externally synchronized by a lock.
  */
 vm_page_t
 vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
 {
-	struct vm_radix_node *stack[VM_RADIX_LIMIT];
+	struct vm_radix_node *pred, *rnode;
 	vm_page_t m;
-	struct vm_radix_node *child, *rnode;
-#ifdef INVARIANTS
-	int loops = 0;
-#endif
-	int slot, tos;
+	int slot;
 
+	/*
+	 * Mirror the implementation of vm_radix_lookup_ge, described above.
+	 */
 	rnode = vm_radix_root_load(rtree, LOCKED);
-	if (rnode == NULL)
-		return (NULL);
-	else if (vm_radix_isleaf(rnode)) {
-		m = vm_radix_topage(rnode);
-		if (m->pindex <= index)
-			return (m);
-		else
-			return (NULL);
-	}
-	tos = 0;
-	for (;;) {
-		/*
-		 * If the keys differ before the current bisection node,
-		 * then the search key might rollback to the earliest
-		 * available bisection node or to the largest key
-		 * in the current node (if the owner is smaller than the
-		 * search key).
-		 */
-		if (vm_radix_keybarr(rnode, index)) {
-			if (index > rnode->rn_owner) {
-				index = rnode->rn_owner + VM_RADIX_COUNT *
-				    VM_RADIX_UNITLEVEL(rnode->rn_clev);
-			} else {
-ascend:
-				KASSERT(++loops < 1000,
-				    ("vm_radix_lookup_le: too many loops"));
-
-				/*
-				 * Pop nodes from the stack until either the
-				 * stack is empty or a node that could have a
-				 * matching descendant is found.
-				 */
-				do {
-					if (tos == 0)
-						return (NULL);
-					rnode = stack[--tos];
-				} while (vm_radix_slot(index,
-				    rnode->rn_clev) == 0);
-
-				/*
-				 * The following computation cannot overflow
-				 * because index's slot at the current level
-				 * is greater than 0.
-				 */
-				index = vm_radix_trimkey(index,
-				    rnode->rn_clev);
-			}
-			index--;
-			KASSERT(!vm_radix_keybarr(rnode, index),
-			    ("vm_radix_lookup_le: keybarr failed"));
-		}
-		slot = vm_radix_slot(index, rnode->rn_clev);
-		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
-		if (vm_radix_isleaf(child)) {
-			m = vm_radix_topage(child);
+	pred = NULL;
+	while (rnode != NULL) {
+		if (vm_radix_isleaf(rnode)) {
+			m = vm_radix_topage(rnode);
 			if (m->pindex <= index)
 				return (m);
-		} else if (child != NULL)
-			goto descend;
-
-		/* Find the last set bit among the first slot bits. */
-		slot = fls(rnode->rn_popmap & ((1 << slot) - 1)) - 1;
-		if (slot < 0) {
-			/*
-			 * A page or edge smaller than the search slot is not
-			 * found in the current node; ascend to the next
-			 * higher-level node.
-			 */
-			goto ascend;
+			break;
 		}
-		child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
-		KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
-		    __func__, slot, rnode));
-		if (vm_radix_isleaf(child))
-			return (vm_radix_topage(child));
-		index = vm_radix_trimkey(index, rnode->rn_clev + 1) +
-		    (slot + 1) * VM_RADIX_UNITLEVEL(rnode->rn_clev) - 1;
-descend:
-		KASSERT(rnode->rn_clev > 0,
-		    ("vm_radix_lookup_le: pushing leaf's parent"));
-		KASSERT(tos < VM_RADIX_LIMIT,
-		    ("vm_radix_lookup_le: stack overflow"));
-		stack[tos++] = rnode;
-		rnode = child;
+		if (vm_radix_keybarr(rnode, index)) {
+			if (rnode->rn_owner < index)
+				pred = rnode;
+			break;
+		}
+		slot = vm_radix_slot(index, rnode->rn_clev);
+		if ((rnode->rn_popmap & ((1 << slot) - 1)) != 0)
+			pred = rnode;
+		rnode = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+	}
+	if (pred == NULL)
+		return (NULL);
+	if (pred != rnode) {
+		slot = vm_radix_slot(index, pred->rn_clev);
+		KASSERT((pred->rn_popmap & ((1 << slot) - 1)) != 0,
+		    ("%s: no popmap siblings before slot %d in node %p",
+		    __func__, slot, pred));
+		slot = fls(pred->rn_popmap & ((1 << slot) - 1)) - 1;
+		pred = vm_radix_node_load(&pred->rn_child[slot], LOCKED);
+	}
+	while (!vm_radix_isleaf(pred)) {
+		KASSERT(pred->rn_popmap != 0,
+		    ("%s: no popmap children in node %p",  __func__, pred));
+		slot = fls(pred->rn_popmap) - 1;
+		pred = vm_radix_node_load(&pred->rn_child[slot], LOCKED);
 	}
+	return (vm_radix_topage(pred));
 }
 
 /*



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202307191445.36JEjLsT049967>