Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 30 Jul 2023 06:27:40 GMT
From:      Doug Moore <dougm@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 38f5cb1bfbe1 - main - radix_tree: redefine the clev field
Message-ID:  <202307300627.36U6ReV9064140@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by dougm:

URL: https://cgit.FreeBSD.org/src/commit/?id=38f5cb1bfbe11bad92ba0266251ff3b0b8fcda73

commit 38f5cb1bfbe11bad92ba0266251ff3b0b8fcda73
Author:     Doug Moore <dougm@FreeBSD.org>
AuthorDate: 2023-07-30 06:20:07 +0000
Commit:     Doug Moore <dougm@FreeBSD.org>
CommitDate: 2023-07-30 06:20:07 +0000

    radix_tree: redefine the clev field
    
    The clev field in the node struct is almost always multiplied by
    WIDTH; occasionally, it is incremented and then multiplied by
    WIDTH. Instructions can be saved by storing it always multiplied by
    WIDTH.
    
    For the computation of slot(), this just eliminates a
    multiplication. For trimkey(), where the caller always adds one to
    clev before passing it as an argument, this change has the caller, not
    the caller, do that. Trimkey() handles it not by adding WIDTH to the
    input parameter, but by shifting COUNT, and not 1. That produces the
    same result, and it relieves keybarr of the need to test to avoid
    shifting by more than 63 bits, since level is always <= 63.
    
    This takes 3 instrutions and 14 bytes out of the basic lookup loop on
    amd64.
    
    Reviewed by:    kib
    Tested by:      pho (as part of a larger change)
    Differential Revision:  https://reviews.freebsd.org/D41226
---
 sys/kern/subr_pctrie.c | 76 +++++++++++++++++++-----------------------------
 sys/vm/vm_radix.c      | 78 ++++++++++++++++++++------------------------------
 2 files changed, 60 insertions(+), 94 deletions(-)

diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c
index 6d45d9762a78..c08e4b5910a5 100644
--- a/sys/kern/subr_pctrie.c
+++ b/sys/kern/subr_pctrie.c
@@ -83,17 +83,13 @@ _Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
 #define	PCTRIE_FLAGS	(PCTRIE_ISLEAF)
 #define	PCTRIE_PAD	PCTRIE_FLAGS
 
-/* Returns one unit associated with specified level. */
-#define	PCTRIE_UNITLEVEL(lev)						\
-	((uint64_t)1 << ((lev) * PCTRIE_WIDTH))
-
 struct pctrie_node;
 typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
 
 struct pctrie_node {
 	uint64_t	pn_owner;			/* Owner of record. */
 	pn_popmap_t	pn_popmap;			/* Valid children. */
-	uint8_t		pn_clev;			/* Current level. */
+	uint8_t		pn_clev;			/* Level * WIDTH. */
 	smr_pctnode_t	pn_child[PCTRIE_COUNT];		/* Child nodes. */
 };
 
@@ -108,14 +104,14 @@ static __inline void pctrie_node_store(smr_pctnode_t *p, void *val,
 static __inline int
 pctrie_slot(uint64_t index, uint16_t level)
 {
-	return ((index >> (level * PCTRIE_WIDTH)) & PCTRIE_MASK);
+	return ((index >> level) & PCTRIE_MASK);
 }
 
-/* Computes the key (index) with the low-order 'level' radix-digits zeroed. */
+/* Computes the key (index) with the low-order 'level' + 1 radix-digits zeroed. */
 static __inline uint64_t
 pctrie_trimkey(uint64_t index, uint16_t level)
 {
-	return (index & -PCTRIE_UNITLEVEL(level));
+	return (index & -((uint64_t)PCTRIE_COUNT << level));
 }
 
 /*
@@ -124,7 +120,7 @@ pctrie_trimkey(uint64_t index, uint16_t level)
  */
 static struct pctrie_node *
 pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index,
-    uint16_t clevel)
+    uint64_t newind)
 {
 	struct pctrie_node *node;
 
@@ -142,8 +138,20 @@ pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t allocfn, uint64_t index,
 		    PCTRIE_NULL, PCTRIE_UNSERIALIZED);
 		node->pn_popmap = 0;
 	}
-	node->pn_owner = pctrie_trimkey(index, clevel + 1);
-	node->pn_clev = clevel;
+
+	/*
+	 * From the highest-order bit where the indexes differ,
+	 * compute the highest level in the trie where they differ.  Then,
+	 * compute the least index of this subtrie.
+	 */
+	KASSERT(index != newind, ("%s: passing the same key value %jx",
+	    __func__, (uintmax_t)index));
+	_Static_assert(sizeof(long long) >= sizeof(uint64_t),
+	    "uint64 too wide");
+	_Static_assert(sizeof(uint64_t) * NBBY <=
+	    (1 << (sizeof(node->pn_clev) * NBBY)), "pn_clev too narrow");
+	node->pn_clev = rounddown(flsll(index ^ newind) - 1, PCTRIE_WIDTH);
+	node->pn_owner = pctrie_trimkey(index, node->pn_clev);
 	return (node);
 }
 
@@ -259,37 +267,18 @@ pctrie_toval(struct pctrie_node *node)
  * Make 'child' a child of 'node'.
  */
 static __inline void
-pctrie_addnode(struct pctrie_node *node, uint64_t index, uint16_t clev,
+pctrie_addnode(struct pctrie_node *node, uint64_t index,
     struct pctrie_node *child, enum pctrie_access access)
 {
 	int slot;
 
-	slot = pctrie_slot(index, clev);
+	slot = pctrie_slot(index, node->pn_clev);
 	pctrie_node_store(&node->pn_child[slot], child, access);
 	node->pn_popmap ^= 1 << slot;
 	KASSERT((node->pn_popmap & (1 << slot)) != 0,
 	    ("%s: bad popmap slot %d in node %p", __func__, slot, node));
 }
 
-/*
- * Returns the level where two keys differ.
- * It cannot accept 2 equal keys.
- */
-static __inline uint16_t
-pctrie_keydiff(uint64_t index1, uint64_t index2)
-{
-
-	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
-	    __func__, (uintmax_t)index1));
-	CTASSERT(sizeof(long long) >= sizeof(uint64_t));
-
-	/*
-	 * From the highest-order bit where the indexes differ,
-	 * compute the highest level in the trie where they differ.
-	 */
-	return ((flsll(index1 ^ index2) - 1) / PCTRIE_WIDTH);
-}
-
 /*
  * Returns TRUE if it can be determined that key does not belong to the
  * specified node.  Otherwise, returns FALSE.
@@ -297,12 +286,8 @@ pctrie_keydiff(uint64_t index1, uint64_t index2)
 static __inline bool
 pctrie_keybarr(struct pctrie_node *node, uint64_t idx)
 {
-
-	if (node->pn_clev < PCTRIE_LIMIT) {
-		idx = pctrie_trimkey(idx, node->pn_clev + 1);
-		return (idx != node->pn_owner);
-	}
-	return (false);
+	idx = pctrie_trimkey(idx, node->pn_clev);
+	return (idx != node->pn_owner);
 }
 
 /*
@@ -366,7 +351,6 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
 	struct pctrie_node *leaf, *node, *parent;
 	smr_pctnode_t *parentp;
 	int slot;
-	uint16_t clev;
 
 	index = *val;
 	leaf = pctrie_toleaf(val);
@@ -383,8 +367,7 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
 				if (parent == NULL)
 					ptree->pt_root = leaf;
 				else
-					pctrie_addnode(parent, index,
-					    parent->pn_clev, leaf,
+					pctrie_addnode(parent, index, leaf,
 					    PCTRIE_LOCKED);
 				return (0);
 			}
@@ -411,13 +394,12 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, pctrie_alloc_t allocfn)
 	 */
 	parentp = (parent != NULL) ? &parent->pn_child[slot]:
 	    (smr_pctnode_t *)&ptree->pt_root;
-	clev = pctrie_keydiff(newind, index);
-	parent = pctrie_node_get(ptree, allocfn, index, clev);
+	parent = pctrie_node_get(ptree, allocfn, index, newind);
 	if (parent == NULL)
 		return (ENOMEM);
 	/* These writes are not yet visible due to ordering. */
-	pctrie_addnode(parent, index, clev, leaf, PCTRIE_UNSERIALIZED);
-	pctrie_addnode(parent, newind, clev, node, PCTRIE_UNSERIALIZED);
+	pctrie_addnode(parent, index, leaf, PCTRIE_UNSERIALIZED);
+	pctrie_addnode(parent, newind, node, PCTRIE_UNSERIALIZED);
 	/* Synchronize to make the above visible. */
 	pctrie_node_store(parentp, parent, PCTRIE_LOCKED);
 	return (0);
@@ -714,7 +696,7 @@ DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
 	node = (struct pctrie_node *)addr;
 	db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
 	    (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
-	    node->pn_clev);
+	    node->pn_clev / PCTRIE_WIDTH);
 	for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
 		slot = ffs(popmap) - 1;
 		tmp = pctrie_node_load(&node->pn_child[slot], NULL,
@@ -722,7 +704,7 @@ DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
 		db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
 		    slot, (void *)tmp,
 		    pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
-		    node->pn_clev);
+		    node->pn_clev / PCTRIE_WIDTH);
 	}
 }
 #endif /* DDB */
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
index 3c4ea9568108..f50f9e3605a1 100644
--- a/sys/vm/vm_radix.c
+++ b/sys/vm/vm_radix.c
@@ -107,10 +107,6 @@ _Static_assert(sizeof(rn_popmap_t) <= sizeof(int),
 #define	VM_RADIX_FLAGS	(VM_RADIX_ISLEAF)
 #define	VM_RADIX_PAD	VM_RADIX_FLAGS
 
-/* Returns one unit associated with specified level. */
-#define	VM_RADIX_UNITLEVEL(lev)						\
-	((vm_pindex_t)1 << ((lev) * VM_RADIX_WIDTH))
-
 enum vm_radix_access { SMR, LOCKED, UNSERIALIZED };
 
 struct vm_radix_node;
@@ -119,7 +115,7 @@ typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;
 struct vm_radix_node {
 	vm_pindex_t	rn_owner;			/* Owner of record. */
 	rn_popmap_t	rn_popmap;			/* Valid children. */
-	uint8_t		rn_clev;			/* Current level. */
+	uint8_t		rn_clev;			/* Level * WIDTH. */
 	smrnode_t	rn_child[VM_RADIX_COUNT];	/* Child nodes. */
 };
 
@@ -135,21 +131,22 @@ static void vm_radix_node_store(smrnode_t *p, struct vm_radix_node *v,
 static __inline int
 vm_radix_slot(vm_pindex_t index, uint16_t level)
 {
-	return ((index >> (level * VM_RADIX_WIDTH)) & VM_RADIX_MASK);
+	return ((index >> level) & VM_RADIX_MASK);
 }
 
-/* Computes the key (index) with the low-order 'level' radix-digits zeroed. */
+/* Computes the key (index) with the low-order 'level' + 1 radix-digits
+ * zeroed. */
 static __inline vm_pindex_t
 vm_radix_trimkey(vm_pindex_t index, uint16_t level)
 {
-	return (index & -VM_RADIX_UNITLEVEL(level));
+	return (index & -((vm_pindex_t)VM_RADIX_COUNT << level));
 }
 
 /*
  * Allocate a radix node.
  */
 static struct vm_radix_node *
-vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
+vm_radix_node_get(vm_pindex_t index, vm_pindex_t newind)
 {
 	struct vm_radix_node *rnode;
 
@@ -167,8 +164,20 @@ vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
 		    VM_RADIX_NULL, UNSERIALIZED);
 		rnode->rn_popmap = 0;
 	}
-	rnode->rn_owner = vm_radix_trimkey(index, clevel + 1);
-	rnode->rn_clev = clevel;
+
+	/*
+	 * From the highest-order bit where the indexes differ,
+	 * compute the highest level in the trie where they differ.  Then,
+	 * compute the least index of this subtrie.
+	 */
+	KASSERT(index != newind, ("%s: passing the same key value %jx",
+	    __func__, (uintmax_t)index));
+	_Static_assert(sizeof(long long) >= sizeof(vm_pindex_t),
+	    "vm_pindex_t too wide");
+	_Static_assert(sizeof(vm_pindex_t) * NBBY <=
+	    (1 << (sizeof(rnode->rn_clev) * NBBY)), "rn_clev too narrow");
+	rnode->rn_clev = rounddown(flsll(index ^ newind) - 1, VM_RADIX_WIDTH);
+	rnode->rn_owner = vm_radix_trimkey(index, rnode->rn_clev);
 	return (rnode);
 }
 
@@ -284,37 +293,18 @@ vm_radix_topage(struct vm_radix_node *rnode)
  * Make 'child' a child of 'rnode'.
  */
 static __inline void
-vm_radix_addnode(struct vm_radix_node *rnode, vm_pindex_t index, uint16_t clev,
+vm_radix_addnode(struct vm_radix_node *rnode, vm_pindex_t index,
     struct vm_radix_node *child, enum vm_radix_access access)
 {
 	int slot;
 
-	slot = vm_radix_slot(index, clev);
+	slot = vm_radix_slot(index, rnode->rn_clev);
 	vm_radix_node_store(&rnode->rn_child[slot], child, access);
 	rnode->rn_popmap ^= 1 << slot;
 	KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
 	    ("%s: bad popmap slot %d in rnode %p", __func__, slot, rnode));
 }
 
-/*
- * Returns the level where two keys differ.
- * It cannot accept 2 equal keys.
- */
-static __inline uint16_t
-vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
-{
-
-	KASSERT(index1 != index2, ("%s: passing the same key value %jx",
-	    __func__, (uintmax_t)index1));
-	CTASSERT(sizeof(long long) >= sizeof(vm_pindex_t));
-
-	/*
-	 * From the highest-order bit where the indexes differ,
-	 * compute the highest level in the trie where they differ.
-	 */
-	return ((flsll(index1 ^ index2) - 1) / VM_RADIX_WIDTH);
-}
-
 /*
  * Returns TRUE if it can be determined that key does not belong to the
  * specified rnode.  Otherwise, returns FALSE.
@@ -322,12 +312,8 @@ vm_radix_keydiff(vm_pindex_t index1, vm_pindex_t index2)
 static __inline bool
 vm_radix_keybarr(struct vm_radix_node *rnode, vm_pindex_t idx)
 {
-
-	if (rnode->rn_clev < VM_RADIX_LIMIT) {
-		idx = vm_radix_trimkey(idx, rnode->rn_clev + 1);
-		return (idx != rnode->rn_owner);
-	}
-	return (false);
+	idx = vm_radix_trimkey(idx, rnode->rn_clev);
+	return (idx != rnode->rn_owner);
 }
 
 /*
@@ -420,7 +406,6 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
 	struct vm_radix_node *leaf, *parent, *rnode;
 	smrnode_t *parentp;
 	int slot;
-	uint16_t clev;
 
 	index = page->pindex;
 	leaf = vm_radix_toleaf(page);
@@ -437,8 +422,8 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
 				if (parent == NULL)
 					rtree->rt_root = leaf;
 				else
-					vm_radix_addnode(parent, index,
-					    parent->rn_clev, leaf, LOCKED);
+					vm_radix_addnode(parent, index, leaf,
+					    LOCKED);
 				return (0);
 			}
 			newind = vm_radix_topage(rnode)->pindex;
@@ -463,13 +448,12 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
 	 */
 	parentp = (parent != NULL) ? &parent->rn_child[slot]:
 	    (smrnode_t *)&rtree->rt_root;
-	clev = vm_radix_keydiff(newind, index);
-	parent = vm_radix_node_get(index, clev);
+	parent = vm_radix_node_get(index, newind);
 	if (parent == NULL)
 		return (ENOMEM);
 	/* These writes are not yet visible due to ordering. */
-	vm_radix_addnode(parent, index, clev, leaf, UNSERIALIZED);
-	vm_radix_addnode(parent, newind, clev, rnode, UNSERIALIZED);
+	vm_radix_addnode(parent, index, leaf, UNSERIALIZED);
+	vm_radix_addnode(parent, newind, rnode, UNSERIALIZED);
 	/* Serializing write to make the above visible. */
 	vm_radix_node_store(parentp, parent, LOCKED);
 	return (0);
@@ -808,14 +792,14 @@ DB_SHOW_COMMAND(radixnode, db_show_radixnode)
 	rnode = (struct vm_radix_node *)addr;
 	db_printf("radixnode %p, owner %jx, children popmap %04x, level %u:\n",
 	    (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_popmap,
-	    rnode->rn_clev);
+	    rnode->rn_clev / VM_RADIX_WIDTH);
 	for (popmap = rnode->rn_popmap; popmap != 0; popmap ^= 1 << slot) {
 		slot = ffs(popmap) - 1;
 		tmp = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
 		db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
 		    slot, (void *)tmp,
 		    vm_radix_isleaf(tmp) ?  vm_radix_topage(tmp) : NULL,
-		    rnode->rn_clev);
+		    rnode->rn_clev / VM_RADIX_WIDTH);
 	}
 }
 #endif /* DDB */



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202307300627.36U6ReV9064140>