Skip site navigation (1) Skip section navigation (2)
Date:      Fri, 3 Oct 2025 19:22:20 GMT
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 249ec85352b5 - main - nullfs: smr-protected hash lookup
Message-ID:  <202510031922.593JMK1d047368@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=249ec85352b5b69d70f71e3a329d16334ca71f59

commit 249ec85352b5b69d70f71e3a329d16334ca71f59
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2025-09-30 15:16:30 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2025-10-03 19:16:04 +0000

    nullfs: smr-protected hash lookup
    
    The vast majority of real-world contention on the hash comes from lookups,
    notably seen during highly parallel poudriere runs.
    
    Lockless lookup largely alleviates the problem.
    
    Reviewed by:            kib
    Tested by:              pho (previous version)
    Differential Revision:  https://reviews.freebsd.org/D38761
---
 sys/fs/nullfs/null.h       |  9 ++++---
 sys/fs/nullfs/null_subr.c  | 63 +++++++++++++++++++++++++++++++++-------------
 sys/fs/nullfs/null_vnops.c |  2 +-
 3 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index 0a93878c859f..dd6cb4f71f07 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -37,6 +37,9 @@
 
 #define	NULLM_CACHE	0x0001
 
+#include <sys/ck.h>
+#include <vm/uma.h>
+
 struct null_mount {
 	struct mount	*nullm_vfs;
 	struct vnode	*nullm_lowerrootvp;	/* Ref to lower root vnode */
@@ -50,7 +53,7 @@ struct null_mount {
  * A cache of vnode references
  */
 struct null_node {
-	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
+	CK_LIST_ENTRY(null_node) null_hash;	/* Hash list */
 	struct vnode	        *null_lowervp;	/* VREFed once */
 	struct vnode		*null_vnode;	/* Back pointer */
 	u_int			null_flags;
@@ -79,9 +82,7 @@ struct vnode *null_checkvp(struct vnode *vp, char *fil, int lno);
 
 extern struct vop_vector null_vnodeops;
 
-#ifdef MALLOC_DECLARE
-MALLOC_DECLARE(M_NULLFSNODE);
-#endif
+extern uma_zone_t null_node_zone;
 
 #ifdef NULLFS_DEBUG
 #define NULLFSDEBUG(format, args...) printf(format ,## args)
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
index 4db0bc475791..146d3bbdaedd 100644
--- a/sys/fs/nullfs/null_subr.c
+++ b/sys/fs/nullfs/null_subr.c
@@ -41,9 +41,14 @@
 #include <sys/mount.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
+#include <sys/smr.h>
 
 #include <fs/nullfs/null.h>
 
+#include <vm/uma.h>
+
+VFS_SMR_DECLARE;
+
 /*
  * Null layer cache:
  * Each cache entry holds a reference to the lower vnode
@@ -54,12 +59,12 @@
 
 #define	NULL_NHASH(vp) (&null_node_hashtbl[vfs_hash_index(vp) & null_hash_mask])
 
-static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static CK_LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
 static struct rwlock null_hash_lock;
 static u_long null_hash_mask;
 
 static MALLOC_DEFINE(M_NULLFSHASH, "nullfs_hash", "NULLFS hash table");
-MALLOC_DEFINE(M_NULLFSNODE, "nullfs_node", "NULLFS vnode private part");
+uma_zone_t __read_mostly null_node_zone;
 
 static void null_hashins(struct mount *, struct null_node *);
 
@@ -73,6 +78,10 @@ nullfs_init(struct vfsconf *vfsp)
 	null_node_hashtbl = hashinit(desiredvnodes, M_NULLFSHASH,
 	    &null_hash_mask);
 	rw_init(&null_hash_lock, "nullhs");
+	null_node_zone = uma_zcreate("nullfs node", sizeof(struct null_node),
+	    NULL, NULL, NULL, NULL, 0, UMA_ZONE_ZINIT);
+	VFS_SMR_ZONE_SET(null_node_zone);
+
 	return (0);
 }
 
@@ -80,6 +89,7 @@ int
 nullfs_uninit(struct vfsconf *vfsp)
 {
 
+	uma_zdestroy(null_node_zone);
 	rw_destroy(&null_hash_lock);
 	hashdestroy(null_node_hashtbl, M_NULLFSHASH, null_hash_mask);
 	return (0);
@@ -106,7 +116,7 @@ null_hashget_locked(struct mount *mp, struct vnode *lowervp)
 	 * reference count (but NOT the lower vnode's VREF counter).
 	 */
 	hd = NULL_NHASH(lowervp);
-	LIST_FOREACH(a, hd, null_hash) {
+	CK_LIST_FOREACH(a, hd, null_hash) {
 		if (a->null_lowervp != lowervp)
 			continue;
 		/*
@@ -129,17 +139,34 @@ struct vnode *
 null_hashget(struct mount *mp, struct vnode *lowervp)
 {
 	struct null_node_hashhead *hd;
+	struct null_node *a;
 	struct vnode *vp;
+	enum vgetstate vs;
 
-	hd = NULL_NHASH(lowervp);
-	if (LIST_EMPTY(hd))
-		return (NULL);
-
-	rw_rlock(&null_hash_lock);
-	vp = null_hashget_locked(mp, lowervp);
-	rw_runlock(&null_hash_lock);
+	ASSERT_VOP_LOCKED(lowervp, "null_hashget");
+	rw_assert(&null_hash_lock, RA_UNLOCKED);
 
-	return (vp);
+	vfs_smr_enter();
+	hd = NULL_NHASH(lowervp);
+	CK_LIST_FOREACH(a, hd, null_hash) {
+		if (a->null_lowervp != lowervp)
+			continue;
+		/*
+		 * See null_hashget_locked as to why the nullfs vnode can't be
+		 * doomed here.
+		 */
+		vp = NULLTOV(a);
+		VNPASS(!VN_IS_DOOMED(vp), vp);
+		if (vp->v_mount != mp)
+			continue;
+		vs = vget_prep_smr(vp);
+		vfs_smr_exit();
+		VNPASS(vs != VGET_NONE, vp);
+		vget_finish_ref(vp, vs);
+		return (vp);
+	}
+	vfs_smr_exit();
+	return (NULL);
 }
 
 static void
@@ -154,7 +181,7 @@ null_hashins(struct mount *mp, struct null_node *xp)
 
 	hd = NULL_NHASH(xp->null_lowervp);
 #ifdef INVARIANTS
-	LIST_FOREACH(oxp, hd, null_hash) {
+	CK_LIST_FOREACH(oxp, hd, null_hash) {
 		if (oxp->null_lowervp == xp->null_lowervp &&
 		    NULLTOV(oxp)->v_mount == mp) {
 			VNASSERT(0, NULLTOV(oxp),
@@ -162,7 +189,7 @@ null_hashins(struct mount *mp, struct null_node *xp)
 		}
 	}
 #endif
-	LIST_INSERT_HEAD(hd, xp, null_hash);
+	CK_LIST_INSERT_HEAD(hd, xp, null_hash);
 }
 
 static void
@@ -177,7 +204,7 @@ null_destroy_proto(struct vnode *vp, void *xp)
 	VI_UNLOCK(vp);
 	vgone(vp);
 	vput(vp);
-	free(xp, M_NULLFSNODE);
+	uma_zfree_smr(null_node_zone, xp);
 }
 
 /*
@@ -211,12 +238,12 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 	 * Note that duplicate can only appear in hash if the lowervp is
 	 * locked LK_SHARED.
 	 */
-	xp = malloc(sizeof(struct null_node), M_NULLFSNODE, M_WAITOK);
+	xp = uma_zalloc_smr(null_node_zone, M_WAITOK);
 
 	error = getnewvnode("nullfs", mp, &null_vnodeops, &vp);
 	if (error) {
 		vput(lowervp);
-		free(xp, M_NULLFSNODE);
+		uma_zfree_smr(null_node_zone, xp);
 		return (error);
 	}
 
@@ -264,8 +291,8 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
 		return (error);
 	}
 
-	null_hashins(mp, xp);
 	vn_set_state(vp, VSTATE_CONSTRUCTED);
+	null_hashins(mp, xp);
 	rw_wunlock(&null_hash_lock);
 	*vpp = vp;
 
@@ -280,7 +307,7 @@ null_hashrem(struct null_node *xp)
 {
 
 	rw_wlock(&null_hash_lock);
-	LIST_REMOVE(xp, null_hash);
+	CK_LIST_REMOVE(xp, null_hash);
 	rw_wunlock(&null_hash_lock);
 }
 
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index 64e5b004abca..dd176b34e4eb 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -965,7 +965,7 @@ null_reclaim(struct vop_reclaim_args *ap)
 		vunref(lowervp);
 	else
 		vput(lowervp);
-	free(xp, M_NULLFSNODE);
+	uma_zfree_smr(null_node_zone, xp);
 
 	return (0);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202510031922.593JMK1d047368>