Date: Fri, 3 Oct 2025 19:22:21 GMT
From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject: git: 641a58239520 - main - nullfs: avoid the interlock in null_lock with smr
Message-ID: <202510031922.593JMLdx047401@gitrepo.freebsd.org>
The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=641a58239520de9fc5a9077e9a709481cfc75dc0

commit 641a58239520de9fc5a9077e9a709481cfc75dc0
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2025-10-01 10:06:39 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2025-10-03 19:16:21 +0000

    nullfs: avoid the interlock in null_lock with smr

    This largely eliminates contention on the vnode interlock.

    Note this still does not scale, to be fixed(tm).

    Reviewed by:    kib
    Tested by:      pho (previous version)
    Differential Revision:  https://reviews.freebsd.org/D38761
---
 sys/fs/nullfs/null.h       |   1 +
 sys/fs/nullfs/null_vnops.c | 152 ++++++++++++++++++++++++++++-----------------
 2 files changed, 95 insertions(+), 58 deletions(-)

diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
index dd6cb4f71f07..aa7a689bec34 100644
--- a/sys/fs/nullfs/null.h
+++ b/sys/fs/nullfs/null.h
@@ -64,6 +64,7 @@ struct null_node {
 #define MOUNTTONULLMOUNT(mp)    ((struct null_mount *)((mp)->mnt_data))
 #define VTONULL(vp)     ((struct null_node *)(vp)->v_data)
+#define VTONULL_SMR(vp) ((struct null_node *)vn_load_v_data_smr(vp))
 #define NULLTOV(xp)     ((xp)->null_vnode)
 
 int nullfs_init(struct vfsconf *vfsp);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
index dd176b34e4eb..375b6aa27531 100644
--- a/sys/fs/nullfs/null_vnops.c
+++ b/sys/fs/nullfs/null_vnops.c
@@ -174,6 +174,8 @@
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/smr.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 #include <sys/stat.h>
@@ -185,6 +187,8 @@
 #include <vm/vm_object.h>
 #include <vm/vnode_pager.h>
 
+VFS_SMR_DECLARE;
+
 static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
 SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
         &null_bug_bypass, 0, "");
@@ -768,75 +772,107 @@ null_rmdir(struct vop_rmdir_args *ap)
 }
 
 /*
- * We need to process our own vnode lock and then clear the
- * interlock flag as it applies only to our vnode, not the
- * vnodes below us on the stack.
+ * We need to process our own vnode lock and then clear the interlock flag as
+ * it applies only to our vnode, not the vnodes below us on the stack.
+ *
+ * We have to hold the vnode here to solve a potential reclaim race. If we're
+ * forcibly vgone'd while we still have refs, a thread could be sleeping inside
+ * the lowervp's vop_lock routine. When we vgone we will drop our last ref to
+ * the lowervp, which would allow it to be reclaimed. The lowervp could then
+ * be recycled, in which case it is not legal to be sleeping in its VOP. We
+ * prevent it from being recycled by holding the vnode here.
  */
+static struct vnode *
+null_lock_prep_with_smr(struct vop_lock1_args *ap)
+{
+        struct null_node *nn;
+        struct vnode *lvp;
+
+        vfs_smr_enter();
+
+        lvp = NULL;
+
+        nn = VTONULL_SMR(ap->a_vp);
+        if (__predict_true(nn != NULL)) {
+                lvp = nn->null_lowervp;
+                if (lvp != NULL && !vhold_smr(lvp))
+                        lvp = NULL;
+        }
+
+        vfs_smr_exit();
+        return (lvp);
+}
+
+static struct vnode *
+null_lock_prep_with_interlock(struct vop_lock1_args *ap)
+{
+        struct null_node *nn;
+        struct vnode *lvp;
+
+        ASSERT_VI_LOCKED(ap->a_vp, __func__);
+
+        ap->a_flags &= ~LK_INTERLOCK;
+
+        lvp = NULL;
+
+        nn = VTONULL(ap->a_vp);
+        if (__predict_true(nn != NULL)) {
+                lvp = nn->null_lowervp;
+                if (lvp != NULL)
+                        vholdnz(lvp);
+        }
+        VI_UNLOCK(ap->a_vp);
+        return (lvp);
+}
+
 static int
 null_lock(struct vop_lock1_args *ap)
 {
-        struct vnode *vp = ap->a_vp;
-        int flags;
-        struct null_node *nn;
         struct vnode *lvp;
-        int error;
+        int error, flags;
 
-        if ((ap->a_flags & LK_INTERLOCK) == 0)
-                VI_LOCK(vp);
-        else
-                ap->a_flags &= ~LK_INTERLOCK;
-        flags = ap->a_flags;
-        nn = VTONULL(vp);
+        if (__predict_true((ap->a_flags & LK_INTERLOCK) == 0)) {
+                lvp = null_lock_prep_with_smr(ap);
+                if (__predict_false(lvp == NULL)) {
+                        VI_LOCK(ap->a_vp);
+                        lvp = null_lock_prep_with_interlock(ap);
+                }
+        } else {
+                lvp = null_lock_prep_with_interlock(ap);
+        }
+
+        ASSERT_VI_UNLOCKED(ap->a_vp, __func__);
+
+        if (__predict_false(lvp == NULL))
+                return (vop_stdlock(ap));
+
+        VNPASS(lvp->v_holdcnt > 0, lvp);
+        error = VOP_LOCK(lvp, ap->a_flags);
+
         /*
-         * If we're still active we must ask the lower layer to
-         * lock as ffs has special lock considerations in its
-         * vop lock.
+         * We might have slept to get the lock and someone might have
+         * clean our vnode already, switching vnode lock from one in
+         * lowervp to v_lock in our own vnode structure. Handle this
+         * case by reacquiring correct lock in requested mode.
          */
-        if (nn != NULL && (lvp = NULLVPTOLOWERVP(vp)) != NULL) {
-                /*
-                 * We have to hold the vnode here to solve a potential
-                 * reclaim race. If we're forcibly vgone'd while we
-                 * still have refs, a thread could be sleeping inside
-                 * the lowervp's vop_lock routine. When we vgone we will
-                 * drop our last ref to the lowervp, which would allow it
-                 * to be reclaimed. The lowervp could then be recycled,
-                 * in which case it is not legal to be sleeping in its VOP.
-                 * We prevent it from being recycled by holding the vnode
-                 * here.
-                 */
-                vholdnz(lvp);
-                VI_UNLOCK(vp);
-                error = VOP_LOCK(lvp, flags);
-
-                /*
-                 * We might have slept to get the lock and someone might have
-                 * clean our vnode already, switching vnode lock from one in
-                 * lowervp to v_lock in our own vnode structure. Handle this
-                 * case by reacquiring correct lock in requested mode.
-                 */
-                if (VTONULL(vp) == NULL && error == 0) {
-                        ap->a_flags &= ~LK_TYPE_MASK;
-                        switch (flags & LK_TYPE_MASK) {
-                        case LK_SHARED:
-                                ap->a_flags |= LK_SHARED;
-                                break;
-                        case LK_UPGRADE:
-                        case LK_EXCLUSIVE:
-                                ap->a_flags |= LK_EXCLUSIVE;
-                                break;
-                        default:
-                                panic("Unsupported lock request %d\n",
-                                    ap->a_flags);
-                        }
-                        VOP_UNLOCK(lvp);
-                        error = vop_stdlock(ap);
+        if (VTONULL(ap->a_vp) == NULL && error == 0) {
+                flags = ap->a_flags;
+                ap->a_flags &= ~LK_TYPE_MASK;
+                switch (flags & LK_TYPE_MASK) {
+                case LK_SHARED:
+                        ap->a_flags |= LK_SHARED;
+                        break;
+                case LK_UPGRADE:
+                case LK_EXCLUSIVE:
+                        ap->a_flags |= LK_EXCLUSIVE;
+                        break;
+                default:
+                        panic("Unsupported lock request %d\n",
+                            flags);
                 }
-                vdrop(lvp);
-        } else {
-                VI_UNLOCK(vp);
+                VOP_UNLOCK(lvp);
                 error = vop_stdlock(ap);
         }
-
+        vdrop(lvp);
         return (error);
 }
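
For readers who do not have the vfs SMR KPIs paged in, the shape of the change
condenses to the sketch below. This is illustrative only: lowervp_hold() is a
hypothetical name invented for this sketch and elides the LK_INTERLOCK flag
bookkeeping, while vfs_smr_enter(), vfs_smr_exit(), vhold_smr(), vholdnz() and
the interlock macros are the real primitives visible in the diff.

static struct vnode *
lowervp_hold(struct vnode *vp)
{
        struct null_node *nn;
        struct vnode *lvp = NULL;

        /*
         * Fast path: no interlock. Inside the SMR section the
         * null_node hanging off v_data cannot be freed and reused
         * under us, so the loads below are safe even against
         * concurrent reclaim of the nullfs vnode.
         */
        vfs_smr_enter();
        nn = VTONULL_SMR(vp);
        if (nn != NULL) {
                lvp = nn->null_lowervp;
                /* Fails if lvp is already past the point of being freed. */
                if (lvp != NULL && !vhold_smr(lvp))
                        lvp = NULL;
        }
        vfs_smr_exit();
        if (lvp != NULL)
                return (lvp);

        /*
         * Slow path: the interlock serializes against reclaim of vp,
         * so v_data is stable here and a plain vholdnz() on the lower
         * vnode suffices.
         */
        VI_LOCK(vp);
        nn = VTONULL(vp);
        if (nn != NULL && (lvp = nn->null_lowervp) != NULL)
                vholdnz(lvp);
        VI_UNLOCK(vp);
        return (lvp);
}

The committed code keeps the two halves as separate prep functions because the
LK_INTERLOCK case enters null_lock() with the interlock already held and must
clear the flag before dropping it; otherwise the flow is the same: try the
lock-free peek, fall back to the interlock, and only call VOP_LOCK() on a
lowervp that is held and therefore cannot be recycled while the caller sleeps.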