Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 12 Mar 2026 16:11:55 +0000
From:      Alan Somers <asomers@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org
Subject:   git: 7e68af7ce2c1 - main - fusefs: redo vnode attribute locking
Message-ID:  <69b2e5cb.41328.db0b562@gitrepo.freebsd.org>

index | next in thread | raw e-mail

The branch main has been updated by asomers:

URL: https://cgit.FreeBSD.org/src/commit/?id=7e68af7ce2c1b892954df415774fe59fd2f1b62f

commit 7e68af7ce2c1b892954df415774fe59fd2f1b62f
Author:     Alan Somers <asomers@FreeBSD.org>
AuthorDate: 2026-01-23 21:23:51 +0000
Commit:     Alan Somers <asomers@FreeBSD.org>
CommitDate: 2026-03-12 16:11:25 +0000

    fusefs: redo vnode attribute locking
    
    Previously most fields in fuse_vnode_data were protected by the vnode
    lock.  But because DEBUG_VFS_LOCKS was never enabled by default until
    stable/15 the assertions were never checked, and many were wrong.
    Others were missing.  This led to panics in stable/15 and 16.0-CURRENT,
    when a vnode was expected to be exclusively locked but wasn't, for fuse
    file systems that mount with "-o async".
    
    In some places it isn't possible to exclusively lock the vnode when
    accessing these fields.  So protect them with a new mutex instead.  This
    fixes panics and unprotected field accesses in VOP_READ,
    VOP_COPY_FILE_RANGE, VOP_GETATTR, VOP_BMAP, and FUSE_NOTIFY_INVAL_ENTRY.
    Add assertions everywhere the protected fields are accessed.
    
    Lock the vnode exclusively when handling FUSE_NOTIFY_INVAL_INODE.
    
    During fuse_vnode_setsize, if the vnode isn't already exclusively
    locked, use the vn_delayed_setsize mechanism.  This fixes panics during
    VOP_READ or VOP_GETATTR.
    
    Also, ensure that fuse_vnop_rename locks the "from" vnode.
    
    Finally, reorder elements in struct fuse_vnode_data to reduce the
    structure size.
    
    Fixes:          283391
    Reported by:    kargl, markj, vishwin, Abdelkader Boudih, groenveld@acm.org
    MFC after:      2 weeks
    Sponsored by:   ConnectWise
    Reviewed by:    kib
    Differential Revision: https://reviews.freebsd.org/D55230
---
 sys/fs/fuse/fuse_file.c       |   8 +-
 sys/fs/fuse/fuse_internal.c   |  42 +++++----
 sys/fs/fuse/fuse_io.c         |  21 ++++-
 sys/fs/fuse/fuse_node.c       |  89 ++++++++++++++++----
 sys/fs/fuse/fuse_node.h       |  91 +++++++++++++++++---
 sys/fs/fuse/fuse_vfsops.c     |   2 +
 sys/fs/fuse/fuse_vnops.c      |  81 ++++++++++++++++--
 tests/sys/fs/fusefs/bmap.cc   |  34 +++++---
 tests/sys/fs/fusefs/notify.cc |  38 ++++++---
 tests/sys/fs/fusefs/read.cc   | 192 ++++++++++++++++++++++++++++++++++++++++++
 tests/sys/fs/fusefs/rename.cc |  90 ++++++++++++++++++++
 11 files changed, 609 insertions(+), 79 deletions(-)

diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c
index 5f5819c2ccae..2cb8ef84e511 100644
--- a/sys/fs/fuse/fuse_file.c
+++ b/sys/fs/fuse/fuse_file.c
@@ -194,6 +194,8 @@ fuse_filehandle_close(struct vnode *vp, struct fuse_filehandle *fufh,
 	int err = 0;
 	int op = FUSE_RELEASE;
 
+	ASSERT_VOP_ELOCKED(vp, __func__);
+
 	if (fuse_isdeadfs(vp)) {
 		goto out;
 	}
@@ -381,7 +383,11 @@ fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type,
 	} else {
 		if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0)
 			fuse_io_invalbuf(vp, td);
-	        VTOFUD(vp)->flag &= ~FN_DIRECTIO;
+		/*
+		 * XXX Update the flag without the lock for now.  See
+		 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+		 */
+		VTOFUD(vp)->flag &= ~FN_DIRECTIO;
 	}
 
 }
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index a3590060f44a..914deb416c08 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -262,7 +262,7 @@ fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
 	fvdat = VTOFUD(vp);
 	data = fuse_get_mpdata(mp);
 
-	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
 
 	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
 		&fvdat->attr_cache_timeout);
@@ -478,7 +478,9 @@ fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
 	cn.cn_namelen = fnieo.namelen;
 	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
 	MPASS(err == 0);
+	CACHED_ATTR_LOCK(dvp);
 	fuse_vnode_clear_attr_cache(dvp);
+	CACHED_ATTR_UNLOCK(dvp);
 	vput(dvp);
 	return (0);
 }
@@ -498,8 +500,8 @@ fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
 	if (fniio.ino == FUSE_ROOT_ID)
 		err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
 	else
-		err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
-			&vp);
+		err = fuse_internal_get_cached_vnode(mp, fniio.ino,
+		    LK_EXCLUSIVE, &vp);
 	SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
 	if (err != 0 || vp == NULL)
 		return (err);
@@ -694,6 +696,8 @@ fuse_internal_remove(struct vnode *dvp,
 	nlink_t nlink;
 	int err = 0;
 
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
+
 	fdisp_init(&fdi, cnp->cn_namelen + 1);
 	fdisp_make_vp(&fdi, op, dvp, curthread, cnp->cn_cred);
 
@@ -891,15 +895,9 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct fuse_getattr_in *fgai;
 	struct fuse_attr_out *fao;
-	off_t old_filesize = fvdat->cached_attrs.va_size;
-	struct timespec old_atime = fvdat->cached_attrs.va_atime;
-	struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
-	struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
 	__enum_uint8(vtype) vtyp;
 	int err;
 
-	ASSERT_VOP_LOCKED(vp, __func__);
-
 	fdisp_init(&fdi, sizeof(*fgai));
 	fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
 	fgai = fdi.indata;
@@ -917,22 +915,27 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
 
 	fao = (struct fuse_attr_out *)fdi.answ;
 	vtyp = IFTOVT(fao->attr.mode);
+
+	CACHED_ATTR_LOCK(vp);
 	if (fvdat->flag & FN_SIZECHANGE)
-		fao->attr.size = old_filesize;
+		fao->attr.size = fvdat->cached_attrs.va_size;
 	if (fvdat->flag & FN_ATIMECHANGE) {
-		fao->attr.atime = old_atime.tv_sec;
-		fao->attr.atimensec = old_atime.tv_nsec;
+		fao->attr.atime = fvdat->cached_attrs.va_atime.tv_sec;
+		fao->attr.atimensec = fvdat->cached_attrs.va_atime.tv_nsec;
 	}
 	if (fvdat->flag & FN_CTIMECHANGE) {
-		fao->attr.ctime = old_ctime.tv_sec;
-		fao->attr.ctimensec = old_ctime.tv_nsec;
+		fao->attr.ctime = fvdat->cached_attrs.va_ctime.tv_sec;
+		fao->attr.ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
 	}
 	if (fvdat->flag & FN_MTIMECHANGE) {
-		fao->attr.mtime = old_mtime.tv_sec;
-		fao->attr.mtimensec = old_mtime.tv_nsec;
+		fao->attr.mtime = fvdat->cached_attrs.va_mtime.tv_sec;
+		fao->attr.mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
 	}
+
 	fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
 		fao->attr_valid_nsec, vap, true);
+
+	CACHED_ATTR_UNLOCK(vp);
 	if (vtyp != vnode_vtype(vp)) {
 		fuse_internal_vnode_disappear(vp);
 		err = ENOENT;
@@ -950,10 +953,13 @@ fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
 {
 	struct vattr *attrs;
 
+	CACHED_ATTR_LOCK(vp);
 	if ((attrs = VTOVA(vp)) != NULL) {
 		*vap = *attrs;	/* struct copy */
+		CACHED_ATTR_UNLOCK(vp);
 		return 0;
-	}
+	} else
+		CACHED_ATTR_UNLOCK(vp);
 
 	return fuse_internal_do_getattr(vp, vap, cred, td);
 }
@@ -1140,7 +1146,7 @@ int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
 	int err = 0;
 	__enum_uint8(vtype) vtyp;
 
-	ASSERT_VOP_ELOCKED(vp, __func__);
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
 
 	mp = vnode_mount(vp);
 	fvdat = VTOFUD(vp);
diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c
index 0760d7641c7d..9f864e48effc 100644
--- a/sys/fs/fuse/fuse_io.c
+++ b/sys/fs/fuse/fuse_io.c
@@ -401,6 +401,7 @@ retry:
 			fuse_warn(data, FSESS_WARN_WROTE_LONG,
 				"wrote more data than we provided it.");
 			/* This is bonkers.  Clear attr cache. */
+			ASSERT_CACHED_ATTRS_LOCKED(vp);
 			fvdat->flag &= ~FN_SIZECHANGE;
 			fuse_vnode_clear_attr_cache(vp);
 			err = EINVAL;
@@ -416,8 +417,10 @@ retry:
 			fuse_vnode_setsize(vp, as_written_offset, false);
 			getnanouptime(&fvdat->last_local_modify);
 		}
-		if (as_written_offset - diff >= filesize)
+		if (as_written_offset - diff >= filesize) {
+			ASSERT_CACHED_ATTRS_LOCKED(vp);
 			fvdat->flag &= ~FN_SIZECHANGE;
+		}
 
 		if (diff > 0) {
 			/* Short write */
@@ -454,8 +457,11 @@ retry:
 
 	fdisp_destroy(&fdi);
 
-	if (wrote_anything)
+	if (wrote_anything) {
+		CACHED_ATTR_LOCK(vp);
 		fuse_vnode_undirty_cached_timestamps(vp, false);
+		CACHED_ATTR_UNLOCK(vp);
+	}
 
 	vn_rlimit_fsizex_res(uio, r);
 	return (err);
@@ -556,6 +562,7 @@ again:
 			err = fuse_vnode_setsize(vp, uio->uio_offset + n, false);
 			filesize = uio->uio_offset + n;
 			getnanouptime(&fvdat->last_local_modify);
+			ASSERT_CACHED_ATTRS_LOCKED(vp);
 			fvdat->flag |= FN_SIZECHANGE;
 			if (err) {
 				brelse(bp);
@@ -806,6 +813,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
 			left = uiop->uio_resid;
 			bzero((char *)bp->b_data + nread, left);
 
+			CACHED_ATTR_LOCK(vp);
 			if ((fvdat->flag & FN_SIZECHANGE) == 0) {
 				/*
 				 * A short read with no error, when not using
@@ -838,6 +846,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
 					"Short read of a dirty file");
 				uiop->uio_resid = 0;
 			}
+			CACHED_ATTR_UNLOCK(vp);
 		}
 		if (error) {
 			bp->b_ioflags |= BIO_ERROR;
@@ -855,10 +864,18 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
 		 * anything about it.  In particular, we can't invalidate any
 		 * part of the file's buffers because VOP_STRATEGY is called
 		 * with them already locked.
+		 *
+		 * Normally the vnode should be exclusively locked at this
+		 * point.  However, if clustered reads are in use, then in a
+		 * mixed read-write workload getblkx may need to flush a
+		 * partially written buffer to disk during a read.  In such a
+		 * case, the vnode may only have a shared lock at this point.
 		 */
+		CACHED_ATTR_LOCK(vp);
 		filesize = fvdat->cached_attrs.va_size;
 		/* filesize must've been cached by fuse_vnop_open.  */
 		KASSERT(filesize != VNOVAL, ("filesize should've been cached"));
+		CACHED_ATTR_UNLOCK(vp);
 
 		if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize)
 			bp->b_dirtyend = filesize - 
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c
index 742dc66bcafc..f4fb993a7ca1 100644
--- a/sys/fs/fuse/fuse_node.c
+++ b/sys/fs/fuse/fuse_node.c
@@ -157,6 +157,8 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat,
 	fvdat->nid = nodeid;
 	LIST_INIT(&fvdat->handles);
 
+	mtx_init(&fvdat->cached_attr_mtx, "fuse attr cache mutex", NULL,
+	    MTX_DEF);
 	vattr_null(&fvdat->cached_attrs);
 	fvdat->cached_attrs.va_birthtime.tv_sec = -1;
 	fvdat->cached_attrs.va_birthtime.tv_nsec = 0;
@@ -181,6 +183,7 @@ fuse_vnode_destroy(struct vnode *vp)
 	struct fuse_vnode_data *fvdat = vp->v_data;
 
 	vp->v_data = NULL;
+	mtx_destroy(&fvdat->cached_attr_mtx);
 	KASSERT(LIST_EMPTY(&fvdat->handles),
 		("Destroying fuse vnode with open files!"));
 	free(fvdat, M_FUSEVN);
@@ -386,7 +389,8 @@ fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid)
 	struct fuse_setattr_in *fsai;
 	int err = 0;
 
-	ASSERT_VOP_ELOCKED(vp, "fuse_io_extend");
+	ASSERT_VOP_ELOCKED(vp, __func__); /* For flag and last_local_modify */
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
 
 	if (fuse_isdeadfs(vp)) {
 		return EBADF;
@@ -439,10 +443,10 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server)
 	struct vattr *attrs;
 	off_t oldsize;
 	size_t iosize;
-	struct buf *bp = NULL;
 	int err = 0;
 
-	ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize");
+	ASSERT_VOP_LOCKED(vp, __func__);
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
 
 	iosize = fuse_iosize(vp);
 	oldsize = fvdat->cached_attrs.va_size;
@@ -450,7 +454,45 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server)
 	if ((attrs = VTOVA(vp)) != NULL)
 		attrs->va_size = newsize;
 
-	if (newsize < oldsize) {
+	if (from_server && newsize > oldsize && oldsize != VNOVAL) {
+		/*
+		 * The FUSE server changed the file size behind our back.  We
+		 * should invalidate the entire cache.
+		 */
+		daddr_t end_lbn;
+
+		end_lbn = howmany(newsize, iosize);
+		v_inval_buf_range(vp, 0, end_lbn, iosize);
+	}
+
+	if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) {
+		err = fuse_vnode_setsize_immediate(vp, newsize < oldsize);
+	} else {
+		/* Without an exclusive vnode lock, we must defer the operation */
+		fvdat->flag |= FN_DELAYED_TRUNCATE;
+		vn_delayed_setsize(vp);
+	}
+
+	return err;
+}
+
+/* Immediately set the vnode's size in the pager */
+int
+fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink)
+{
+	struct fuse_vnode_data *fvdat = VTOFUD(vp);
+	struct buf *bp = NULL;
+	size_t iosize;
+	off_t newsize;
+	int err = 0;
+
+	MPASS(fvdat);
+	ASSERT_VOP_ELOCKED(vp, __func__);
+
+	iosize = fuse_iosize(vp);
+	newsize = fvdat->cached_attrs.va_size;
+
+	if (shrink) {
 		daddr_t lbn;
 
 		err = vtruncbuf(vp, newsize, fuse_iosize(vp));
@@ -474,21 +516,13 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server)
 		MPASS(bp->b_flags & B_VMIO);
 		vfs_bio_clrbuf(bp);
 		bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize);
-	} else if (from_server && newsize > oldsize && oldsize != VNOVAL) {
-		/*
-		 * The FUSE server changed the file size behind our back.  We
-		 * should invalidate the entire cache.
-		 */
-		daddr_t end_lbn;
-
-		end_lbn = howmany(newsize, iosize);
-		v_inval_buf_range(vp, 0, end_lbn, iosize);
 	}
 out:
 	if (bp)
 		brelse(bp);
 	vnode_pager_setsize(vp, newsize);
-	return err;
+
+	return (err);
 }
 
 /* Get the current, possibly dirty, size of the file */
@@ -497,15 +531,28 @@ fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred,
 	struct thread *td)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
+	struct vattr va;
 	int error = 0;
 
+	ASSERT_VOP_LOCKED(vp, __func__);
+
+	CACHED_ATTR_LOCK(vp);
 	if (!(fvdat->flag & FN_SIZECHANGE) &&
 		(!fuse_vnode_attr_cache_valid(vp) ||
-		  fvdat->cached_attrs.va_size == VNOVAL)) 
-		error = fuse_internal_do_getattr(vp, NULL, cred, td);
-
-	if (!error)
+		  fvdat->cached_attrs.va_size == VNOVAL)) {
+		CACHED_ATTR_UNLOCK(vp);
+		/*
+		 * It incurs a large struct copy, but we supply &va so we don't
+		 * have to acquire the lock a second time after
+		 * fuse_internal_do_getattr returns.
+		 */
+		error = fuse_internal_do_getattr(vp, &va, cred, td);
+		if (!error)
+			*filesize = va.va_size;
+	} else {
 		*filesize = fvdat->cached_attrs.va_size;
+		CACHED_ATTR_UNLOCK(vp);
+	}
 
 	return error;
 }
@@ -515,6 +562,8 @@ fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime)
 {
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
+
 	fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE);
 	if (atime)
 		fvdat->flag &= ~FN_ATIMECHANGE;
@@ -537,6 +586,8 @@ fuse_vnode_update(struct vnode *vp, int flags)
 	if (mp->mnt_flag & MNT_NOATIME)
 		flags &= ~FN_ATIMECHANGE;
 
+	CACHED_ATTR_LOCK(vp);
+
 	if (flags & FN_ATIMECHANGE)
 		fvdat->cached_attrs.va_atime = ts;
 	if (flags & FN_MTIMECHANGE)
@@ -545,6 +596,8 @@ fuse_vnode_update(struct vnode *vp, int flags)
 		fvdat->cached_attrs.va_ctime = ts;
 
 	fvdat->flag |= flags;
+
+	CACHED_ATTR_UNLOCK(vp);
 }
 
 void
diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h
index 97774de9eeb5..b6e388d01702 100644
--- a/sys/fs/fuse/fuse_node.h
+++ b/sys/fs/fuse/fuse_node.h
@@ -79,6 +79,11 @@
  * cache_attrs.va_size field does not time out.
  */
 #define	FN_SIZECHANGE		0x00000100
+/*
+ * Whether I/O to this vnode should bypass the cache.
+ * XXX BUG: this should be part of the file handle, not the vnode data.
+ * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+ */
 #define	FN_DIRECTIO		0x00000200
 /* Indicates that parent_nid is valid */
 #define	FN_PARENT_NID		0x00000400
@@ -92,38 +97,81 @@
 #define	FN_CTIMECHANGE		0x00001000
 #define	FN_ATIMECHANGE		0x00002000
 
+/* vop_delayed_setsize should truncate the file */
+#define FN_DELAYED_TRUNCATE	0x00004000
+
+#define CACHED_ATTR_LOCK(vp)				\
+do {							\
+	ASSERT_VOP_LOCKED(vp, __func__);		\
+	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)		\
+		mtx_lock(&VTOFUD(vp)->cached_attr_mtx);	\
+} while(0)
+
+#define CACHED_ATTR_UNLOCK(vp)					\
+do {								\
+	ASSERT_VOP_LOCKED(vp, __func__);			\
+	if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE)			\
+		mtx_unlock(&VTOFUD(vp)->cached_attr_mtx);	\
+} while(0)
+
 struct fuse_vnode_data {
-	/** self **/
+	/* self's node id, similar to an inode number. Immutable. */
 	uint64_t	nid;
+	/*
+	 * Generation number.  Distinguishes files with same nid but that don't
+	 * overlap in time.  Immutable.
+	 */
 	uint64_t	generation;
 
-	/** parent **/
+	/* parent's node id.  Protected by the vnode lock. */
 	uint64_t	parent_nid;
 
 	/** I/O **/
-	/* List of file handles for all of the vnode's open file descriptors */
+	/*
+	 * List of file handles for all of the vnode's open file descriptors.
+	 * Protected by the vnode lock.
+	 */
 	LIST_HEAD(, fuse_filehandle)	handles;
 
-	/** flags **/
-	uint32_t	flag;
+	/* Protects flag, attr_cache_timeout and cached_attrs */
+	struct mtx	cached_attr_mtx;
 
-	/** meta **/
-	/* The monotonic time after which the attr cache is invalid */
+	/*
+	 * The monotonic time after which the attr cache is invalid
+	 * Protected by an exclusive vnode lock or the cached_attr_mtx
+	 */
 	struct bintime	attr_cache_timeout;
-	/* 
+
+	/*
 	 * Monotonic time after which the entry is invalid.  Used for lookups
-	 * by nodeid instead of pathname.
+	 * by nodeid instead of pathname.  Protected by the vnode lock.
 	 */
 	struct bintime	entry_cache_timeout;
+
 	/*
 	 * Monotonic time of the last FUSE operation that modified the file
 	 * size.  Used to avoid races between mutator ops like VOP_SETATTR and
-	 * unlocked accessor ops like VOP_LOOKUP.
+	 * unlocked accessor ops like VOP_LOOKUP.  Protected by the vnode lock.
 	 */
 	struct timespec	last_local_modify;
+
+	/* Protected by an exclusive vnode lock or the cached_attr_mtx */
 	struct vattr	cached_attrs;
+
+	/* Number of FUSE_LOOKUPs minus FUSE_FORGETs. Protected by vnode lock */
 	uint64_t	nlookup;
+
+	/*
+	 * Misc flags.  Protected by an exclusive vnode lock or the
+	 * cached_attr_mtx, because some of the flags reflect the contents of
+	 * cached_attrs.
+	 */
+	uint32_t	flag;
+
+	/* Vnode type.  Immutable */
 	__enum_uint8(vtype)	vtype;
+
+	/* State for clustered writes.  Protected by vnode lock */
 	struct vn_clusterw clusterw;
 };
 
@@ -141,18 +189,32 @@ struct fuse_fid {
 #define VTOFUD(vp) \
 	((struct fuse_vnode_data *)((vp)->v_data))
 #define VTOI(vp)    (VTOFUD(vp)->nid)
+
+#define ASSERT_CACHED_ATTRS_LOCKED(vp)				\
+do {								\
+	ASSERT_VOP_LOCKED(vp, __func__);			\
+	VNASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE ||		\
+		mtx_owned(&VTOFUD(vp)->cached_attr_mtx), vp,	\
+		("cached attrs not locked"));			\
+} while(0)
+
 static inline bool
 fuse_vnode_attr_cache_valid(struct vnode *vp)
 {
+	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct bintime now;
 
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
+
 	getbinuptime(&now);
-	return (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >));
+	return (bintime_cmp(&fvdat->attr_cache_timeout, &now, >));
 }
 
 static inline struct vattr*
 VTOVA(struct vnode *vp)
 {
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
+
 	if (fuse_vnode_attr_cache_valid(vp))
 		return &(VTOFUD(vp)->cached_attrs);
 	else
@@ -162,6 +224,8 @@ VTOVA(struct vnode *vp)
 static inline void
 fuse_vnode_clear_attr_cache(struct vnode *vp)
 {
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
+
 	bintime_clear(&VTOFUD(vp)->attr_cache_timeout);
 }
 
@@ -184,10 +248,14 @@ static inline void
 fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp)
 {
 	if (dvp != NULL && vp->v_type == VDIR) {
+		ASSERT_VOP_ELOCKED(vp, __func__); /* for parent_nid */
+
 		MPASS(dvp->v_type == VDIR);
 		VTOFUD(vp)->parent_nid = VTOI(dvp);
 		VTOFUD(vp)->flag |= FN_PARENT_NID;
 	} else {
+		ASSERT_CACHED_ATTRS_LOCKED(vp);
+
 		VTOFUD(vp)->flag &= ~FN_PARENT_NID;
 	}
 }
@@ -207,6 +275,7 @@ void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags,
 int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid);
 
 int fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server);
+int fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink);
 
 void fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime);
 
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c
index a5118aa7675f..7b6ad86e4b2b 100644
--- a/sys/fs/fuse/fuse_vfsops.c
+++ b/sys/fs/fuse/fuse_vfsops.c
@@ -601,6 +601,8 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
 	error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp);
 	if (error)
 		goto out;
+	/* for last_local_modify and fuse_internal_cache_attrs */
+	ASSERT_VOP_ELOCKED(*vpp, __func__);
 	fvdat = VTOFUD(*vpp);
 
 	if (timespeccmp(&now, &fvdat->last_local_modify, >)) {
diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c
index c66d3bcf01e0..ad7a2dcf84ef 100644
--- a/sys/fs/fuse/fuse_vnops.c
+++ b/sys/fs/fuse/fuse_vnops.c
@@ -134,6 +134,7 @@ static vop_close_t fuse_vnop_close;
 static vop_copy_file_range_t fuse_vnop_copy_file_range;
 static vop_create_t fuse_vnop_create;
 static vop_deallocate_t fuse_vnop_deallocate;
+static vop_delayed_setsize_t fuse_vnop_delayed_setsize;
 static vop_deleteextattr_t fuse_vnop_deleteextattr;
 static vop_fdatasync_t fuse_vnop_fdatasync;
 static vop_fsync_t fuse_vnop_fsync;
@@ -191,6 +192,7 @@ struct vop_vector fuse_vnops = {
 	.vop_copy_file_range = fuse_vnop_copy_file_range,
 	.vop_create = fuse_vnop_create,
 	.vop_deallocate = fuse_vnop_deallocate,
+	.vop_delayed_setsize = fuse_vnop_delayed_setsize,
 	.vop_deleteextattr = fuse_vnop_deleteextattr,
 	.vop_fsync = fuse_vnop_fsync,
 	.vop_fdatasync = fuse_vnop_fdatasync,
@@ -707,6 +709,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap)
 		return (EXTERROR(EOPNOTSUPP, "This server does not implement "
 		    "FUSE_FALLOCATE"));
 
+	ASSERT_CACHED_ATTRS_LOCKED(vp);
+
 	io.uio_offset = *offset;
 	io.uio_resid = *len;
 	err = vn_rlimit_fsize(vp, &io, curthread);
@@ -779,7 +783,7 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
 	struct fuse_data *data;
 	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	uint64_t biosize;
-	off_t fsize;
+	off_t fsize = VNOVAL;
 	daddr_t lbn = ap->a_bn;
 	daddr_t *pbn = ap->a_bnp;
 	int *runp = ap->a_runp;
@@ -822,9 +826,10 @@ fuse_vnop_bmap(struct vop_bmap_args *ap)
 		 * and the risk of getting it wrong is not worth the cost of
 		 * another upcall.
 		 */
-		if (fvdat->cached_attrs.va_size != VNOVAL)
-			fsize = fvdat->cached_attrs.va_size;
-		else
+		CACHED_ATTR_LOCK(vp);
+		fsize = fvdat->cached_attrs.va_size;
+		CACHED_ATTR_UNLOCK(vp);
+		if (fsize == VNOVAL)
 			error = fuse_vnode_size(vp, &fsize, td->td_ucred, td);
 		if (error == 0)
 			*runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1),
@@ -894,6 +899,7 @@ fuse_vnop_close(struct vop_close_args *ap)
 		cred = td->td_ucred;
 
 	err = fuse_flush(vp, cred, pid, fflag);
+	ASSERT_CACHED_ATTRS_LOCKED(vp);	/* For fvdat->flag */
 	if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) {
 		struct vattr vap;
 		struct fuse_data *data;
@@ -911,6 +917,7 @@ fuse_vnop_close(struct vop_close_args *ap)
 		}
 		if (access_e == 0) {
 			VATTR_NULL(&vap);
+			ASSERT_CACHED_ATTRS_LOCKED(vp);
 			vap.va_atime = fvdat->cached_attrs.va_atime;
 			/*
 			 * Ignore errors setting when setting atime.  That
@@ -1035,6 +1042,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap)
 		*ap->a_inoffp += fwo->size;
 		*ap->a_outoffp += fwo->size;
 		fuse_internal_clear_suid_on_write(outvp, outcred, td);
+		ASSERT_CACHED_ATTRS_LOCKED(outvp);
 		if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) {
 			fuse_vnode_setsize(outvp, *ap->a_outoffp, false);
 			getnanouptime(&outfvdat->last_local_modify);
@@ -1337,6 +1345,7 @@ fuse_vnop_inactive(struct vop_inactive_args *ap)
 
 	int need_flush = 1;
 
+	ASSERT_CACHED_ATTRS_LOCKED(vp);	/* For fvdat->flag */
 	LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) {
 		if (need_flush && vp->v_type == VREG) {
 			if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) {
@@ -1557,6 +1566,7 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
 	else if ((err = fuse_internal_access(dvp, VEXEC, td, cred)))
 		return err;
 
+	ASSERT_CACHED_ATTRS_LOCKED(dvp);	/* For flag */
 	is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.';
 	if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) {
 		if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) {
@@ -1960,6 +1970,10 @@ fuse_vnop_read(struct vop_read_args *ap)
 		    "to be closed"));
 	}
 
+	/*
+	 * XXX Check this flag without the lock.  See
+	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+	 */
 	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
 		ioflag |= IO_DIRECT;
 	}
@@ -2220,6 +2234,8 @@ fuse_vnop_remove(struct vop_remove_args *ap)
 	return err;
 }
 
+SDT_PROBE_DEFINE4(fusefs, , vnops, erelookup, "struct vnode*",
+	"struct vnode*", "struct vnode*", "struct vnode*");
 /*
     struct vnop_rename_args {
 	struct vnode *a_fdvp;
@@ -2242,6 +2258,7 @@ fuse_vnop_rename(struct vop_rename_args *ap)
 	struct fuse_data *data;
 	bool newparent = fdvp != tdvp;
 	bool isdir = fvp->v_type == VDIR;
+	int locktype;
 	int err = 0;
 
 	if (fuse_isdeadfs(fdvp)) {
@@ -2270,11 +2287,32 @@ fuse_vnop_rename(struct vop_rename_args *ap)
 	 * have write permission to it, so ".." can be modified.
 	 */
 	data = fuse_get_mpdata(vnode_mount(tdvp));
+
+	if (tdvp != fdvp)
+		locktype = LK_EXCLUSIVE; /* for fuse_vnode_setparent */
+	else
+		locktype = LK_SHARED;
+
+	/*
+	 * Must use LK_NOWAIT to prevent LORs between fvp and tdvp or
+	 * tvp
+	 */
+	if (vn_lock(fvp, locktype | LK_NOWAIT) != 0) {
+		/*
+		 * Can't release tdvp or tvp to try avoiding the LOR.
+		 * Must return instead.
+		 */
+		SDT_PROBE4(fusefs, , vnops, erelookup, fdvp, fvp, tdvp,
+			tvp);
+		err = ERELOOKUP;
+		goto out;
+	}
+
 	if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) {
 		err = fuse_internal_access(fvp, VWRITE,
 			curthread, tcnp->cn_cred);
 		if (err)
-			goto out;
+			goto unlock;
 	}
 	err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp);
 	if (err == 0) {
@@ -2293,6 +2331,8 @@ fuse_vnop_rename(struct vop_rename_args *ap)
 		}
 		cache_purge(fdvp);
 	}
+unlock:
+	VOP_UNLOCK(fvp);
 out:
 	if (tdvp == tvp) {
 		vrele(tdvp);
@@ -2568,6 +2608,7 @@ static int
 fuse_vnop_write(struct vop_write_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
+	struct fuse_vnode_data *fvdat = VTOFUD(vp);
 	struct uio *uio = ap->a_uio;
 	int ioflag = ap->a_ioflag;
 	struct ucred *cred = ap->a_cred;
@@ -2583,9 +2624,12 @@ fuse_vnop_write(struct vop_write_args *ap)
 		    "to be closed"));
 	}
 
-	if (VTOFUD(vp)->flag & FN_DIRECTIO) {
+	/*
+	 * XXX Check this flag without the lock.  See
+	 * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088
+	 */
+	if (fvdat->flag & FN_DIRECTIO)
 		ioflag |= IO_DIRECT;
-	}
 
 	err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
 	if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) {
@@ -3219,6 +3263,29 @@ fallback:
 	return (vop_stddeallocate(ap));
 }
 
+/*
+   struct vop_delayed_setsize_args {
+	struct vop_generic_args a_gen;
+	struct vnode *a_vp;
+  };
+ */
+static int
+fuse_vnop_delayed_setsize(struct vop_delayed_setsize_args *ap)
+{
+	struct vnode *vp = ap->a_vp;
+	struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp);
+	bool shrink = (fvdat->flag & FN_DELAYED_TRUNCATE) != 0;
+	int err;
+
+	if (!fvdat)
+		return (0);
+
+	err = fuse_vnode_setsize_immediate(vp, shrink);
+	fvdat->flag &= ~FN_DELAYED_TRUNCATE;
+
+	return (err);
+}
+
 /*
     struct vop_deleteextattr_args {
 	struct vop_generic_args a_gen;
diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc
index e61dadb6d79e..622a3c3debcc 100644
--- a/tests/sys/fs/fusefs/bmap.cc
+++ b/tests/sys/fs/fusefs/bmap.cc
@@ -44,10 +44,13 @@ using namespace testing;
 const static char FULLPATH[] = "mountpoint/foo";
 const static char RELPATH[] = "foo";
 
-class Bmap: public FuseTest {
+class Bmap: public FuseTest,
+	    public WithParamInterface<tuple<int, int>>
+{
 public:
 virtual void SetUp() {
 	m_maxreadahead = UINT32_MAX;
+	m_init_flags |= get<0>(GetParam());
 	FuseTest::SetUp();
 }
 void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn)
@@ -73,12 +76,12 @@ void expect_lookup(const char *relpath, uint64_t ino, off_t size)
 }
 };
 
-class BmapEof: public Bmap, public WithParamInterface<int> {};
+class BmapEof: public Bmap {};
 
 /*
  * Test FUSE_BMAP
  */
-TEST_F(Bmap, bmap)
+TEST_P(Bmap, bmap)
 {
 	struct fiobmap2_arg arg;
 	/*
@@ -124,7 +127,7 @@ TEST_F(Bmap, bmap)
  * If the daemon does not implement VOP_BMAP, fusefs should return sensible
  * defaults.
  */
-TEST_F(Bmap, default_)
+TEST_P(Bmap, default_)
 {
 	struct fiobmap2_arg arg;
 	const off_t filesize = 1 << 30;
@@ -181,7 +184,7 @@ TEST_F(Bmap, default_)
  * The server returns an error for some reason for FUSE_BMAP.  fusefs should
  * faithfully report that error up to the caller.
  */
-TEST_F(Bmap, einval)
+TEST_P(Bmap, einval)
 {
 	struct fiobmap2_arg arg;
 	const off_t filesize = 1 << 30;
@@ -217,7 +220,7 @@ TEST_F(Bmap, einval)
  * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=264196 .  The bug did not
  * lie in fusefs, but this is a convenient place for a regression test.
  */
-TEST_F(Bmap, spurious_einval)
+TEST_P(Bmap, spurious_einval)
 {
 	const off_t filesize = 4ull << 30;
 	const ino_t ino = 42;
@@ -288,7 +291,7 @@ TEST_P(BmapEof, eof)
 	int fd;
 	int ngetattrs;
 
-	ngetattrs = GetParam();
+	ngetattrs = get<1>(GetParam());
 	FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0);
 	expect_open(ino, 0, 1);
 	// Depending on ngetattrs, FUSE_READ could be called with either
@@ -348,6 +351,17 @@ TEST_P(BmapEof, eof)
 	leak(fd);
 }
 
-INSTANTIATE_TEST_SUITE_P(BE, BmapEof,
-	Values(1, 2, 3)
-);
+/*
+ * Try with and without async reads, because it affects the type of vnode lock
+ * on entry to fuse_vnop_bmap.
+ */
+INSTANTIATE_TEST_SUITE_P(B, Bmap, Values(
+	tuple(0, 0),
+	tuple(FUSE_ASYNC_READ, 0)
+));
+
+INSTANTIATE_TEST_SUITE_P(BE, BmapEof, Values(
+	tuple(0, 1),
+	tuple(0, 2),
+	tuple(0, 3)
+));
diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc
index d370a1e6e706..69742fb2a54b 100644
--- a/tests/sys/fs/fusefs/notify.cc
+++ b/tests/sys/fs/fusefs/notify.cc
@@ -47,8 +47,15 @@ using namespace testing;
  * invalidation.  This file tests our client's handling of those messages.
  */
 
-class Notify: public FuseTest {
+class Notify: public FuseTest,
+	      public WithParamInterface<int>
+{
 public:
+virtual void SetUp() {
+	m_init_flags |= GetParam();
+	FuseTest::SetUp();
+}
+
 /* Ignore an optional FUSE_FSYNC */
 void maybe_expect_fsync(uint64_t ino)
 {
@@ -154,7 +161,7 @@ static void* store(void* arg) {
 }
 
 /* Invalidate a nonexistent entry */
-TEST_F(Notify, inval_entry_nonexistent)
+TEST_P(Notify, inval_entry_nonexistent)
 {
 	const static char *name = "foo";
 	struct inval_entry_args iea;
@@ -173,7 +180,7 @@ TEST_F(Notify, inval_entry_nonexistent)
 }
 
 /* Invalidate a cached entry */
-TEST_F(Notify, inval_entry)
+TEST_P(Notify, inval_entry)
 {
 	const static char FULLPATH[] = "mountpoint/foo";
*** 404 LINES SKIPPED ***


home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69b2e5cb.41328.db0b562>