Date: Thu, 12 Mar 2026 16:11:55 +0000 From: Alan Somers <asomers@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 7e68af7ce2c1 - main - fusefs: redo vnode attribute locking Message-ID: <69b2e5cb.41328.db0b562@gitrepo.freebsd.org>
index | next in thread | raw e-mail
The branch main has been updated by asomers: URL: https://cgit.FreeBSD.org/src/commit/?id=7e68af7ce2c1b892954df415774fe59fd2f1b62f commit 7e68af7ce2c1b892954df415774fe59fd2f1b62f Author: Alan Somers <asomers@FreeBSD.org> AuthorDate: 2026-01-23 21:23:51 +0000 Commit: Alan Somers <asomers@FreeBSD.org> CommitDate: 2026-03-12 16:11:25 +0000 fusefs: redo vnode attribute locking Previously most fields in fuse_vnode_data were protected by the vnode lock. But because DEBUG_VFS_LOCKS was never enabled by default until stable/15 the assertions were never checked, and many were wrong. Others were missing. This led to panics in stable/15 and 16.0-CURRENT, when a vnode was expected to be exclusively locked but wasn't, for fuse file systems that mount with "-o async". In some places it isn't possible to exclusively lock the vnode when accessing these fields. So protect them with a new mutex instead. This fixes panics and unprotected field accesses in VOP_READ, VOP_COPY_FILE_RANGE, VOP_GETATTR, VOP_BMAP, and FUSE_NOTIFY_INVAL_ENTRY. Add assertions everywhere the protected fields are accessed. Lock the vnode exclusively when handling FUSE_NOTIFY_INVAL_INODE. During fuse_vnode_setsize, if the vnode isn't already exclusively locked, use the vn_delayed_setsize mechanism. This fixes panics during VOP_READ or VOP_GETATTR. Also, ensure that fuse_vnop_rename locks the "from" vnode. Finally, reorder elements in struct fuse_vnode_data to reduce the structure size. Fixes: 283391 Reported by: kargl, markj, vishwin, Abdelkader Boudih, groenveld@acm.org MFC after: 2 weeks Sponsored by: ConnectWise Reviewed by: kib Differential Revision: https://reviews.freebsd.org/D55230 --- sys/fs/fuse/fuse_file.c | 8 +- sys/fs/fuse/fuse_internal.c | 42 +++++---- sys/fs/fuse/fuse_io.c | 21 ++++- sys/fs/fuse/fuse_node.c | 89 ++++++++++++++++---- sys/fs/fuse/fuse_node.h | 91 +++++++++++++++++--- sys/fs/fuse/fuse_vfsops.c | 2 + sys/fs/fuse/fuse_vnops.c | 81 ++++++++++++++++-- tests/sys/fs/fusefs/bmap.cc | 34 +++++--- tests/sys/fs/fusefs/notify.cc | 38 ++++++--- tests/sys/fs/fusefs/read.cc | 192 ++++++++++++++++++++++++++++++++++++++++++ tests/sys/fs/fusefs/rename.cc | 90 ++++++++++++++++++++ 11 files changed, 609 insertions(+), 79 deletions(-) diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c index 5f5819c2ccae..2cb8ef84e511 100644 --- a/sys/fs/fuse/fuse_file.c +++ b/sys/fs/fuse/fuse_file.c @@ -194,6 +194,8 @@ fuse_filehandle_close(struct vnode *vp, struct fuse_filehandle *fufh, int err = 0; int op = FUSE_RELEASE; + ASSERT_VOP_ELOCKED(vp, __func__); + if (fuse_isdeadfs(vp)) { goto out; } @@ -381,7 +383,11 @@ fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type, } else { if ((foo->open_flags & FOPEN_KEEP_CACHE) == 0) fuse_io_invalbuf(vp, td); - VTOFUD(vp)->flag &= ~FN_DIRECTIO; + /* + * XXX Update the flag without the lock for now. See + * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088 + */ + VTOFUD(vp)->flag &= ~FN_DIRECTIO; } } diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index a3590060f44a..914deb416c08 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -262,7 +262,7 @@ fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr, fvdat = VTOFUD(vp); data = fuse_get_mpdata(mp); - ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs"); + ASSERT_CACHED_ATTRS_LOCKED(vp); fuse_validity_2_bintime(attr_valid, attr_valid_nsec, &fvdat->attr_cache_timeout); @@ -478,7 +478,9 @@ fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio) cn.cn_namelen = fnieo.namelen; err = cache_lookup(dvp, &vp, &cn, NULL, NULL); MPASS(err == 0); + CACHED_ATTR_LOCK(dvp); fuse_vnode_clear_attr_cache(dvp); + CACHED_ATTR_UNLOCK(dvp); vput(dvp); return (0); } @@ -498,8 +500,8 @@ fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio) if (fniio.ino == FUSE_ROOT_ID) err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp); else - err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED, - &vp); + err = fuse_internal_get_cached_vnode(mp, fniio.ino, + LK_EXCLUSIVE, &vp); SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio); if (err != 0 || vp == NULL) return (err); @@ -694,6 +696,8 @@ fuse_internal_remove(struct vnode *dvp, nlink_t nlink; int err = 0; + ASSERT_CACHED_ATTRS_LOCKED(vp); + fdisp_init(&fdi, cnp->cn_namelen + 1); fdisp_make_vp(&fdi, op, dvp, curthread, cnp->cn_cred); @@ -891,15 +895,9 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, struct fuse_vnode_data *fvdat = VTOFUD(vp); struct fuse_getattr_in *fgai; struct fuse_attr_out *fao; - off_t old_filesize = fvdat->cached_attrs.va_size; - struct timespec old_atime = fvdat->cached_attrs.va_atime; - struct timespec old_ctime = fvdat->cached_attrs.va_ctime; - struct timespec old_mtime = fvdat->cached_attrs.va_mtime; __enum_uint8(vtype) vtyp; int err; - ASSERT_VOP_LOCKED(vp, __func__); - fdisp_init(&fdi, sizeof(*fgai)); fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred); fgai = fdi.indata; @@ -917,22 +915,27 @@ fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap, fao = (struct fuse_attr_out *)fdi.answ; vtyp = IFTOVT(fao->attr.mode); + + CACHED_ATTR_LOCK(vp); if (fvdat->flag & FN_SIZECHANGE) - fao->attr.size = old_filesize; + fao->attr.size = fvdat->cached_attrs.va_size; if (fvdat->flag & FN_ATIMECHANGE) { - fao->attr.atime = old_atime.tv_sec; - fao->attr.atimensec = old_atime.tv_nsec; + fao->attr.atime = fvdat->cached_attrs.va_atime.tv_sec; + fao->attr.atimensec = fvdat->cached_attrs.va_atime.tv_nsec; } if (fvdat->flag & FN_CTIMECHANGE) { - fao->attr.ctime = old_ctime.tv_sec; - fao->attr.ctimensec = old_ctime.tv_nsec; + fao->attr.ctime = fvdat->cached_attrs.va_ctime.tv_sec; + fao->attr.ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec; } if (fvdat->flag & FN_MTIMECHANGE) { - fao->attr.mtime = old_mtime.tv_sec; - fao->attr.mtimensec = old_mtime.tv_nsec; + fao->attr.mtime = fvdat->cached_attrs.va_mtime.tv_sec; + fao->attr.mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec; } + fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid, fao->attr_valid_nsec, vap, true); + + CACHED_ATTR_UNLOCK(vp); if (vtyp != vnode_vtype(vp)) { fuse_internal_vnode_disappear(vp); err = ENOENT; @@ -950,10 +953,13 @@ fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred, { struct vattr *attrs; + CACHED_ATTR_LOCK(vp); if ((attrs = VTOVA(vp)) != NULL) { *vap = *attrs; /* struct copy */ + CACHED_ATTR_UNLOCK(vp); return 0; - } + } else + CACHED_ATTR_UNLOCK(vp); return fuse_internal_do_getattr(vp, vap, cred, td); } @@ -1140,7 +1146,7 @@ int fuse_internal_setattr(struct vnode *vp, struct vattr *vap, int err = 0; __enum_uint8(vtype) vtyp; - ASSERT_VOP_ELOCKED(vp, __func__); + ASSERT_CACHED_ATTRS_LOCKED(vp); mp = vnode_mount(vp); fvdat = VTOFUD(vp); diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index 0760d7641c7d..9f864e48effc 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -401,6 +401,7 @@ retry: fuse_warn(data, FSESS_WARN_WROTE_LONG, "wrote more data than we provided it."); /* This is bonkers. Clear attr cache. */ + ASSERT_CACHED_ATTRS_LOCKED(vp); fvdat->flag &= ~FN_SIZECHANGE; fuse_vnode_clear_attr_cache(vp); err = EINVAL; @@ -416,8 +417,10 @@ retry: fuse_vnode_setsize(vp, as_written_offset, false); getnanouptime(&fvdat->last_local_modify); } - if (as_written_offset - diff >= filesize) + if (as_written_offset - diff >= filesize) { + ASSERT_CACHED_ATTRS_LOCKED(vp); fvdat->flag &= ~FN_SIZECHANGE; + } if (diff > 0) { /* Short write */ @@ -454,8 +457,11 @@ retry: fdisp_destroy(&fdi); - if (wrote_anything) + if (wrote_anything) { + CACHED_ATTR_LOCK(vp); fuse_vnode_undirty_cached_timestamps(vp, false); + CACHED_ATTR_UNLOCK(vp); + } vn_rlimit_fsizex_res(uio, r); return (err); @@ -556,6 +562,7 @@ again: err = fuse_vnode_setsize(vp, uio->uio_offset + n, false); filesize = uio->uio_offset + n; getnanouptime(&fvdat->last_local_modify); + ASSERT_CACHED_ATTRS_LOCKED(vp); fvdat->flag |= FN_SIZECHANGE; if (err) { brelse(bp); @@ -806,6 +813,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp) left = uiop->uio_resid; bzero((char *)bp->b_data + nread, left); + CACHED_ATTR_LOCK(vp); if ((fvdat->flag & FN_SIZECHANGE) == 0) { /* * A short read with no error, when not using @@ -838,6 +846,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp) "Short read of a dirty file"); uiop->uio_resid = 0; } + CACHED_ATTR_UNLOCK(vp); } if (error) { bp->b_ioflags |= BIO_ERROR; @@ -855,10 +864,18 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp) * anything about it. In particular, we can't invalidate any * part of the file's buffers because VOP_STRATEGY is called * with them already locked. + * + * Normally the vnode should be exclusively locked at this + * point. However, if clustered reads are in use, then in a + * mixed read-write workload getblkx may need to flush a + * partially written buffer to disk during a read. In such a + * case, the vnode may only have a shared lock at this point. */ + CACHED_ATTR_LOCK(vp); filesize = fvdat->cached_attrs.va_size; /* filesize must've been cached by fuse_vnop_open. */ KASSERT(filesize != VNOVAL, ("filesize should've been cached")); + CACHED_ATTR_UNLOCK(vp); if ((off_t)bp->b_lblkno * biosize + bp->b_dirtyend > filesize) bp->b_dirtyend = filesize - diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c index 742dc66bcafc..f4fb993a7ca1 100644 --- a/sys/fs/fuse/fuse_node.c +++ b/sys/fs/fuse/fuse_node.c @@ -157,6 +157,8 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat, fvdat->nid = nodeid; LIST_INIT(&fvdat->handles); + mtx_init(&fvdat->cached_attr_mtx, "fuse attr cache mutex", NULL, + MTX_DEF); vattr_null(&fvdat->cached_attrs); fvdat->cached_attrs.va_birthtime.tv_sec = -1; fvdat->cached_attrs.va_birthtime.tv_nsec = 0; @@ -181,6 +183,7 @@ fuse_vnode_destroy(struct vnode *vp) struct fuse_vnode_data *fvdat = vp->v_data; vp->v_data = NULL; + mtx_destroy(&fvdat->cached_attr_mtx); KASSERT(LIST_EMPTY(&fvdat->handles), ("Destroying fuse vnode with open files!")); free(fvdat, M_FUSEVN); @@ -386,7 +389,8 @@ fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid) struct fuse_setattr_in *fsai; int err = 0; - ASSERT_VOP_ELOCKED(vp, "fuse_io_extend"); + ASSERT_VOP_ELOCKED(vp, __func__); /* For flag and last_local_modify */ + ASSERT_CACHED_ATTRS_LOCKED(vp); if (fuse_isdeadfs(vp)) { return EBADF; @@ -439,10 +443,10 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server) struct vattr *attrs; off_t oldsize; size_t iosize; - struct buf *bp = NULL; int err = 0; - ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize"); + ASSERT_VOP_LOCKED(vp, __func__); + ASSERT_CACHED_ATTRS_LOCKED(vp); iosize = fuse_iosize(vp); oldsize = fvdat->cached_attrs.va_size; @@ -450,7 +454,45 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server) if ((attrs = VTOVA(vp)) != NULL) attrs->va_size = newsize; - if (newsize < oldsize) { + if (from_server && newsize > oldsize && oldsize != VNOVAL) { + /* + * The FUSE server changed the file size behind our back. We + * should invalidate the entire cache. + */ + daddr_t end_lbn; + + end_lbn = howmany(newsize, iosize); + v_inval_buf_range(vp, 0, end_lbn, iosize); + } + + if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE) { + err = fuse_vnode_setsize_immediate(vp, newsize < oldsize); + } else { + /* Without an exclusive vnode lock, we must defer the operation */ + fvdat->flag |= FN_DELAYED_TRUNCATE; + vn_delayed_setsize(vp); + } + + return err; +} + +/* Immediately set the vnode's size in the pager */ +int +fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct buf *bp = NULL; + size_t iosize; + off_t newsize; + int err = 0; + + MPASS(fvdat); + ASSERT_VOP_ELOCKED(vp, __func__); + + iosize = fuse_iosize(vp); + newsize = fvdat->cached_attrs.va_size; + + if (shrink) { daddr_t lbn; err = vtruncbuf(vp, newsize, fuse_iosize(vp)); @@ -474,21 +516,13 @@ fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server) MPASS(bp->b_flags & B_VMIO); vfs_bio_clrbuf(bp); bp->b_dirtyend = MIN(bp->b_dirtyend, newsize - lbn * iosize); - } else if (from_server && newsize > oldsize && oldsize != VNOVAL) { - /* - * The FUSE server changed the file size behind our back. We - * should invalidate the entire cache. - */ - daddr_t end_lbn; - - end_lbn = howmany(newsize, iosize); - v_inval_buf_range(vp, 0, end_lbn, iosize); } out: if (bp) brelse(bp); vnode_pager_setsize(vp, newsize); - return err; + + return (err); } /* Get the current, possibly dirty, size of the file */ @@ -497,15 +531,28 @@ fuse_vnode_size(struct vnode *vp, off_t *filesize, struct ucred *cred, struct thread *td) { struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct vattr va; int error = 0; + ASSERT_VOP_LOCKED(vp, __func__); + + CACHED_ATTR_LOCK(vp); if (!(fvdat->flag & FN_SIZECHANGE) && (!fuse_vnode_attr_cache_valid(vp) || - fvdat->cached_attrs.va_size == VNOVAL)) - error = fuse_internal_do_getattr(vp, NULL, cred, td); - - if (!error) + fvdat->cached_attrs.va_size == VNOVAL)) { + CACHED_ATTR_UNLOCK(vp); + /* + * It incurs a large struct copy, but we supply &va so we don't + * have to acquire the lock a second time after + * fuse_internal_do_getattr returns. + */ + error = fuse_internal_do_getattr(vp, &va, cred, td); + if (!error) + *filesize = va.va_size; + } else { *filesize = fvdat->cached_attrs.va_size; + CACHED_ATTR_UNLOCK(vp); + } return error; } @@ -515,6 +562,8 @@ fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime) { struct fuse_vnode_data *fvdat = VTOFUD(vp); + ASSERT_CACHED_ATTRS_LOCKED(vp); + fvdat->flag &= ~(FN_MTIMECHANGE | FN_CTIMECHANGE); if (atime) fvdat->flag &= ~FN_ATIMECHANGE; @@ -537,6 +586,8 @@ fuse_vnode_update(struct vnode *vp, int flags) if (mp->mnt_flag & MNT_NOATIME) flags &= ~FN_ATIMECHANGE; + CACHED_ATTR_LOCK(vp); + if (flags & FN_ATIMECHANGE) fvdat->cached_attrs.va_atime = ts; if (flags & FN_MTIMECHANGE) @@ -545,6 +596,8 @@ fuse_vnode_update(struct vnode *vp, int flags) fvdat->cached_attrs.va_ctime = ts; fvdat->flag |= flags; + + CACHED_ATTR_UNLOCK(vp); } void diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h index 97774de9eeb5..b6e388d01702 100644 --- a/sys/fs/fuse/fuse_node.h +++ b/sys/fs/fuse/fuse_node.h @@ -79,6 +79,11 @@ * cache_attrs.va_size field does not time out. */ #define FN_SIZECHANGE 0x00000100 +/* + * Whether I/O to this vnode should bypass the cache. + * XXX BUG: this should be part of the file handle, not the vnode data. + * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088 + */ #define FN_DIRECTIO 0x00000200 /* Indicates that parent_nid is valid */ #define FN_PARENT_NID 0x00000400 @@ -92,38 +97,81 @@ #define FN_CTIMECHANGE 0x00001000 #define FN_ATIMECHANGE 0x00002000 +/* vop_delayed_setsize should truncate the file */ +#define FN_DELAYED_TRUNCATE 0x00004000 + +#define CACHED_ATTR_LOCK(vp) \ +do { \ + ASSERT_VOP_LOCKED(vp, __func__); \ + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \ + mtx_lock(&VTOFUD(vp)->cached_attr_mtx); \ +} while(0) + +#define CACHED_ATTR_UNLOCK(vp) \ +do { \ + ASSERT_VOP_LOCKED(vp, __func__); \ + if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) \ + mtx_unlock(&VTOFUD(vp)->cached_attr_mtx); \ +} while(0) + struct fuse_vnode_data { - /** self **/ + /* self's node id, similar to an inode number. Immutable. */ uint64_t nid; + /* + * Generation number. Distinguishes files with same nid but that don't + * overlap in time. Immutable. + */ uint64_t generation; - /** parent **/ + /* parent's node id. Protected by the vnode lock. */ uint64_t parent_nid; /** I/O **/ - /* List of file handles for all of the vnode's open file descriptors */ + /* + * List of file handles for all of the vnode's open file descriptors. + * Protected by the vnode lock. + */ LIST_HEAD(, fuse_filehandle) handles; - /** flags **/ - uint32_t flag; + /* Protects flag, attr_cache_timeout and cached_attrs */ + struct mtx cached_attr_mtx; - /** meta **/ - /* The monotonic time after which the attr cache is invalid */ + /* + * The monotonic time after which the attr cache is invalid + * Protected by an exclusive vnode lock or the cached_attr_mtx + */ struct bintime attr_cache_timeout; - /* + + /* * Monotonic time after which the entry is invalid. Used for lookups - * by nodeid instead of pathname. + * by nodeid instead of pathname. Protected by the vnode lock. */ struct bintime entry_cache_timeout; + /* * Monotonic time of the last FUSE operation that modified the file * size. Used to avoid races between mutator ops like VOP_SETATTR and - * unlocked accessor ops like VOP_LOOKUP. + * unlocked accessor ops like VOP_LOOKUP. Protected by the vnode lock. */ struct timespec last_local_modify; + + /* Protected by an exclusive vnode lock or the cached_attr_mtx */ struct vattr cached_attrs; + + /* Number of FUSE_LOOKUPs minus FUSE_FORGETs. Protected by vnode lock */ uint64_t nlookup; + + /* + * Misc flags. Protected by an exclusive vnode lock or the + * cached_attr_mtx, because some of the flags reflect the contents of + * cached_attrs. + */ + uint32_t flag; + + /* Vnode type. Immutable */ __enum_uint8(vtype) vtype; + + /* State for clustered writes. Protected by vnode lock */ struct vn_clusterw clusterw; }; @@ -141,18 +189,32 @@ struct fuse_fid { #define VTOFUD(vp) \ ((struct fuse_vnode_data *)((vp)->v_data)) #define VTOI(vp) (VTOFUD(vp)->nid) + +#define ASSERT_CACHED_ATTRS_LOCKED(vp) \ +do { \ + ASSERT_VOP_LOCKED(vp, __func__); \ + VNASSERT(VOP_ISLOCKED(vp) == LK_EXCLUSIVE || \ + mtx_owned(&VTOFUD(vp)->cached_attr_mtx), vp, \ + ("cached attrs not locked")); \ +} while(0) + static inline bool fuse_vnode_attr_cache_valid(struct vnode *vp) { + struct fuse_vnode_data *fvdat = VTOFUD(vp); struct bintime now; + ASSERT_CACHED_ATTRS_LOCKED(vp); + getbinuptime(&now); - return (bintime_cmp(&(VTOFUD(vp)->attr_cache_timeout), &now, >)); + return (bintime_cmp(&fvdat->attr_cache_timeout, &now, >)); } static inline struct vattr* VTOVA(struct vnode *vp) { + ASSERT_CACHED_ATTRS_LOCKED(vp); + if (fuse_vnode_attr_cache_valid(vp)) return &(VTOFUD(vp)->cached_attrs); else @@ -162,6 +224,8 @@ VTOVA(struct vnode *vp) static inline void fuse_vnode_clear_attr_cache(struct vnode *vp) { + ASSERT_CACHED_ATTRS_LOCKED(vp); + bintime_clear(&VTOFUD(vp)->attr_cache_timeout); } @@ -184,10 +248,14 @@ static inline void fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp) { if (dvp != NULL && vp->v_type == VDIR) { + ASSERT_VOP_ELOCKED(vp, __func__); /* for parent_nid */ + MPASS(dvp->v_type == VDIR); VTOFUD(vp)->parent_nid = VTOI(dvp); VTOFUD(vp)->flag |= FN_PARENT_NID; } else { + ASSERT_CACHED_ATTRS_LOCKED(vp); + VTOFUD(vp)->flag &= ~FN_PARENT_NID; } } @@ -207,6 +275,7 @@ void fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred, pid_t pid); int fuse_vnode_setsize(struct vnode *vp, off_t newsize, bool from_server); +int fuse_vnode_setsize_immediate(struct vnode *vp, bool shrink); void fuse_vnode_undirty_cached_timestamps(struct vnode *vp, bool atime); diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c index a5118aa7675f..7b6ad86e4b2b 100644 --- a/sys/fs/fuse/fuse_vfsops.c +++ b/sys/fs/fuse/fuse_vfsops.c @@ -601,6 +601,8 @@ fuse_vfsop_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) error = fuse_vnode_get(mp, feo, nodeid, NULL, vpp, NULL, vtyp); if (error) goto out; + /* for last_local_modify and fuse_internal_cache_attrs */ + ASSERT_VOP_ELOCKED(*vpp, __func__); fvdat = VTOFUD(*vpp); if (timespeccmp(&now, &fvdat->last_local_modify, >)) { diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index c66d3bcf01e0..ad7a2dcf84ef 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -134,6 +134,7 @@ static vop_close_t fuse_vnop_close; static vop_copy_file_range_t fuse_vnop_copy_file_range; static vop_create_t fuse_vnop_create; static vop_deallocate_t fuse_vnop_deallocate; +static vop_delayed_setsize_t fuse_vnop_delayed_setsize; static vop_deleteextattr_t fuse_vnop_deleteextattr; static vop_fdatasync_t fuse_vnop_fdatasync; static vop_fsync_t fuse_vnop_fsync; @@ -191,6 +192,7 @@ struct vop_vector fuse_vnops = { .vop_copy_file_range = fuse_vnop_copy_file_range, .vop_create = fuse_vnop_create, .vop_deallocate = fuse_vnop_deallocate, + .vop_delayed_setsize = fuse_vnop_delayed_setsize, .vop_deleteextattr = fuse_vnop_deleteextattr, .vop_fsync = fuse_vnop_fsync, .vop_fdatasync = fuse_vnop_fdatasync, @@ -707,6 +709,8 @@ fuse_vnop_allocate(struct vop_allocate_args *ap) return (EXTERROR(EOPNOTSUPP, "This server does not implement " "FUSE_FALLOCATE")); + ASSERT_CACHED_ATTRS_LOCKED(vp); + io.uio_offset = *offset; io.uio_resid = *len; err = vn_rlimit_fsize(vp, &io, curthread); @@ -779,7 +783,7 @@ fuse_vnop_bmap(struct vop_bmap_args *ap) struct fuse_data *data; struct fuse_vnode_data *fvdat = VTOFUD(vp); uint64_t biosize; - off_t fsize; + off_t fsize = VNOVAL; daddr_t lbn = ap->a_bn; daddr_t *pbn = ap->a_bnp; int *runp = ap->a_runp; @@ -822,9 +826,10 @@ fuse_vnop_bmap(struct vop_bmap_args *ap) * and the risk of getting it wrong is not worth the cost of * another upcall. */ - if (fvdat->cached_attrs.va_size != VNOVAL) - fsize = fvdat->cached_attrs.va_size; - else + CACHED_ATTR_LOCK(vp); + fsize = fvdat->cached_attrs.va_size; + CACHED_ATTR_UNLOCK(vp); + if (fsize == VNOVAL) error = fuse_vnode_size(vp, &fsize, td->td_ucred, td); if (error == 0) *runp = MIN(MAX(0, fsize / (off_t)biosize - lbn - 1), @@ -894,6 +899,7 @@ fuse_vnop_close(struct vop_close_args *ap) cred = td->td_ucred; err = fuse_flush(vp, cred, pid, fflag); + ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */ if (err == 0 && (fvdat->flag & FN_ATIMECHANGE) && !vfs_isrdonly(mp)) { struct vattr vap; struct fuse_data *data; @@ -911,6 +917,7 @@ fuse_vnop_close(struct vop_close_args *ap) } if (access_e == 0) { VATTR_NULL(&vap); + ASSERT_CACHED_ATTRS_LOCKED(vp); vap.va_atime = fvdat->cached_attrs.va_atime; /* * Ignore errors setting when setting atime. That @@ -1035,6 +1042,7 @@ fuse_vnop_copy_file_range(struct vop_copy_file_range_args *ap) *ap->a_inoffp += fwo->size; *ap->a_outoffp += fwo->size; fuse_internal_clear_suid_on_write(outvp, outcred, td); + ASSERT_CACHED_ATTRS_LOCKED(outvp); if (*ap->a_outoffp > outfvdat->cached_attrs.va_size) { fuse_vnode_setsize(outvp, *ap->a_outoffp, false); getnanouptime(&outfvdat->last_local_modify); @@ -1337,6 +1345,7 @@ fuse_vnop_inactive(struct vop_inactive_args *ap) int need_flush = 1; + ASSERT_CACHED_ATTRS_LOCKED(vp); /* For fvdat->flag */ LIST_FOREACH_SAFE(fufh, &fvdat->handles, next, fufh_tmp) { if (need_flush && vp->v_type == VREG) { if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { @@ -1557,6 +1566,7 @@ fuse_vnop_lookup(struct vop_lookup_args *ap) else if ((err = fuse_internal_access(dvp, VEXEC, td, cred))) return err; + ASSERT_CACHED_ATTRS_LOCKED(dvp); /* For flag */ is_dot = cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.'; if (isdotdot && !(data->dataflags & FSESS_EXPORT_SUPPORT)) { if (!(VTOFUD(dvp)->flag & FN_PARENT_NID)) { @@ -1960,6 +1970,10 @@ fuse_vnop_read(struct vop_read_args *ap) "to be closed")); } + /* + * XXX Check this flag without the lock. See + * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088 + */ if (VTOFUD(vp)->flag & FN_DIRECTIO) { ioflag |= IO_DIRECT; } @@ -2220,6 +2234,8 @@ fuse_vnop_remove(struct vop_remove_args *ap) return err; } +SDT_PROBE_DEFINE4(fusefs, , vnops, erelookup, "struct vnode*", + "struct vnode*", "struct vnode*", "struct vnode*"); /* struct vnop_rename_args { struct vnode *a_fdvp; @@ -2242,6 +2258,7 @@ fuse_vnop_rename(struct vop_rename_args *ap) struct fuse_data *data; bool newparent = fdvp != tdvp; bool isdir = fvp->v_type == VDIR; + int locktype; int err = 0; if (fuse_isdeadfs(fdvp)) { @@ -2270,11 +2287,32 @@ fuse_vnop_rename(struct vop_rename_args *ap) * have write permission to it, so ".." can be modified. */ data = fuse_get_mpdata(vnode_mount(tdvp)); + + if (tdvp != fdvp) + locktype = LK_EXCLUSIVE; /* for fuse_vnode_setparent */ + else + locktype = LK_SHARED; + + /* + * Must use LK_NOWAIT to prevent LORs between fvp and tdvp or + * tvp + */ + if (vn_lock(fvp, locktype | LK_NOWAIT) != 0) { + /* + * Can't release tdvp or tvp to try avoiding the LOR. + * Must return instead. + */ + SDT_PROBE4(fusefs, , vnops, erelookup, fdvp, fvp, tdvp, + tvp); + err = ERELOOKUP; + goto out; + } + if (data->dataflags & FSESS_DEFAULT_PERMISSIONS && isdir && newparent) { err = fuse_internal_access(fvp, VWRITE, curthread, tcnp->cn_cred); if (err) - goto out; + goto unlock; } err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); if (err == 0) { @@ -2293,6 +2331,8 @@ fuse_vnop_rename(struct vop_rename_args *ap) } cache_purge(fdvp); } +unlock: + VOP_UNLOCK(fvp); out: if (tdvp == tvp) { vrele(tdvp); @@ -2568,6 +2608,7 @@ static int fuse_vnop_write(struct vop_write_args *ap) { struct vnode *vp = ap->a_vp; + struct fuse_vnode_data *fvdat = VTOFUD(vp); struct uio *uio = ap->a_uio; int ioflag = ap->a_ioflag; struct ucred *cred = ap->a_cred; @@ -2583,9 +2624,12 @@ fuse_vnop_write(struct vop_write_args *ap) "to be closed")); } - if (VTOFUD(vp)->flag & FN_DIRECTIO) { + /* + * XXX Check this flag without the lock. See + * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=293088 + */ + if (fvdat->flag & FN_DIRECTIO) ioflag |= IO_DIRECT; - } err = fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid); if (err == EBADF && vnode_mount(vp)->mnt_flag & MNT_EXPORTED) { @@ -3219,6 +3263,29 @@ fallback: return (vop_stddeallocate(ap)); } +/* + struct vop_delayed_setsize_args { + struct vop_generic_args a_gen; + struct vnode *a_vp; + }; + */ +static int +fuse_vnop_delayed_setsize(struct vop_delayed_setsize_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); + bool shrink = (fvdat->flag & FN_DELAYED_TRUNCATE) != 0; + int err; + + if (!fvdat) + return (0); + + err = fuse_vnode_setsize_immediate(vp, shrink); + fvdat->flag &= ~FN_DELAYED_TRUNCATE; + + return (err); +} + /* struct vop_deleteextattr_args { struct vop_generic_args a_gen; diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc index e61dadb6d79e..622a3c3debcc 100644 --- a/tests/sys/fs/fusefs/bmap.cc +++ b/tests/sys/fs/fusefs/bmap.cc @@ -44,10 +44,13 @@ using namespace testing; const static char FULLPATH[] = "mountpoint/foo"; const static char RELPATH[] = "foo"; -class Bmap: public FuseTest { +class Bmap: public FuseTest, + public WithParamInterface<tuple<int, int>> +{ public: virtual void SetUp() { m_maxreadahead = UINT32_MAX; + m_init_flags |= get<0>(GetParam()); FuseTest::SetUp(); } void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn) @@ -73,12 +76,12 @@ void expect_lookup(const char *relpath, uint64_t ino, off_t size) } }; -class BmapEof: public Bmap, public WithParamInterface<int> {}; +class BmapEof: public Bmap {}; /* * Test FUSE_BMAP */ -TEST_F(Bmap, bmap) +TEST_P(Bmap, bmap) { struct fiobmap2_arg arg; /* @@ -124,7 +127,7 @@ TEST_F(Bmap, bmap) * If the daemon does not implement VOP_BMAP, fusefs should return sensible * defaults. */ -TEST_F(Bmap, default_) +TEST_P(Bmap, default_) { struct fiobmap2_arg arg; const off_t filesize = 1 << 30; @@ -181,7 +184,7 @@ TEST_F(Bmap, default_) * The server returns an error for some reason for FUSE_BMAP. fusefs should * faithfully report that error up to the caller. */ -TEST_F(Bmap, einval) +TEST_P(Bmap, einval) { struct fiobmap2_arg arg; const off_t filesize = 1 << 30; @@ -217,7 +220,7 @@ TEST_F(Bmap, einval) * https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=264196 . The bug did not * lie in fusefs, but this is a convenient place for a regression test. */ -TEST_F(Bmap, spurious_einval) +TEST_P(Bmap, spurious_einval) { const off_t filesize = 4ull << 30; const ino_t ino = 42; @@ -288,7 +291,7 @@ TEST_P(BmapEof, eof) int fd; int ngetattrs; - ngetattrs = GetParam(); + ngetattrs = get<1>(GetParam()); FuseTest::expect_lookup(RELPATH, ino, mode, filesize, 1, 0); expect_open(ino, 0, 1); // Depending on ngetattrs, FUSE_READ could be called with either @@ -348,6 +351,17 @@ TEST_P(BmapEof, eof) leak(fd); } -INSTANTIATE_TEST_SUITE_P(BE, BmapEof, - Values(1, 2, 3) -); +/* + * Try with and without async reads, because it affects the type of vnode lock + * on entry to fuse_vnop_bmap. + */ +INSTANTIATE_TEST_SUITE_P(B, Bmap, Values( + tuple(0, 0), + tuple(FUSE_ASYNC_READ, 0) +)); + +INSTANTIATE_TEST_SUITE_P(BE, BmapEof, Values( + tuple(0, 1), + tuple(0, 2), + tuple(0, 3) +)); diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc index d370a1e6e706..69742fb2a54b 100644 --- a/tests/sys/fs/fusefs/notify.cc +++ b/tests/sys/fs/fusefs/notify.cc @@ -47,8 +47,15 @@ using namespace testing; * invalidation. This file tests our client's handling of those messages. */ -class Notify: public FuseTest { +class Notify: public FuseTest, + public WithParamInterface<int> +{ public: +virtual void SetUp() { + m_init_flags |= GetParam(); + FuseTest::SetUp(); +} + /* Ignore an optional FUSE_FSYNC */ void maybe_expect_fsync(uint64_t ino) { @@ -154,7 +161,7 @@ static void* store(void* arg) { } /* Invalidate a nonexistent entry */ -TEST_F(Notify, inval_entry_nonexistent) +TEST_P(Notify, inval_entry_nonexistent) { const static char *name = "foo"; struct inval_entry_args iea; @@ -173,7 +180,7 @@ TEST_F(Notify, inval_entry_nonexistent) } /* Invalidate a cached entry */ -TEST_F(Notify, inval_entry) +TEST_P(Notify, inval_entry) { const static char FULLPATH[] = "mountpoint/foo"; *** 404 LINES SKIPPED ***home | help
Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?69b2e5cb.41328.db0b562>
