Date: Fri, 12 Apr 2019 19:05:06 +0000 (UTC)
From: Alan Somers <asomers@FreeBSD.org>
To: src-committers@freebsd.org, svn-src-projects@freebsd.org
Subject: svn commit: r346162 - in projects/fuse2: sys/fs/fuse sys/kern sys/sys tests/sys/fs/fusefs
Message-ID: <201904121905.x3CJ57Mk033444@repo.freebsd.org>
Author: asomers
Date: Fri Apr 12 19:05:06 2019
New Revision: 346162
URL: https://svnweb.freebsd.org/changeset/base/346162

Log:
  fusefs: evict invalidated cache contents during write-through

  fusefs's default cache mode is "writethrough", although it currently
  works more like "write-around"; writes bypass the cache completely.
  Since writes bypass the cache, they were leaving stale previously-read
  data in the cache.  This commit invalidates that stale data.  It also
  adds a new global v_inval_buf_range method, like vtruncbuf but for a
  range of a file.

  PR:		235774
  Reported by:	cem
  Sponsored by:	The FreeBSD Foundation

Modified:
  projects/fuse2/sys/fs/fuse/fuse_io.c
  projects/fuse2/sys/kern/vfs_subr.c
  projects/fuse2/sys/sys/vnode.h
  projects/fuse2/tests/sys/fs/fusefs/write.cc

Modified: projects/fuse2/sys/fs/fuse/fuse_io.c
==============================================================================
--- projects/fuse2/sys/fs/fuse/fuse_io.c	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/sys/fs/fuse/fuse_io.c	Fri Apr 12 19:05:06 2019	(r346162)
@@ -171,8 +171,13 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in
 	 * cached.
 	 */
 	if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) {
+		off_t start, end;
+
 		SDT_PROBE2(fuse, , io, trace, 1, "direct write of vnode");
+		start = uio->uio_offset;
+		end = start + uio->uio_resid;
+		v_inval_buf_range(vp, start, end, fuse_iosize(vp));
 		err = fuse_write_directbackend(vp, uio, cred, fufh, ioflag);
 	} else {

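The hunk above is the heart of the fix: before issuing a write-through
(direct) write, fusefs now evicts any cached buffers and pages that overlap
the byte range [start, end) of the write.  The following is a minimal
userspace sketch of the stale-read scenario from PR 235774; the mount path
and file name are hypothetical, and before this commit the final read could
be served stale data out of the buffer cache instead of reaching the FUSE
server:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[8];
	int fd;

	fd = open("/mnt/fuse/file", O_RDWR);	/* hypothetical fusefs mount */
	if (fd < 0)
		return (1);
	(void)read(fd, buf, sizeof(buf));	/* primes the read cache */
	(void)lseek(fd, 0, SEEK_SET);
	(void)write(fd, "new data", 8);		/* bypasses the cache in WT mode */
	(void)lseek(fd, 0, SEEK_SET);
	(void)read(fd, buf, sizeof(buf));	/* must not see the stale data */
	printf("%.8s\n", buf);
	return (0);
}
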
Modified: projects/fuse2/sys/kern/vfs_subr.c
==============================================================================
--- projects/fuse2/sys/kern/vfs_subr.c	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/sys/kern/vfs_subr.c	Fri Apr 12 19:05:06 2019	(r346162)
@@ -116,6 +116,8 @@ static void	vfs_knl_assert_locked(void *arg);
 static void	vfs_knl_assert_unlocked(void *arg);
 static void	vnlru_return_batches(struct vfsops *mnt_op);
 static void	destroy_vpollinfo(struct vpollinfo *vi);
+static int	v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
+		    daddr_t startlbn, daddr_t endlbn);
 
 /*
  * These fences are intended for cases where some synchronization is
@@ -1865,9 +1867,8 @@ int
 vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
 {
 	struct buf *bp, *nbp;
-	int anyfreed;
-	daddr_t trunclbn;
 	struct bufobj *bo;
+	daddr_t startlbn;
 
 	CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__,
 	    vp, cred, blksize, (uintmax_t)length);
@@ -1875,22 +1876,111 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, off_t
 	/*
 	 * Round up to the *next* lbn.
 	 */
-	trunclbn = howmany(length, blksize);
+	startlbn = howmany(length, blksize);
 
 	ASSERT_VOP_LOCKED(vp, "vtruncbuf");
 
+restart:
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 
+	if (v_inval_buf_range1(vp, bo, startlbn, INT64_MAX) == EAGAIN)
+		goto restart;
+
+	if (length > 0) {
+restartsync:
+		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
+			if (bp->b_lblkno > 0)
+				continue;
+			/*
+			 * Since we hold the vnode lock this should only
+			 * fail if we're racing with the buf daemon.
+			 */
+			if (BUF_LOCK(bp,
+			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
+			    BO_LOCKPTR(bo)) == ENOLCK) {
+				goto restart;
+			}
+			VNASSERT((bp->b_flags & B_DELWRI), vp,
+			    ("buf(%p) on dirty queue without DELWRI", bp));
+
+			bremfree(bp);
+			bawrite(bp);
+			BO_LOCK(bo);
+			goto restartsync;
+		}
+	}
+
+	bufobj_wwait(bo, 0, 0);
+	BO_UNLOCK(bo);
+	vnode_pager_setsize(vp, length);
+
+	return (0);
+}
+
+/*
+ * Invalidate the cached pages of a file's buffer within the range of block
+ * numbers [startlbn, endlbn).  Every buffer that overlaps that range will be
+ * invalidated.  This must not result in any dirty data being lost.
+ */
+void
+v_inval_buf_range(struct vnode *vp, off_t start, off_t end, int blksize)
+{
+	struct bufobj *bo;
+	daddr_t startlbn, endlbn;
+
+	/* Round "outwards" */
+	startlbn = start / blksize;
+	endlbn = howmany(end, blksize);
+
+	ASSERT_VOP_LOCKED(vp, "v_inval_buf_range");
+
+restart:
+	bo = &vp->v_bufobj;
+	BO_LOCK(bo);
+
+#ifdef INVARIANTS
+	struct buf *bp, *nbp;
+
+	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
+		/*
+		 * Disallow invalidating dirty data outside of the requested
+		 * offsets.  Assume that data within the requested offsets is
+		 * being invalidated for a good reason.
+		 */
+		off_t blkstart, blkend;
+
+		blkstart = bp->b_offset;
+		blkend = bp->b_offset + bp->b_bcount;
+		KASSERT(blkstart >= start && blkend <= end,
+		    ("Invalidating extra dirty data!"));
+	}
+#endif
+
+	if (v_inval_buf_range1(vp, bo, startlbn, endlbn) == EAGAIN)
+		goto restart;
+
+	BO_UNLOCK(bo);
+	vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end));
+}
+
+/* Like v_inval_buf_range, but operates on whole buffers instead of offsets */
+static int
+v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
+    daddr_t startlbn, daddr_t endlbn)
+{
+	struct buf *bp, *nbp;
+	int anyfreed;
+
 	anyfreed = 1;
 	for (;anyfreed;) {
 		anyfreed = 0;
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno < trunclbn)
+			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK)
-				goto restart;
+				return (EAGAIN);
 
 			bremfree(bp);
 			bp->b_flags |= (B_INVAL | B_RELBUF);
@@ -1904,17 +1994,17 @@ restart:
 			    (nbp->b_vp != vp) ||
 			    (nbp->b_flags & B_DELWRI))) {
 				BO_UNLOCK(bo);
-				goto restart;
+				return (EAGAIN);
 			}
 		}
 
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno < trunclbn)
+			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
 				continue;
 			if (BUF_LOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo)) == ENOLCK)
-				goto restart;
+				return (EAGAIN);
 			bremfree(bp);
 			bp->b_flags |= (B_INVAL | B_RELBUF);
 			bp->b_flags &= ~B_ASYNC;
@@ -1927,40 +2017,11 @@ restart:
 			    (nbp->b_vp != vp) ||
 			    (nbp->b_flags & B_DELWRI) == 0)) {
 				BO_UNLOCK(bo);
-				goto restart;
+				return (EAGAIN);
 			}
 		}
 	}
-
-	if (length > 0) {
-restartsync:
-		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
-			if (bp->b_lblkno > 0)
-				continue;
-			/*
-			 * Since we hold the vnode lock this should only
-			 * fail if we're racing with the buf daemon.
-			 */
-			if (BUF_LOCK(bp,
-			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
-			    BO_LOCKPTR(bo)) == ENOLCK) {
-				goto restart;
-			}
-			VNASSERT((bp->b_flags & B_DELWRI), vp,
-			    ("buf(%p) on dirty queue without DELWRI", bp));
-
-			bremfree(bp);
-			bawrite(bp);
-			BO_LOCK(bo);
-			goto restartsync;
-		}
-	}
-
-	bufobj_wwait(bo, 0, 0);
-	BO_UNLOCK(bo);
-	vnode_pager_setsize(vp, length);
-
-	return (0);
+	return (0);
 }
 
 static void

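v_inval_buf_range rounds its byte range "outwards" to whole logical blocks,
so any buffer that even partially overlaps [start, end) is invalidated.  A
small worked example follows; the values are illustrative, not taken from
the commit:

#include <sys/param.h>	/* howmany() */
#include <sys/types.h>	/* off_t, daddr_t */
#include <assert.h>

int
main(void)
{
	off_t start = 4100, end = 12000;	/* byte range to invalidate */
	int blksize = 4096;
	daddr_t startlbn, endlbn;

	startlbn = start / blksize;		/* rounds down: lbn 1 */
	endlbn = howmany(end, blksize);		/* rounds up: lbn 3, exclusive */

	/*
	 * Block 1 (bytes 4096-8191) holds offset 4100 and block 2 (bytes
	 * 8192-12287) holds offset 11999, so both fall in [1, 3) and are
	 * invalidated.
	 */
	assert(startlbn == 1);
	assert(endlbn == 3);
	return (0);
}
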
Modified: projects/fuse2/sys/sys/vnode.h
==============================================================================
--- projects/fuse2/sys/sys/vnode.h	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/sys/sys/vnode.h	Fri Apr 12 19:05:06 2019	(r346162)
@@ -660,6 +660,8 @@ void	vinactive(struct vnode *, struct thread *);
 int	vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
 int	vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,
 	    int blksize);
+void	v_inval_buf_range(struct vnode *vp, off_t start, off_t end,
+	    int blksize);
 void	vunref(struct vnode *);
 void	vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
 int	vrecycle(struct vnode *vp);

Modified: projects/fuse2/tests/sys/fs/fusefs/write.cc
==============================================================================
--- projects/fuse2/tests/sys/fs/fusefs/write.cc	Fri Apr 12 18:54:09 2019	(r346161)
+++ projects/fuse2/tests/sys/fs/fusefs/write.cc	Fri Apr 12 19:05:06 2019	(r346162)
@@ -228,8 +228,7 @@ TEST_F(Write, append_direct_io)
 }
 
 /* A direct write should evict any overlapping cached data */
-/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=235774 */
-TEST_F(Write, DISABLED_direct_io_evicts_cache)
+TEST_F(Write, direct_io_evicts_cache)
 {
 	const char FULLPATH[] = "mountpoint/some_file.txt";
 	const char RELPATH[] = "some_file.txt";
@@ -407,6 +406,42 @@ TEST_F(Write, DISABLED_mmap)
 
 	free(expected);
 	free(zeros);
+}
+
+/* In WriteThrough mode, a write should evict overlapping cached data */
+TEST_F(WriteThrough, evicts_read_cache)
+{
+	const char FULLPATH[] = "mountpoint/some_file.txt";
+	const char RELPATH[] = "some_file.txt";
+	const char CONTENTS0[] = "abcdefgh";
+	const char CONTENTS1[] = "ijklmnop";
+	uint64_t ino = 42;
+	int fd;
+	ssize_t bufsize = strlen(CONTENTS0) + 1;
+	char readbuf[bufsize];
+
+	expect_lookup(RELPATH, ino, bufsize);
+	expect_open(ino, 0, 1);
+	expect_read(ino, 0, bufsize, bufsize, CONTENTS0);
+	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS1);
+
+	fd = open(FULLPATH, O_RDWR);
+	EXPECT_LE(0, fd) << strerror(errno);
+
+	// Prime the cache
+	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
+
+	// Write directly, evicting the cached data
+	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
+	ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno);
+
+	// Read again.  The cache should be bypassed
+	expect_read(ino, 0, bufsize, bufsize, CONTENTS1);
+	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
+	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
+	ASSERT_STREQ(readbuf, CONTENTS1);
+
+	/* Deliberately leak fd.  close(2) will be tested in release.cc */
 }
 
 TEST_F(WriteThrough, pwrite)
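Beyond fusefs, the vnode.h hunk makes v_inval_buf_range available to any
filesystem that writes around the buffer cache.  A hedged sketch of such a
caller follows: v_inval_buf_range and ASSERT_VOP_LOCKED are the real KPIs
used above, while everything prefixed "xx_" is hypothetical and would need
to be supplied by the filesystem in question.

#include <sys/param.h>
#include <sys/uio.h>
#include <sys/vnode.h>

static int xx_issue_write(struct vnode *vp, struct uio *uio);	/* hypothetical backend */

/* Hypothetical direct-write path for a filesystem "xx". */
static int
xx_write_direct(struct vnode *vp, struct uio *uio, int blksize)
{
	off_t start, end;

	ASSERT_VOP_LOCKED(vp, __func__);

	/* Evict cached data that this write would otherwise make stale. */
	start = uio->uio_offset;
	end = start + uio->uio_resid;
	v_inval_buf_range(vp, start, end, blksize);

	return (xx_issue_write(vp, uio));
}

Note also the shape of the refactoring: the shared helper
v_inval_buf_range1 returns EAGAIN when BUF_LOCK with LK_SLEEPFAIL fails
with ENOLCK (meaning the bufobj lock was dropped and the buffer lists may
have changed), and each caller restarts its own scan from its own label
rather than sharing a single goto target as the old vtruncbuf did.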