Date: Sun, 28 Jul 2019 15:17:33 +0000 (UTC) From: Alan Somers <asomers@FreeBSD.org> To: src-committers@freebsd.org, svn-src-projects@freebsd.org Subject: svn commit: r350388 - in projects/fuse2: sys/fs/fuse tests/sys/fs/fusefs Message-ID: <201907281517.x6SFHXaa050783@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: asomers Date: Sun Jul 28 15:17:32 2019 New Revision: 350388 URL: https://svnweb.freebsd.org/changeset/base/350388 Log: fusefs: fix panic when writing with O_DIRECT and using writeback cache When a fusefs file system is mounted using the writeback cache, the cache may still be bypassed by opening a file with O_DIRECT. When writing with O_DIRECT, the cache must be invalidated for the affected portion of the file. Fix some panics caused by inadvertently invalidating too much. Sponsored by: The FreeBSD Foundation Modified: projects/fuse2/sys/fs/fuse/fuse_io.c projects/fuse2/tests/sys/fs/fusefs/write.cc Modified: projects/fuse2/sys/fs/fuse/fuse_io.c ============================================================================== --- projects/fuse2/sys/fs/fuse/fuse_io.c Sun Jul 28 04:02:22 2019 (r350387) +++ projects/fuse2/sys/fs/fuse/fuse_io.c Sun Jul 28 15:17:32 2019 (r350388) @@ -119,9 +119,11 @@ SDT_PROVIDER_DECLARE(fusefs); */ SDT_PROBE_DEFINE2(fusefs, , io, trace, "int", "char*"); +static int +fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end); static void fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred, - struct thread *td); + struct thread *td); static int fuse_read_directbackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh); @@ -136,6 +138,58 @@ static int fuse_write_biobackend(struct vnode *vp, struct uio *uio, struct ucred *cred, struct fuse_filehandle *fufh, int ioflag, pid_t pid); +/* Invalidate a range of cached data, whether dirty of not */ +static int +fuse_inval_buf_range(struct vnode *vp, off_t filesize, off_t start, off_t end) +{ + struct buf *bp; + daddr_t left_lbn, end_lbn, right_lbn; + off_t new_filesize; + int iosize, left_on, right_on, right_blksize; + + iosize = fuse_iosize(vp); + left_lbn = start / iosize; + end_lbn = howmany(end, iosize); + left_on = start & (iosize - 1); + if (left_on != 0) { + bp = getblk(vp, left_lbn, iosize, PCATCH, 0, 0); + if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyend >= left_on) { + /* + * Flush the dirty buffer, because we don't have a + * byte-granular way to record which parts of the + * buffer are valid. + */ + bwrite(bp); + if (bp->b_error) + return (bp->b_error); + } else { + brelse(bp); + } + } + right_on = end & (iosize - 1); + if (right_on != 0) { + right_lbn = end / iosize; + new_filesize = MAX(filesize, end); + right_blksize = MIN(iosize, new_filesize - iosize * right_lbn); + bp = getblk(vp, right_lbn, right_blksize, PCATCH, 0, 0); + if ((bp->b_flags & B_CACHE) != 0 && bp->b_dirtyoff < right_on) { + /* + * Flush the dirty buffer, because we don't have a + * byte-granular way to record which parts of the + * buffer are valid. + */ + bwrite(bp); + if (bp->b_error) + return (bp->b_error); + } else { + brelse(bp); + } + } + + v_inval_buf_range(vp, left_lbn, end_lbn, iosize); + return (0); +} + /* * FreeBSD clears the SUID and SGID bits on any write by a non-root user. */ @@ -236,7 +290,6 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in case UIO_WRITE: fuse_vnode_update(vp, FN_MTIMECHANGE | FN_CTIMECHANGE); if (directio) { - const int iosize = fuse_iosize(vp); off_t start, end, filesize; SDT_PROBE2(fusefs, , io, trace, 1, @@ -252,7 +305,9 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, in (IO_VMIO | IO_DIRECT), ("IO_DIRECT used for a cache flush?")); /* Invalidate the write cache when writing directly */ - v_inval_buf_range(vp, start, end, iosize); + err = fuse_inval_buf_range(vp, filesize, start, end); + if (err) + return (err); err = fuse_write_directbackend(vp, uio, cred, fufh, filesize, ioflag, false); } else { Modified: projects/fuse2/tests/sys/fs/fusefs/write.cc ============================================================================== --- projects/fuse2/tests/sys/fs/fusefs/write.cc Sun Jul 28 04:02:22 2019 (r350387) +++ projects/fuse2/tests/sys/fs/fusefs/write.cc Sun Jul 28 15:17:32 2019 (r350388) @@ -951,6 +951,132 @@ TEST_F(WriteBackAsync, delay) } /* + * A direct write should not evict dirty cached data from outside of its own + * byte range. + */ +TEST_F(WriteBackAsync, direct_io_ignores_unrelated_cached) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + const char CONTENTS0[] = "abcdefgh"; + const char CONTENTS1[] = "ijklmnop"; + uint64_t ino = 42; + int fd; + ssize_t bufsize = strlen(CONTENTS0) + 1; + ssize_t fsize = 2 * m_maxbcachebuf; + char readbuf[bufsize]; + void *zeros; + + zeros = calloc(1, m_maxbcachebuf); + ASSERT_NE(nullptr, zeros); + + expect_lookup(RELPATH, ino, fsize); + expect_open(ino, 0, 1); + expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf, zeros); + FuseTest::expect_write(ino, m_maxbcachebuf, bufsize, bufsize, 0, 0, + CONTENTS1); + + fd = open(FULLPATH, O_RDWR); + EXPECT_LE(0, fd) << strerror(errno); + + // Cache first block with dirty data. This will entail first reading + // the existing data. + ASSERT_EQ(bufsize, pwrite(fd, CONTENTS0, bufsize, 0)) + << strerror(errno); + + // Write directly to second block + ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno); + ASSERT_EQ(bufsize, pwrite(fd, CONTENTS1, bufsize, m_maxbcachebuf)) + << strerror(errno); + + // Read from the first block again. Should be serviced by cache. + ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); + ASSERT_EQ(bufsize, pread(fd, readbuf, bufsize, 0)) << strerror(errno); + ASSERT_STREQ(readbuf, CONTENTS0); + + leak(fd); + free(zeros); +} + +/* + * If a direct io write partially overlaps one or two blocks of dirty cached + * data, No dirty data should be lost. Admittedly this is a weird test, + * because it would be unusual to use O_DIRECT and the writeback cache. + */ +TEST_F(WriteBackAsync, direct_io_partially_overlaps_cached_block) +{ + const char FULLPATH[] = "mountpoint/some_file.txt"; + const char RELPATH[] = "some_file.txt"; + uint64_t ino = 42; + int fd; + off_t bs = m_maxbcachebuf; + ssize_t fsize = 3 * bs; + void *readbuf, *zeros, *ones, *zeroones, *onezeros; + + readbuf = malloc(bs); + ASSERT_NE(nullptr, readbuf) << strerror(errno); + zeros = calloc(1, 3 * bs); + ASSERT_NE(nullptr, zeros); + ones = calloc(1, 2 * bs); + ASSERT_NE(nullptr, ones); + memset(ones, 1, 2 * bs); + zeroones = calloc(1, bs); + ASSERT_NE(nullptr, zeroones); + memset((uint8_t*)zeroones + bs / 2, 1, bs / 2); + onezeros = calloc(1, bs); + ASSERT_NE(nullptr, onezeros); + memset(onezeros, 1, bs / 2); + + expect_lookup(RELPATH, ino, fsize); + expect_open(ino, 0, 1); + + fd = open(FULLPATH, O_RDWR); + EXPECT_LE(0, fd) << strerror(errno); + + /* Cache first and third blocks with dirty data. */ + ASSERT_EQ(3 * bs, pwrite(fd, zeros, 3 * bs, 0)) << strerror(errno); + + /* + * Write directly to all three blocks. The partially written blocks + * will be flushed because they're dirty. + */ + FuseTest::expect_write(ino, 0, bs, bs, 0, 0, zeros); + FuseTest::expect_write(ino, 2 * bs, bs, bs, 0, 0, zeros); + /* The direct write is split in two because of the m_maxwrite value */ + FuseTest::expect_write(ino, bs / 2, bs, bs, 0, 0, ones); + FuseTest::expect_write(ino, 3 * bs / 2, bs, bs, 0, 0, ones); + ASSERT_EQ(0, fcntl(fd, F_SETFL, O_DIRECT)) << strerror(errno); + ASSERT_EQ(2 * bs, pwrite(fd, ones, 2 * bs, bs / 2)) << strerror(errno); + + /* + * Read from both the valid and invalid portions of the first and third + * blocks again. This will entail FUSE_READ operations because these + * blocks were invalidated by the direct write. + */ + expect_read(ino, 0, bs, bs, zeroones); + expect_read(ino, 2 * bs, bs, bs, onezeros); + ASSERT_EQ(0, fcntl(fd, F_SETFL, 0)) << strerror(errno); + ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 0)) << strerror(errno); + EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2)); + ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 5 * bs / 2)) + << strerror(errno); + EXPECT_EQ(0, memcmp(zeros, readbuf, bs / 2)); + ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, bs / 2)) + << strerror(errno); + EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2)); + ASSERT_EQ(bs / 2, pread(fd, readbuf, bs / 2, 2 * bs)) + << strerror(errno); + EXPECT_EQ(0, memcmp(ones, readbuf, bs / 2)); + + leak(fd); + free(zeroones); + free(onezeros); + free(ones); + free(zeros); + free(readbuf); +} + +/* * In WriteBack mode, writes may be cached beyond what the server thinks is the * EOF. In this case, a short read at EOF should _not_ cause fusefs to update * the file's size.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201907281517.x6SFHXaa050783>