Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 15 Feb 2019 22:49:15 +0000 (UTC)
From:      Conrad Meyer <cem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r344183 - head/sys/fs/fuse
Message-ID:  <201902152249.x1FMnFaL013219@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: cem
Date: Fri Feb 15 22:49:15 2019
New Revision: 344183
URL: https://svnweb.freebsd.org/changeset/base/344183

Log:
  FUSE: Respect userspace FS "do-not-cache" of file attributes
  
  The FUSE protocol demands that kernel implementations cache user filesystem
  file attributes (vattr data) for a maximum period of time in the range of
  [0, ULONG_MAX] seconds.  In practice, typical requests are for 0, 1, or 10
  seconds; or "a long time" to represent indefinite caching.
  
  Historically, FreeBSD FUSE has ignored this client directive entirely.  This
  works fine for local-only filesystems, but causes consistency issues with
  multi-writer network filesystems.
  
  For now, respect 0 second cache TTLs and do not cache such metadata.
  Non-zero metadata caching TTLs in the range [0.000000001, ULONG_MAX] seconds
  are still cached indefinitely, because it is unclear how a userspace
  filesystem could do anything sensible with those semantics even if
  implemented.
  
  In the future, as an optimization, we should implement notify_inval_entry,
  etc, which provide userspace filesystems a way of evicting the kernel cache.
  
  One potentially bogus access to invalid cached attribute data was left in
  fuse_io_strategy.  It is restricted behind the undocumented and non-default
  "vfs.fuse.fix_broken_io" sysctl or "brokenio" mount option; maybe these are
  deadcode and can be eliminated?
  
  Some minor APIs changed to facilitate this:
  
  1. Attribute cache validity is tracked in FUSE inodes ("fuse_vnode_data").
  
  2. cache_attrs() respects the provided TTL and only caches in the FUSE
  inode if TTL > 0.  It also grows an "out" argument, which, if non-NULL,
  stores the translated fuse_attr (even if not suitable for caching).
  
  3. FUSE VTOVA(vp) returns NULL if the vnode's cache is invalid, to help
  avoid programming mistakes.
  
  4. A VOP_LINK check for potential nlink overflow prior to invoking the FUSE
  link op was weakened (only performed when we have a valid attr cache).  The
  check is racy in a multi-writer network filesystem anyway -- classic TOCTOU.
  We have to trust any userspace filesystem that rejects local caching to
  account for it correctly.
  
  PR:		230258 (inspired by; does not fix)

Modified:
  head/sys/fs/fuse/fuse_internal.c
  head/sys/fs/fuse/fuse_internal.h
  head/sys/fs/fuse/fuse_io.c
  head/sys/fs/fuse/fuse_node.c
  head/sys/fs/fuse/fuse_node.h
  head/sys/fs/fuse/fuse_vnops.c

Modified: head/sys/fs/fuse/fuse_internal.c
==============================================================================
--- head/sys/fs/fuse/fuse_internal.c	Fri Feb 15 22:48:50 2019	(r344182)
+++ head/sys/fs/fuse/fuse_internal.c	Fri Feb 15 22:49:15 2019	(r344183)
@@ -373,7 +373,6 @@ fuse_internal_readdir_processdata(struct uio *uio,
 
 /* remove */
 
-#define INVALIDATE_CACHED_VATTRS_UPON_UNLINK 1
 int
 fuse_internal_remove(struct vnode *dvp,
     struct vnode *vp,
@@ -381,16 +380,12 @@ fuse_internal_remove(struct vnode *dvp,
     enum fuse_opcode op)
 {
 	struct fuse_dispatcher fdi;
+	struct fuse_vnode_data *fvdat;
+	int err;
 
-	struct vattr *vap = VTOVA(vp);
+	err = 0;
+	fvdat = VTOFUD(vp);
 
-#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK
-	int need_invalidate = 0;
-	uint64_t target_nlink = 0;
-
-#endif
-	int err = 0;
-
 	debug_printf("dvp=%p, cnp=%p, op=%d\n", vp, cnp, op);
 
 	fdisp_init(&fdi, cnp->cn_namelen + 1);
@@ -399,13 +394,6 @@ fuse_internal_remove(struct vnode *dvp,
 	memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
 	((char *)fdi.indata)[cnp->cn_namelen] = '\0';
 
-#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK
-	if (vap->va_nlink > 1) {
-		need_invalidate = 1;
-		target_nlink = vap->va_nlink;
-	}
-#endif
-
 	err = fdisp_wait_answ(&fdi);
 	fdisp_destroy(&fdi);
 	return err;
@@ -489,7 +477,7 @@ fuse_internal_newentry_core(struct vnode *dvp,
 		    feo->nodeid, 1);
 		return err;
 	}
-	cache_attrs(*vpp, feo);
+	cache_attrs(*vpp, feo, NULL);
 
 	return err;
 }
@@ -563,6 +551,7 @@ fuse_internal_vnode_disappear(struct vnode *vp)
 
 	ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
 	fvdat->flag |= FN_REVOKED;
+	fvdat->valid_attr_cache = false;
 	cache_purge(vp);
 }
 

Modified: head/sys/fs/fuse/fuse_internal.h
==============================================================================
--- head/sys/fs/fuse/fuse_internal.h	Fri Feb 15 22:48:50 2019	(r344182)
+++ head/sys/fs/fuse/fuse_internal.h	Fri Feb 15 22:49:15 2019	(r344183)
@@ -200,15 +200,47 @@ fuse_internal_access(struct vnode *vp,
 
 /* attributes */
 
+/*
+ * Cache FUSE attributes 'fat', with nominal expiration
+ * 'attr_valid'.'attr_valid_nsec', in attr cache associated with vnode 'vp'.
+ * Optionally, if argument 'vap' is not NULL, store a copy of the converted
+ * attributes there as well.
+ *
+ * If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
+ * return the result to the caller).
+ */
 static __inline
 void
-fuse_internal_attr_fat2vat(struct mount *mp,
+fuse_internal_attr_fat2vat(struct vnode *vp,
                            struct fuse_attr *fat,
+                           uint64_t attr_valid,
+                           uint32_t attr_valid_nsec,
                            struct vattr *vap)
 {
+    struct mount *mp;
+    struct fuse_vnode_data *fvdat;
+    struct vattr *vp_cache_at;
+
+    mp = vnode_mount(vp);
+    fvdat = VTOFUD(vp);
+
     DEBUGX(FUSE_DEBUG_INTERNAL,
         "node #%ju, mode 0%o\n", (uintmax_t)fat->ino, fat->mode);
 
+    /* Honor explicit do-not-cache requests from user filesystems. */
+    if (attr_valid == 0 && attr_valid_nsec == 0)
+        fvdat->valid_attr_cache = false;
+    else
+        fvdat->valid_attr_cache = true;
+
+    vp_cache_at = VTOVA(vp);
+
+    if (vap == NULL && vp_cache_at == NULL)
+        return;
+
+    if (vap == NULL)
+        vap = vp_cache_at;
+
     vattr_null(vap);
 
     vap->va_fsid = mp->mnt_stat.f_fsid.val[0];
@@ -227,21 +259,17 @@ fuse_internal_attr_fat2vat(struct mount *mp,
     vap->va_ctime.tv_nsec = fat->ctimensec;
     vap->va_blocksize = PAGE_SIZE;
     vap->va_type = IFTOVT(fat->mode);
-
-#if (S_BLKSIZE == 512)
-    /* Optimize this case */
-    vap->va_bytes = fat->blocks << 9;
-#else
     vap->va_bytes = fat->blocks * S_BLKSIZE;
-#endif
-
     vap->va_flags = 0;
+
+    if (vap != vp_cache_at && vp_cache_at != NULL)
+        memcpy(vp_cache_at, vap, sizeof(*vap));
 }
 
 
-#define	cache_attrs(vp, fuse_out)					\
-	fuse_internal_attr_fat2vat(vnode_mount(vp), &(fuse_out)->attr,	\
-	    VTOVA(vp));
+#define	cache_attrs(vp, fuse_out, vap_out)				\
+        fuse_internal_attr_fat2vat((vp), &(fuse_out)->attr,		\
+            (fuse_out)->attr_valid, (fuse_out)->attr_valid_nsec, (vap_out))
 
 /* fsync */
 

Modified: head/sys/fs/fuse/fuse_io.c
==============================================================================
--- head/sys/fs/fuse/fuse_io.c	Fri Feb 15 22:48:50 2019	(r344182)
+++ head/sys/fs/fuse/fuse_io.c	Fri Feb 15 22:49:15 2019	(r344183)
@@ -655,6 +655,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
 		uiop->uio_offset = ((off_t)bp->b_blkno) * biosize;
 		error = fuse_read_directbackend(vp, uiop, cred, fufh);
 
+		/* XXXCEM: Potentially invalid access to cached_attrs here */
 		if ((!error && uiop->uio_resid) ||
 		    (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO &&
 		    uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 &&

Modified: head/sys/fs/fuse/fuse_node.c
==============================================================================
--- head/sys/fs/fuse/fuse_node.c	Fri Feb 15 22:48:50 2019	(r344182)
+++ head/sys/fs/fuse/fuse_node.c	Fri Feb 15 22:49:15 2019	(r344183)
@@ -147,6 +147,7 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_da
 	int i;
 
 	fvdat->nid = nodeid;
+	vattr_null(&fvdat->cached_attrs);
 	if (nodeid == FUSE_ROOT_ID) {
 		vp->v_vflag |= VV_ROOT;
 	}

Modified: head/sys/fs/fuse/fuse_node.h
==============================================================================
--- head/sys/fs/fuse/fuse_node.h	Fri Feb 15 22:48:50 2019	(r344182)
+++ head/sys/fs/fuse/fuse_node.h	Fri Feb 15 22:49:15 2019	(r344183)
@@ -86,6 +86,7 @@ struct fuse_vnode_data {
     uint32_t   flag;
 
     /** meta **/
+    bool              valid_attr_cache;
     struct vattr      cached_attrs;
     off_t             filesize;
     uint64_t          nlookup;
@@ -95,7 +96,9 @@ struct fuse_vnode_data {
 #define VTOFUD(vp) \
     ((struct fuse_vnode_data *)((vp)->v_data))
 #define VTOI(vp)    (VTOFUD(vp)->nid)
-#define VTOVA(vp)   (&(VTOFUD(vp)->cached_attrs))
+#define VTOVA(vp) \
+    (VTOFUD(vp)->valid_attr_cache ? \
+    &(VTOFUD(vp)->cached_attrs) : NULL)
 #define VTOILLU(vp) ((uint64_t)(VTOFUD(vp) ? VTOI(vp) : 0))
 
 #define FUSE_NULL_ID 0

Modified: head/sys/fs/fuse/fuse_vnops.c
==============================================================================
--- head/sys/fs/fuse/fuse_vnops.c	Fri Feb 15 22:48:50 2019	(r344182)
+++ head/sys/fs/fuse/fuse_vnops.c	Fri Feb 15 22:49:15 2019	(r344183)
@@ -518,10 +518,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap)
 		}
 		goto out;
 	}
-	cache_attrs(vp, (struct fuse_attr_out *)fdi.answ);
-	if (vap != VTOVA(vp)) {
-		memcpy(vap, VTOVA(vp), sizeof(*vap));
-	}
+
+	cache_attrs(vp, (struct fuse_attr_out *)fdi.answ, vap);
 	if (vap->va_type != vnode_vtype(vp)) {
 		fuse_internal_vnode_disappear(vp);
 		err = ENOENT;
@@ -628,9 +626,15 @@ fuse_vnop_link(struct vop_link_args *ap)
 	if (vnode_mount(tdvp) != vnode_mount(vp)) {
 		return EXDEV;
 	}
-	if (vap->va_nlink >= FUSE_LINK_MAX) {
+
+	/*
+	 * This is a seatbelt check to protect naive userspace filesystems from
+	 * themselves and the limitations of the FUSE IPC protocol.  If a
+	 * filesystem does not allow attribute caching, assume it is capable of
+	 * validating that nlink does not overflow.
+	 */
+	if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
 		return EMLINK;
-	}
 	fli.oldnodeid = VTOI(vp);
 
 	fdisp_init(&fdi, 0);
@@ -966,9 +970,11 @@ calldaemon:
 		}
 
 		if (op == FUSE_GETATTR) {
-			cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ);
+			cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ,
+			    NULL);
 		} else {
-			cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ);
+			cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ,
+			    NULL);
 		}
 
 		/* Insert name into cache if appropriate. */
@@ -1644,7 +1650,7 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
 		}
 	}
 	if (!err && !sizechanged) {
-		cache_attrs(vp, (struct fuse_attr_out *)fdi.answ);
+		cache_attrs(vp, (struct fuse_attr_out *)fdi.answ, NULL);
 	}
 out:
 	fdisp_destroy(&fdi);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201902152249.x1FMnFaL013219>