Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 22 Oct 2020 19:28:12 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r366950 - in head/sys: kern sys
Message-ID:  <202010221928.09MJSC5h019191@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Thu Oct 22 19:28:12 2020
New Revision: 366950
URL: https://svnweb.freebsd.org/changeset/base/366950

Log:
  vfs: prevent avoidable evictions on mkdir of existing directories
  
  mkdir -p /foo/bar/baz will mkdir each path component and ignore EEXIST.
  
  The NOCACHE lookup will make the namecache unnecessarily evict the existing entry,
  and then fallback to the fs lookup routine eventually leading namei to return an
  error as the directory is already there.
  
  For invocations like mkdir -p /usr/obj/usr/src/sys/GENERIC/modules this triggers
  fallbacks to the slowpath for concurrently executing lookups.
  
  Tested by:	pho
  Discussed with:	kib

Modified:
  head/sys/kern/vfs_cache.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vfs_syscalls.c
  head/sys/kern/vnode_if.src
  head/sys/sys/namei.h
  head/sys/sys/vnode.h

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c	Thu Oct 22 19:25:01 2020	(r366949)
+++ head/sys/kern/vfs_cache.c	Thu Oct 22 19:28:12 2020	(r366950)
@@ -1676,7 +1676,8 @@ cache_lookup_fallback(struct vnode *dvp, struct vnode 
 	int error;
 	bool whiteout;
 
-	MPASS((cnp->cn_flags & (MAKEENTRY | ISDOTDOT)) == MAKEENTRY);
+	MPASS((cnp->cn_flags & ISDOTDOT) == 0);
+	MPASS((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) != 0);
 
 retry:
 	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
@@ -1768,7 +1769,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, st
 
 	MPASS((cnp->cn_flags & ISDOTDOT) == 0);
 
-	if ((cnp->cn_flags & MAKEENTRY) == 0) {
+	if ((cnp->cn_flags & (MAKEENTRY | NC_KEEPPOSENTRY)) == 0) {
 		cache_remove_cnp(dvp, cnp);
 		return (0);
 	}
@@ -2595,6 +2596,35 @@ cache_rename(struct vnode *fdvp, struct vnode *fvp, st
 		cache_remove_cnp(tdvp, tcnp);
 	}
 }
+
+#ifdef INVARIANTS
+/*
+ * Validate that if an entry exists it matches.
+ */
+void
+cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
+{
+	struct namecache *ncp;
+	struct mtx *blp;
+	uint32_t hash;
+
+	hash = cache_get_hash(cnp->cn_nameptr, cnp->cn_namelen, dvp);
+	if (CK_SLIST_EMPTY(NCHHASH(hash)))
+		return;
+	blp = HASH2BUCKETLOCK(hash);
+	mtx_lock(blp);
+	CK_SLIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
+		if (ncp->nc_dvp == dvp && ncp->nc_nlen == cnp->cn_namelen &&
+		    !bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen)) {
+			if (ncp->nc_vp != vp)
+				panic("%s: mismatch (%p != %p); ncp %p [%s] dvp %p vp %p\n",
+				    __func__, vp, ncp->nc_vp, ncp, ncp->nc_name, ncp->nc_dvp,
+				    ncp->nc_vp);
+		}
+	}
+	mtx_unlock(blp);
+}
+#endif
 
 /*
  * Flush all entries referencing a particular filesystem.

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c	Thu Oct 22 19:25:01 2020	(r366949)
+++ head/sys/kern/vfs_subr.c	Thu Oct 22 19:28:12 2020	(r366950)
@@ -5587,6 +5587,18 @@ vop_mkdir_post(void *ap, int rc)
 		VFS_KNOTE_LOCKED(dvp, NOTE_WRITE | NOTE_LINK);
 }
 
+#ifdef DEBUG_VFS_LOCKS
+void
+vop_mkdir_debugpost(void *ap, int rc)
+{
+	struct vop_mkdir_args *a;
+
+	a = ap;
+	if (!rc)
+		cache_validate(a->a_dvp, *a->a_vpp, a->a_cnp);
+}
+#endif
+
 void
 vop_mknod_pre(void *ap)
 {

Modified: head/sys/kern/vfs_syscalls.c
==============================================================================
--- head/sys/kern/vfs_syscalls.c	Thu Oct 22 19:25:01 2020	(r366949)
+++ head/sys/kern/vfs_syscalls.c	Thu Oct 22 19:28:12 2020	(r366950)
@@ -3758,8 +3758,8 @@ kern_mkdirat(struct thread *td, int fd, const char *pa
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
-	    NOCACHE, segflg, path, fd, &cap_mkdirat_rights,
-	    td);
+	    NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd,
+	    &cap_mkdirat_rights, td);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&nd)) != 0)
 		return (error);

Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src	Thu Oct 22 19:25:01 2020	(r366949)
+++ head/sys/kern/vnode_if.src	Thu Oct 22 19:28:12 2020	(r366950)
@@ -336,6 +336,7 @@ vop_rename {
 %% mkdir	vpp	- E -
 %! mkdir	pre	vop_mkdir_pre
 %! mkdir	post	vop_mkdir_post
+%! mkdir	debugpost vop_mkdir_debugpost
 
 vop_mkdir {
 	IN struct vnode *dvp;

Modified: head/sys/sys/namei.h
==============================================================================
--- head/sys/sys/namei.h	Thu Oct 22 19:25:01 2020	(r366949)
+++ head/sys/sys/namei.h	Thu Oct 22 19:28:12 2020	(r366950)
@@ -125,16 +125,19 @@ int	cache_fplookup(struct nameidata *ndp, enum cache_f
 /*
  * namei operational modifier flags, stored in ni_cnd.flags
  */
+#define	NC_NOMAKEENTRY	0x0001	/* name must not be added to cache */
+#define	NC_KEEPPOSENTRY	0x0002	/* don't evict a positive entry */
+#define	NOCACHE		NC_NOMAKEENTRY	/* for compatibility with older code */
 #define	LOCKLEAF	0x0004	/* lock vnode on return */
 #define	LOCKPARENT	0x0008	/* want parent vnode returned locked */
 #define	WANTPARENT	0x0010	/* want parent vnode returned unlocked */
-#define	NOCACHE		0x0020	/* name must not be left in cache */
+/* UNUSED		0x0020 */
 #define	FOLLOW		0x0040	/* follow symbolic links */
 #define	BENEATH		0x0080	/* No escape from the start dir */
 #define	LOCKSHARED	0x0100	/* Shared lock leaf */
 #define	NOFOLLOW	0x0000	/* do not follow symbolic links (pseudo) */
 #define	RBENEATH	0x100000000ULL /* No escape, even tmp, from start dir */
-#define	MODMASK		0xf000001fcULL	/* mask of operational modifiers */
+#define	MODMASK		0xf000001ffULL	/* mask of operational modifiers */
 /*
  * Namei parameter descriptors.
  *

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h	Thu Oct 22 19:25:01 2020	(r366949)
+++ head/sys/sys/vnode.h	Thu Oct 22 19:28:12 2020	(r366950)
@@ -644,6 +644,15 @@ void	cache_purge_negative(struct vnode *vp);
 void	cache_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp,
     struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp);
 void	cache_purgevfs(struct mount *mp);
+#ifdef INVARIANTS
+void	cache_validate(struct vnode *dvp, struct vnode *vp,
+	    struct componentname *cnp);
+#else
+static inline void
+cache_validate(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
+{
+}
+#endif
 int	change_dir(struct vnode *vp, struct thread *td);
 void	cvtstat(struct stat *st, struct ostat *ost);
 void	freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb);
@@ -880,6 +889,7 @@ void	vop_lock_debugpost(void *a, int rc);
 void	vop_unlock_debugpre(void *a);
 void	vop_need_inactive_debugpre(void *a);
 void	vop_need_inactive_debugpost(void *a, int rc);
+void	vop_mkdir_debugpost(void *a, int rc);
 #else
 #define	vop_fplookup_vexec_debugpre(x)		do { } while (0)
 #define	vop_fplookup_vexec_debugpost(x, y)	do { } while (0)
@@ -889,6 +899,7 @@ void	vop_need_inactive_debugpost(void *a, int rc);
 #define	vop_unlock_debugpre(x)			do { } while (0)
 #define	vop_need_inactive_debugpre(x)		do { } while (0)
 #define	vop_need_inactive_debugpost(x, y)	do { } while (0)
+#define	vop_mkdir_debugpost(x, y)		do { } while (0)
 #endif
 
 void	vop_rename_fail(struct vop_rename_args *ap);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202010221928.09MJSC5h019191>