Skip site navigation (1) | Skip section navigation (2)
Date:      Mon, 24 Aug 2020 09:00:58 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r364637 - head/sys/kern
Message-ID:  <202008240900.07O90w9Y093073@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Mon Aug 24 09:00:57 2020
New Revision: 364637
URL: https://svnweb.freebsd.org/changeset/base/364637

Log:
  cache: lockless reverse lookup
  
  This enables fully scalable operation for getcwd and significantly improves
  realpath.
  
  For example:
  PATH_CUSTOM=/usr/src ./getcwd_processes -t 104
  before:  1550851
  after: 380135380
  
  Tested by:	pho

Modified:
  head/sys/kern/vfs_cache.c

Modified: head/sys/kern/vfs_cache.c
==============================================================================
--- head/sys/kern/vfs_cache.c	Mon Aug 24 09:00:07 2020	(r364636)
+++ head/sys/kern/vfs_cache.c	Mon Aug 24 09:00:57 2020	(r364637)
@@ -85,6 +85,10 @@ SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct
     "struct vnode *");
 SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *",
     "char *");
+SDT_PROBE_DEFINE2(vfs, namecache, fullpath_smr, hit, "struct vnode *",
+    "const char *");	/* args: starting vnode, resolved path */
+SDT_PROBE_DEFINE4(vfs, namecache, fullpath_smr, miss, "struct vnode *",
+    "struct namecache *", "int", "int");	/* vp, ncp, abort line, iterations */
 SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *");
 SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *",
     "char *", "struct vnode *");
@@ -298,6 +302,10 @@ static u_int __read_mostly	ncsize; /* the size as comp
 
 struct nchstats	nchstats;		/* cache effectiveness statistics */
 
+static bool __read_frequently cache_fast_revlookup = true;
+SYSCTL_BOOL(_vfs, OID_AUTO, cache_fast_revlookup, CTLFLAG_RW,
+    &cache_fast_revlookup, 0, "Enable lockless (SMR) reverse path lookup");
+
 static struct mtx __exclusive_cache_line	ncneg_shrink_lock;
 
 struct neglist {
@@ -477,6 +485,8 @@ STATNODE_COUNTER(shrinking_skipped,
 static void cache_zap_locked(struct namecache *ncp);
 static int vn_fullpath_hardlink(struct nameidata *ndp, char **retbuf,
     char **freebuf, size_t *buflen);
+static int vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf,
+    char **retbuf, size_t *buflen, bool slash_prefixed, size_t addend);
 static int vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf,
     char **retbuf, size_t *buflen);
 static int vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, char *buf,
@@ -2476,9 +2486,17 @@ vn_getcwd(char *buf, char **retbuf, size_t *buflen)
 	struct pwd *pwd;
 	int error;
 
-	pwd = pwd_hold(curthread);
-	error = vn_fullpath_any(pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf, buflen);
-	pwd_drop(pwd);
+	vfs_smr_enter();
+	pwd = pwd_get_smr();
+	error = vn_fullpath_any_smr(pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf,
+	    buflen, false, 0);
+	VFS_SMR_ASSERT_NOT_ENTERED();
+	if (error < 0) {
+		pwd = pwd_hold(curthread);
+		error = vn_fullpath_any(pwd->pwd_cdir, pwd->pwd_rdir, buf,
+		    retbuf, buflen);
+		pwd_drop(pwd);
+	}
 
 #ifdef KTRACE
 	if (KTRPOINT(curthread, KTR_NAMEI) && error == 0)
@@ -2535,9 +2553,15 @@ vn_fullpath(struct vnode *vp, char **retbuf, char **fr
 
 	buflen = MAXPATHLEN;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
-	pwd = pwd_hold(curthread);
-	error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen);
-	pwd_drop(pwd);
+	vfs_smr_enter();
+	pwd = pwd_get_smr();
+	error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, &buflen, false, 0);
+	VFS_SMR_ASSERT_NOT_ENTERED();
+	if (error < 0) {
+		pwd = pwd_hold(curthread);
+		error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen);
+		pwd_drop(pwd);
+	}
 	if (error == 0)
 		*freebuf = buf;
 	else
@@ -2562,7 +2586,12 @@ vn_fullpath_global(struct vnode *vp, char **retbuf, ch
 		return (EINVAL);
 	buflen = MAXPATHLEN;
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
-	error = vn_fullpath_any(vp, rootvnode, buf, retbuf, &buflen);
+	vfs_smr_enter();
+	error = vn_fullpath_any_smr(vp, rootvnode, buf, retbuf, &buflen, false, 0);
+	VFS_SMR_ASSERT_NOT_ENTERED();
+	if (error < 0) {
+		error = vn_fullpath_any(vp, rootvnode, buf, retbuf, &buflen);
+	}
 	if (error == 0)
 		*freebuf = buf;
 	else
@@ -2769,7 +2798,145 @@ vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, 
  * - namecache is not mandatory, meaning names are not guaranteed to be added
  *   (in which case resolving fails)
  */
+static __inline void
+cache_rev_failed_impl(int *reason, int line)
+{
+	/* Remember which source line gave up, for the fullpath_smr miss probe. */
+	*reason = line;
+}
+#define cache_rev_failed(var)	cache_rev_failed_impl((var), __LINE__)
+
 static int
+vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf,
+    char **retbuf, size_t *buflen, bool slash_prefixed, size_t addend)
+{
+#ifdef KDTRACE_HOOKS
+	struct vnode *startvp = vp;
+#endif
+	struct vnode *tvp;
+	struct mount *mp;
+	struct namecache *ncp;
+	size_t orig_buflen;
+	int reason;
+	int error;
+#ifdef KDTRACE_HOOKS
+	int i;
+#endif
+	seqc_t vp_seqc, tvp_seqc;
+	u_char nc_flag;
+
+	VFS_SMR_ASSERT_ENTERED();
+
+	if (!cache_fast_revlookup) {
+		vfs_smr_exit();
+		return (-1);
+	}
+
+	orig_buflen = *buflen;
+
+	MPASS(*buflen >= 2);
+
+	if (!slash_prefixed) {
+		*buflen -= 1;
+		buf[*buflen] = '\0';
+	}
+
+	if (vp == rdir || vp == rootvnode) {
+		if (!slash_prefixed) {
+			*buflen -= 1;
+			buf[*buflen] = '/';
+		}
+		goto out_ok;
+	}
+
+#ifdef KDTRACE_HOOKS
+	i = 0;
+#endif
+	error = -1;	/* < 0 makes callers fall back to the non-SMR path */
+	ncp = NULL;	/* the miss probe reads it even if we bail before the loop */
+	vp_seqc = vn_seqc_read_any(vp);
+	if (seqc_in_modify(vp_seqc)) {
+		cache_rev_failed(&reason);
+		goto out_abort;
+	}
+
+	for (;;) {
+#ifdef KDTRACE_HOOKS
+		i++;
+#endif
+		if ((vp->v_vflag & VV_ROOT) != 0) {
+			mp = atomic_load_ptr(&vp->v_mount);
+			if (mp == NULL) {
+				cache_rev_failed(&reason);
+				goto out_abort;
+			}
+			tvp = atomic_load_ptr(&mp->mnt_vnodecovered);
+			tvp_seqc = vn_seqc_read_any(tvp);
+			if (seqc_in_modify(tvp_seqc)) {
+				cache_rev_failed(&reason);
+				goto out_abort;
+			}
+			if (!vn_seqc_consistent(vp, vp_seqc)) {
+				cache_rev_failed(&reason);
+				goto out_abort;
+			}
+			vp = tvp;
+			vp_seqc = tvp_seqc;
+			continue;
+		}
+		ncp = atomic_load_ptr(&vp->v_cache_dd);
+		if (ncp == NULL) {
+			cache_rev_failed(&reason);
+			goto out_abort;
+		}
+		nc_flag = atomic_load_char(&ncp->nc_flag);
+		if ((nc_flag & NCF_ISDOTDOT) != 0) {
+			cache_rev_failed(&reason);
+			goto out_abort;
+		}
+		if (!cache_ncp_canuse(ncp)) {
+			cache_rev_failed(&reason);
+			goto out_abort;
+		}
+		if (ncp->nc_nlen >= *buflen) {
+			cache_rev_failed(&reason);
+			error = ENOMEM;	/* positive errno: callers do not retry */
+			goto out_abort;
+		}
+		*buflen -= ncp->nc_nlen;
+		memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen);
+		*buflen -= 1;
+		buf[*buflen] = '/';
+		tvp = ncp->nc_dvp;
+		tvp_seqc = vn_seqc_read_any(tvp);
+		if (seqc_in_modify(tvp_seqc)) {
+			cache_rev_failed(&reason);
+			goto out_abort;
+		}
+		if (!vn_seqc_consistent(vp, vp_seqc)) {
+			cache_rev_failed(&reason);
+			goto out_abort;
+		}
+		vp = tvp;
+		vp_seqc = tvp_seqc;
+		if (vp == rdir || vp == rootvnode)
+			break;
+	}
+out_ok:
+	vfs_smr_exit();
+	*retbuf = buf + *buflen;
+	*buflen = orig_buflen - *buflen + addend;
+	SDT_PROBE2(vfs, namecache, fullpath_smr, hit, startvp, *retbuf);
+	return (0);
+
+out_abort:
+	*buflen = orig_buflen;
+	SDT_PROBE4(vfs, namecache, fullpath_smr, miss, startvp, ncp, reason, i);
+	vfs_smr_exit();
+	return (error);
+}
+
+static int
 vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf,
     size_t *buflen)
 {
@@ -2836,7 +3003,6 @@ vn_fullpath_hardlink(struct nameidata *ndp, char **ret
 	slash_prefixed = false;
 
 	buf = malloc(*buflen, M_TEMP, M_WAITOK);
-	pwd = pwd_hold(curthread);
 
 	addend = 0;
 	vp = ndp->ni_vp;
@@ -2880,18 +3046,25 @@ vn_fullpath_hardlink(struct nameidata *ndp, char **ret
 		vp = ndp->ni_dvp;
 	}
 
-	vref(vp);
-	error = vn_fullpath_dir(vp, pwd->pwd_rdir, buf, retbuf, buflen,
+	vfs_smr_enter();
+	pwd = pwd_get_smr();
+	error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, buflen,
 	    slash_prefixed, addend);
-	if (error != 0)
-		goto out_bad;
+	VFS_SMR_ASSERT_NOT_ENTERED();
+	if (error < 0) {
+		pwd = pwd_hold(curthread);
+		vref(vp);
+		error = vn_fullpath_dir(vp, pwd->pwd_rdir, buf, retbuf, buflen,
+		    slash_prefixed, addend);
+		pwd_drop(pwd);
+		if (error != 0)
+			goto out_bad;
+	}
 
-	pwd_drop(pwd);
 	*freebuf = buf;
 
 	return (0);
 out_bad:
-	pwd_drop(pwd);
 	free(buf, M_TEMP);
 	return (error);
 }



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202008240900.07O90w9Y093073>