From owner-svn-src-all@freebsd.org Mon Aug 24 09:00:58 2020 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 7D0D43B6B40; Mon, 24 Aug 2020 09:00:58 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4BZmLf2mbmz4QxX; Mon, 24 Aug 2020 09:00:58 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 4250016139; Mon, 24 Aug 2020 09:00:58 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 07O90wVR093089; Mon, 24 Aug 2020 09:00:58 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 07O90w9Y093073; Mon, 24 Aug 2020 09:00:58 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <202008240900.07O90w9Y093073@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Mon, 24 Aug 2020 09:00:58 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r364637 - head/sys/kern X-SVN-Group: head X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: head/sys/kern X-SVN-Commit-Revision: 364637 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 24 Aug 2020 09:00:58 -0000 Author: mjg Date: Mon Aug 24 09:00:57 2020 New Revision: 364637 URL: https://svnweb.freebsd.org/changeset/base/364637 Log: cache: lockless reverse lookup This enables fully scalable operation for getcwd and significantly improves realpath. For example: PATH_CUSTOM=/usr/src ./getcwd_processes -t 104 before: 1550851 after: 380135380 Tested by: pho Modified: head/sys/kern/vfs_cache.c Modified: head/sys/kern/vfs_cache.c ============================================================================== --- head/sys/kern/vfs_cache.c Mon Aug 24 09:00:07 2020 (r364636) +++ head/sys/kern/vfs_cache.c Mon Aug 24 09:00:57 2020 (r364637) @@ -85,6 +85,10 @@ SDT_PROBE_DEFINE3(vfs, namecache, enter, done, "struct "struct vnode *"); SDT_PROBE_DEFINE2(vfs, namecache, enter_negative, done, "struct vnode *", "char *"); +SDT_PROBE_DEFINE2(vfs, namecache, fullpath_smr, hit, "struct vnode *", + "const char *"); +SDT_PROBE_DEFINE4(vfs, namecache, fullpath_smr, miss, "struct vnode *", + "struct namecache *", "int", "int"); SDT_PROBE_DEFINE1(vfs, namecache, fullpath, entry, "struct vnode *"); SDT_PROBE_DEFINE3(vfs, namecache, fullpath, hit, "struct vnode *", "char *", "struct vnode *"); @@ -298,6 +302,10 @@ static u_int __read_mostly ncsize; /* the size as comp struct nchstats nchstats; /* cache effectiveness statistics */ +static bool __read_frequently cache_fast_revlookup = true; +SYSCTL_BOOL(_vfs, OID_AUTO, cache_fast_revlookup, CTLFLAG_RW, + &cache_fast_revlookup, 0, ""); + static struct mtx __exclusive_cache_line ncneg_shrink_lock; struct neglist { @@ -477,6 +485,8 @@ STATNODE_COUNTER(shrinking_skipped, static void cache_zap_locked(struct namecache *ncp); static int vn_fullpath_hardlink(struct nameidata *ndp, char **retbuf, char **freebuf, size_t *buflen); +static int vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf, + char **retbuf, size_t *buflen, bool slash_prefixed, size_t addend); static int vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, size_t *buflen); static int vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, char *buf, @@ -2476,9 +2486,17 @@ vn_getcwd(char *buf, char **retbuf, size_t *buflen) struct pwd *pwd; int error; - pwd = pwd_hold(curthread); - error = vn_fullpath_any(pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf, buflen); - pwd_drop(pwd); + vfs_smr_enter(); + pwd = pwd_get_smr(); + error = vn_fullpath_any_smr(pwd->pwd_cdir, pwd->pwd_rdir, buf, retbuf, + buflen, false, 0); + VFS_SMR_ASSERT_NOT_ENTERED(); + if (error < 0) { + pwd = pwd_hold(curthread); + error = vn_fullpath_any(pwd->pwd_cdir, pwd->pwd_rdir, buf, + retbuf, buflen); + pwd_drop(pwd); + } #ifdef KTRACE if (KTRPOINT(curthread, KTR_NAMEI) && error == 0) @@ -2535,9 +2553,15 @@ vn_fullpath(struct vnode *vp, char **retbuf, char **fr buflen = MAXPATHLEN; buf = malloc(buflen, M_TEMP, M_WAITOK); - pwd = pwd_hold(curthread); - error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen); - pwd_drop(pwd); + vfs_smr_enter(); + pwd = pwd_get_smr(); + error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, &buflen, false, 0); + VFS_SMR_ASSERT_NOT_ENTERED(); + if (error < 0) { + pwd = pwd_hold(curthread); + error = vn_fullpath_any(vp, pwd->pwd_rdir, buf, retbuf, &buflen); + pwd_drop(pwd); + } if (error == 0) *freebuf = buf; else @@ -2562,7 +2586,12 @@ vn_fullpath_global(struct vnode *vp, char **retbuf, ch return (EINVAL); buflen = MAXPATHLEN; buf = malloc(buflen, M_TEMP, M_WAITOK); - error = vn_fullpath_any(vp, rootvnode, buf, retbuf, &buflen); + vfs_smr_enter(); + error = vn_fullpath_any_smr(vp, rootvnode, buf, retbuf, &buflen, false, 0); + VFS_SMR_ASSERT_NOT_ENTERED(); + if (error < 0) { + error = vn_fullpath_any(vp, rootvnode, buf, retbuf, &buflen); + } if (error == 0) *freebuf = buf; else @@ -2769,7 +2798,145 @@ vn_fullpath_dir(struct vnode *vp, struct vnode *rdir, * - namecache is not mandatory, meaning names are not guaranteed to be added * (in which case resolving fails) */ +static void __inline +cache_rev_failed_impl(int *reason, int line) +{ + + *reason = line; +} +#define cache_rev_failed(var) cache_rev_failed_impl((var), __LINE__) + static int +vn_fullpath_any_smr(struct vnode *vp, struct vnode *rdir, char *buf, + char **retbuf, size_t *buflen, bool slash_prefixed, size_t addend) +{ +#ifdef KDTRACE_HOOKS + struct vnode *startvp = vp; +#endif + struct vnode *tvp; + struct mount *mp; + struct namecache *ncp; + size_t orig_buflen; + int reason; + int error; +#ifdef KDTRACE_HOOKS + int i; +#endif + seqc_t vp_seqc, tvp_seqc; + u_char nc_flag; + + VFS_SMR_ASSERT_ENTERED(); + + if (!cache_fast_revlookup) { + vfs_smr_exit(); + return (-1); + } + + orig_buflen = *buflen; + + MPASS(*buflen >= 2); + + if (!slash_prefixed) { + MPASS(*buflen >= 2); + *buflen -= 1; + buf[*buflen] = '\0'; + } + + if (vp == rdir || vp == rootvnode) { + if (!slash_prefixed) { + *buflen -= 1; + buf[*buflen] = '/'; + } + goto out_ok; + } + +#ifdef KDTRACE_HOOKS + i = 0; +#endif + error = -1; + vp_seqc = vn_seqc_read_any(vp); + if (seqc_in_modify(vp_seqc)) { + cache_rev_failed(&reason); + goto out_abort; + } + + for (;;) { +#ifdef KDTRACE_HOOKS + i++; +#endif + if ((vp->v_vflag & VV_ROOT) != 0) { + mp = atomic_load_ptr(&vp->v_mount); + if (mp == NULL) { + cache_rev_failed(&reason); + goto out_abort; + } + tvp = atomic_load_ptr(&mp->mnt_vnodecovered); + tvp_seqc = vn_seqc_read_any(tvp); + if (seqc_in_modify(tvp_seqc)) { + cache_rev_failed(&reason); + goto out_abort; + } + if (!vn_seqc_consistent(vp, vp_seqc)) { + cache_rev_failed(&reason); + goto out_abort; + } + vp = tvp; + vp_seqc = tvp_seqc; + continue; + } + ncp = atomic_load_ptr(&vp->v_cache_dd); + if (ncp == NULL) { + cache_rev_failed(&reason); + goto out_abort; + } + nc_flag = atomic_load_char(&ncp->nc_flag); + if ((nc_flag & NCF_ISDOTDOT) != 0) { + cache_rev_failed(&reason); + goto out_abort; + } + if (!cache_ncp_canuse(ncp)) { + cache_rev_failed(&reason); + goto out_abort; + } + if (ncp->nc_nlen >= *buflen) { + cache_rev_failed(&reason); + error = ENOMEM; + goto out_abort; + } + *buflen -= ncp->nc_nlen; + memcpy(buf + *buflen, ncp->nc_name, ncp->nc_nlen); + *buflen -= 1; + buf[*buflen] = '/'; + tvp = ncp->nc_dvp; + tvp_seqc = vn_seqc_read_any(tvp); + if (seqc_in_modify(tvp_seqc)) { + cache_rev_failed(&reason); + goto out_abort; + } + if (!vn_seqc_consistent(vp, vp_seqc)) { + cache_rev_failed(&reason); + goto out_abort; + } + vp = tvp; + vp_seqc = tvp_seqc; + if (vp == rdir || vp == rootvnode) + break; + } +out_ok: + vfs_smr_exit(); + *retbuf = buf + *buflen; + *buflen = orig_buflen - *buflen + addend; + SDT_PROBE2(vfs, namecache, fullpath_smr, hit, startvp, *retbuf); + return (0); + +out_abort: + *buflen = orig_buflen; + SDT_PROBE4(vfs, namecache, fullpath_smr, miss, startvp, ncp, reason, i); + vfs_smr_exit(); + return (error); +} + +static int vn_fullpath_any(struct vnode *vp, struct vnode *rdir, char *buf, char **retbuf, size_t *buflen) { @@ -2836,7 +3003,6 @@ vn_fullpath_hardlink(struct nameidata *ndp, char **ret slash_prefixed = false; buf = malloc(*buflen, M_TEMP, M_WAITOK); - pwd = pwd_hold(curthread); addend = 0; vp = ndp->ni_vp; @@ -2880,18 +3046,25 @@ vn_fullpath_hardlink(struct nameidata *ndp, char **ret vp = ndp->ni_dvp; } - vref(vp); - error = vn_fullpath_dir(vp, pwd->pwd_rdir, buf, retbuf, buflen, + vfs_smr_enter(); + pwd = pwd_get_smr(); + error = vn_fullpath_any_smr(vp, pwd->pwd_rdir, buf, retbuf, buflen, slash_prefixed, addend); - if (error != 0) - goto out_bad; + VFS_SMR_ASSERT_NOT_ENTERED(); + if (error < 0) { + pwd = pwd_hold(curthread); + vref(vp); + error = vn_fullpath_dir(vp, pwd->pwd_rdir, buf, retbuf, buflen, + slash_prefixed, addend); + pwd_drop(pwd); + if (error != 0) + goto out_bad; + } - pwd_drop(pwd); *freebuf = buf; return (0); out_bad: - pwd_drop(pwd); free(buf, M_TEMP); return (error); }