From owner-dev-commits-src-main@freebsd.org Mon Dec 28 02:03:18 2020 Return-Path: Delivered-To: dev-commits-src-main@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id 3F7BD4CDCEF; Mon, 28 Dec 2020 02:03:18 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4D416Z1KVpz4Xcd; Mon, 28 Dec 2020 02:03:18 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org (gitrepo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:5]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 1F47911D95; Mon, 28 Dec 2020 02:03:18 +0000 (UTC) (envelope-from git@FreeBSD.org) Received: from gitrepo.freebsd.org ([127.0.1.44]) by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 0BS23IFo005353; Mon, 28 Dec 2020 02:03:18 GMT (envelope-from git@gitrepo.freebsd.org) Received: (from git@localhost) by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 0BS23Inm005352; Mon, 28 Dec 2020 02:03:18 GMT (envelope-from git) Date: Mon, 28 Dec 2020 02:03:18 GMT Message-Id: <202012280203.0BS23Inm005352@gitrepo.freebsd.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org From: Mateusz Guzik Subject: git: 002e18eb7f3e - main - vfs: add FAILIFEXISTS flag MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit X-Git-Committer: mjg X-Git-Repository: src X-Git-Refname: refs/heads/main X-Git-Reftype: branch X-Git-Commit: 002e18eb7f3e1ae972827cb35705502e91c540e3 Auto-Submitted: auto-generated X-BeenThere: dev-commits-src-main@freebsd.org X-Mailman-Version: 2.1.34 Precedence: list List-Id: "Commit messages for the main branch of the src repository." List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 28 Dec 2020 02:03:18 -0000 The branch main has been updated by mjg: URL: https://cgit.FreeBSD.org/src/commit/?id=002e18eb7f3e1ae972827cb35705502e91c540e3 commit 002e18eb7f3e1ae972827cb35705502e91c540e3 Author: Mateusz Guzik AuthorDate: 2020-12-27 23:33:04 +0000 Commit: Mateusz Guzik CommitDate: 2020-12-28 01:53:27 +0000 vfs: add FAILIFEXISTS flag Both FreeBSD and Linux mkdir -p walk the tree up ignoring any EEXIST on the way and both are used a lot when building respective kernels. This poses a problem as spurious locking avoidably interferes with concurrent operations like getdirentries on affected directories. Work around the problem by adding FAILIFEXISTS flag. In case of lockless lookup this manages to avoid any work to begin with, there is no speed up for the locked case but perhaps this can be augmented later on. For simplicity the only supported semantics are as used by mkdir. Reviewed by: kib (previous version) Differential Revision: https://reviews.freebsd.org/D27789 --- sys/kern/vfs_cache.c | 16 ++++++++++++++-- sys/kern/vfs_lookup.c | 33 ++++++++++++++++++++++++++++++++- sys/kern/vfs_syscalls.c | 18 +----------------- sys/sys/namei.h | 2 +- 4 files changed, 48 insertions(+), 21 deletions(-) diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 22dfe01ac624..c9bc2074d5e6 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -3736,8 +3736,8 @@ cache_fpl_terminated(struct cache_fpl *fpl) #define CACHE_FPL_SUPPORTED_CN_FLAGS \ (NC_NOMAKEENTRY | NC_KEEPPOSENTRY | LOCKLEAF | LOCKPARENT | WANTPARENT | \ - FOLLOW | LOCKSHARED | SAVENAME | SAVESTART | WILLBEDIR | ISOPEN | \ - NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK) + FAILIFEXISTS | FOLLOW | LOCKSHARED | SAVENAME | SAVESTART | WILLBEDIR | \ + ISOPEN | NOMACCHECK | AUDITVNODE1 | AUDITVNODE2 | NOCAPCHECK) #define CACHE_FPL_INTERNAL_CN_FLAGS \ (ISDOTDOT | MAKEENTRY | ISLASTCN) @@ -3990,6 +3990,11 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl) return (cache_fpl_handled(fpl, EROFS)); } + if (fpl->tvp != NULL && (cnp->cn_flags & FAILIFEXISTS) != 0) { + cache_fpl_smr_exit(fpl); + return (cache_fpl_handled(fpl, EEXIST)); + } + /* * Secure access to dvp; check cache_fplookup_partial_setup for * reasoning. @@ -4070,6 +4075,12 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl) return (cache_fpl_aborted(fpl)); } + if ((cnp->cn_flags & FAILIFEXISTS) != 0) { + vput(dvp); + vput(tvp); + return (cache_fpl_handled(fpl, EEXIST)); + } + if ((cnp->cn_flags & LOCKLEAF) == 0) { VOP_UNLOCK(tvp); } @@ -4221,6 +4232,7 @@ cache_fplookup_noentry(struct cache_fpl *fpl) MPASS(!cache_fpl_isdotdot(cnp)); if (cnp->cn_nameiop != LOOKUP) { + fpl->tvp = NULL; return (cache_fplookup_modifying(fpl)); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 511efaa9d01a..da04dd09af40 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -545,6 +545,16 @@ namei(struct nameidata *ndp) ndp->ni_debugflags |= NAMEI_DBG_CALLED; if (ndp->ni_startdir != NULL) ndp->ni_debugflags |= NAMEI_DBG_HADSTARTDIR; + if (cnp->cn_flags & FAILIFEXISTS) { + KASSERT(cnp->cn_nameiop == CREATE, + ("%s: FAILIFEXISTS passed for op %d", __func__, cnp->cn_nameiop)); + /* + * The limitation below is to restrict hairy corner cases. + */ + KASSERT((cnp->cn_flags & (LOCKPARENT | LOCKLEAF)) == LOCKPARENT, + ("%s: FAILIFEXISTS must be passed with LOCKPARENT and without LOCKLEAF", + __func__)); + } /* * For NDVALIDATE. * @@ -1295,8 +1305,11 @@ success: goto bad2; } } - if (ndp->ni_vp != NULL && ndp->ni_vp->v_type == VDIR) + if (ndp->ni_vp != NULL) { nameicap_tracker_add(ndp, ndp->ni_vp); + if ((cnp->cn_flags & (FAILIFEXISTS | ISSYMLINK)) == FAILIFEXISTS) + goto bad_eexist; + } return (0); bad2: @@ -1311,6 +1324,24 @@ bad: vput(dp); ndp->ni_vp = NULL; return (error); +bad_eexist: + /* + * FAILIFEXISTS handling. + * + * XXX namei called with LOCKPARENT but not LOCKLEAF has the strange + * behaviour of leaving the vnode unlocked if the target is the same + * vnode as the parent. + */ + MPASS((cnp->cn_flags & ISSYMLINK) == 0); + if (ndp->ni_vp == ndp->ni_dvp) + vrele(ndp->ni_dvp); + else + vput(ndp->ni_dvp); + vrele(ndp->ni_vp); + ndp->ni_dvp = NULL; + ndp->ni_vp = NULL; + NDFREE(ndp, NDF_ONLY_PNBUF); + return (EEXIST); } /* diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index cbfff229540a..5a30b06e4e9e 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -3768,7 +3768,6 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, int mode) { struct mount *mp; - struct vnode *vp; struct vattr vattr; struct nameidata nd; int error; @@ -3777,26 +3776,11 @@ kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, restart: bwillwrite(); NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | - NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd, + NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS, segflg, path, fd, &cap_mkdirat_rights, td); nd.ni_cnd.cn_flags |= WILLBEDIR; if ((error = namei(&nd)) != 0) return (error); - vp = nd.ni_vp; - if (vp != NULL) { - NDFREE(&nd, NDF_ONLY_PNBUF); - /* - * XXX namei called with LOCKPARENT but not LOCKLEAF has - * the strange behaviour of leaving the vnode unlocked - * if the target is the same vnode as the parent. - */ - if (vp == nd.ni_dvp) - vrele(nd.ni_dvp); - else - vput(nd.ni_dvp); - vrele(vp); - return (EEXIST); - } if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { NDFREE(&nd, NDF_ONLY_PNBUF); vput(nd.ni_dvp); diff --git a/sys/sys/namei.h b/sys/sys/namei.h index dfb5e5e4cec3..ddba9cbd8912 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -143,7 +143,7 @@ int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, #define LOCKLEAF 0x0004 /* lock vnode on return */ #define LOCKPARENT 0x0008 /* want parent vnode returned locked */ #define WANTPARENT 0x0010 /* want parent vnode returned unlocked */ -/* UNUSED 0x0020 */ +#define FAILIFEXISTS 0x0020 /* return EEXIST if found */ #define FOLLOW 0x0040 /* follow symbolic links */ #define BENEATH 0x0080 /* No escape from the start dir */ #define LOCKSHARED 0x0100 /* Shared lock leaf */