From owner-svn-src-all@freebsd.org Sat Jul 25 10:32:47 2020 Return-Path: Delivered-To: svn-src-all@mailman.nyi.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1]) by mailman.nyi.freebsd.org (Postfix) with ESMTP id A133C3A675F; Sat, 25 Jul 2020 10:32:47 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org [IPv6:2610:1c1:1:606c::19:3]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256 client-signature RSA-PSS (4096 bits) client-digest SHA256) (Client CN "mxrelay.nyi.freebsd.org", Issuer "Let's Encrypt Authority X3" (verified OK)) by mx1.freebsd.org (Postfix) with ESMTPS id 4BDMpR3r2jz438n; Sat, 25 Jul 2020 10:32:47 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 67B471A6C7; Sat, 25 Jul 2020 10:32:47 +0000 (UTC) (envelope-from mjg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id 06PAWl5a043380; Sat, 25 Jul 2020 10:32:47 GMT (envelope-from mjg@FreeBSD.org) Received: (from mjg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id 06PAWjP1043373; Sat, 25 Jul 2020 10:32:45 GMT (envelope-from mjg@FreeBSD.org) Message-Id: <202007251032.06PAWjP1043373@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: mjg set sender to mjg@FreeBSD.org using -f From: Mateusz Guzik Date: Sat, 25 Jul 2020 10:32:45 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r363518 - in head/sys: kern security/mac sys X-SVN-Group: head X-SVN-Commit-Author: mjg X-SVN-Commit-Paths: in head/sys: kern security/mac sys X-SVN-Commit-Revision: 363518 X-SVN-Commit-Repository: base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.33 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sat, 25 Jul 2020 10:32:47 -0000 Author: mjg Date: Sat Jul 25 10:32:45 2020 New Revision: 363518 URL: https://svnweb.freebsd.org/changeset/base/363518 Log: vfs: add the infrastructure for lockless lookup Reviewed by: kib Tested by: pho (in a patchset) Differential Revision: https://reviews.freebsd.org/D25577 Modified: head/sys/kern/kern_descrip.c head/sys/kern/vfs_subr.c head/sys/kern/vnode_if.src head/sys/security/mac/mac_framework.h head/sys/sys/filedesc.h head/sys/sys/mount.h head/sys/sys/vnode.h Modified: head/sys/kern/kern_descrip.c ============================================================================== --- head/sys/kern/kern_descrip.c Sat Jul 25 10:31:52 2020 (r363517) +++ head/sys/kern/kern_descrip.c Sat Jul 25 10:32:45 2020 (r363518) @@ -102,8 +102,8 @@ MALLOC_DECLARE(M_FADVISE); static __read_mostly uma_zone_t file_zone; static __read_mostly uma_zone_t filedesc0_zone; -static __read_mostly uma_zone_t pwd_zone; -static __read_mostly smr_t pwd_smr; +__read_mostly uma_zone_t pwd_zone; +VFS_SMR_DECLARE; static int closefp(struct filedesc *fdp, int fd, struct file *fp, struct thread *td, int holdleaders); @@ -3343,21 +3343,30 @@ pwd_hold(struct thread *td) fdp = td->td_proc->p_fd; - smr_enter(pwd_smr); - pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr); + vfs_smr_enter(); + pwd = vfs_smr_entered_load(&fdp->fd_pwd); MPASS(pwd != NULL); if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) { - smr_exit(pwd_smr); + vfs_smr_exit(); return (pwd); } - smr_exit(pwd_smr); + vfs_smr_exit(); FILEDESC_SLOCK(fdp); pwd = pwd_hold_filedesc(fdp); MPASS(pwd != NULL); - FILEDESC_SUNLOCK(fdp); return (pwd); } +struct pwd * +pwd_get_smr(void) +{ + struct pwd *pwd; + + pwd = vfs_smr_entered_load(&curproc->p_fd->fd_pwd); + MPASS(pwd != NULL); + return (pwd); +} + static struct pwd * pwd_alloc(void) { @@ -4368,7 +4377,11 @@ filelistinit(void *dummy) NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR); - pwd_smr = uma_zone_get_smr(pwd_zone); + /* + * XXXMJG this is a temporary hack due to boot ordering issues against + * the vnode zone. + */ + vfs_smr = uma_zone_get_smr(pwd_zone); mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF); } SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL); Modified: head/sys/kern/vfs_subr.c ============================================================================== --- head/sys/kern/vfs_subr.c Sat Jul 25 10:31:52 2020 (r363517) +++ head/sys/kern/vfs_subr.c Sat Jul 25 10:32:45 2020 (r363518) @@ -664,8 +664,8 @@ vntblinit(void *dummy __unused) vnode_list_reclaim_marker = vn_alloc_marker(NULL); TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, - vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR); - vfs_smr = uma_zone_get_smr(vnode_zone); + vnode_init, vnode_fini, UMA_ALIGN_PTR, 0); + uma_zone_set_smr(vnode_zone, vfs_smr); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); /* @@ -2914,6 +2914,22 @@ vget_prep(struct vnode *vp) return (vs); } +void +vget_abort(struct vnode *vp, enum vgetstate vs) +{ + + switch (vs) { + case VGET_USECOUNT: + vrele(vp); + break; + case VGET_HOLDCNT: + vdrop(vp); + break; + default: + __assert_unreachable(); + } +} + int vget(struct vnode *vp, int flags, struct thread *td) { @@ -2925,7 +2941,7 @@ vget(struct vnode *vp, int flags, struct thread *td) return (vget_finish(vp, flags, vs)); } -static int __noinline +static void __noinline vget_finish_vchr(struct vnode *vp) { @@ -2941,7 +2957,7 @@ vget_finish_vchr(struct vnode *vp) #else refcount_release(&vp->v_holdcnt); #endif - return (0); + return; } VI_LOCK(vp); @@ -2953,18 +2969,17 @@ vget_finish_vchr(struct vnode *vp) refcount_release(&vp->v_holdcnt); #endif VI_UNLOCK(vp); - return (0); + return; } v_incr_devcount(vp); refcount_acquire(&vp->v_usecount); VI_UNLOCK(vp); - return (0); } int vget_finish(struct vnode *vp, int flags, enum vgetstate vs) { - int error, old; + int error; if ((flags & LK_INTERLOCK) != 0) ASSERT_VI_LOCKED(vp, __func__); @@ -2976,20 +2991,32 @@ vget_finish(struct vnode *vp, int flags, enum vgetstat error = vn_lock(vp, flags); if (__predict_false(error != 0)) { - if (vs == VGET_USECOUNT) - vrele(vp); - else - vdrop(vp); + vget_abort(vp, vs); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); return (error); } + vget_finish_ref(vp, vs); + return (0); +} + +void +vget_finish_ref(struct vnode *vp, enum vgetstate vs) +{ + int old; + + VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp); + VNPASS(vp->v_holdcnt > 0, vp); + VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp); + if (vs == VGET_USECOUNT) - return (0); + return; - if (__predict_false(vp->v_type == VCHR)) - return (vget_finish_vchr(vp)); + if (__predict_false(vp->v_type == VCHR)) { + vget_finish_vchr(vp); + return; + } /* * We hold the vnode. If the usecount is 0 it will be utilized to keep @@ -3006,7 +3033,6 @@ vget_finish(struct vnode *vp, int flags, enum vgetstat refcount_release(&vp->v_holdcnt); #endif } - return (0); } /* @@ -4424,6 +4450,7 @@ DB_SHOW_COMMAND(mount, db_show_mount) MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT); MNT_KERN_FLAG(MNTK_MARKER); MNT_KERN_FLAG(MNTK_USES_BCACHE); + MNT_KERN_FLAG(MNTK_FPLOOKUP); MNT_KERN_FLAG(MNTK_NOASYNC); MNT_KERN_FLAG(MNTK_UNMOUNT); MNT_KERN_FLAG(MNTK_MWAIT); @@ -5240,6 +5267,38 @@ out: } /* + * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see + * the comment above cache_fplookup for details. + * + * We never deny as priv_check_cred calls are not yet supported, see vaccess. + */ +int +vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred) +{ + + VFS_SMR_ASSERT_ENTERED(); + + /* Check the owner. */ + if (cred->cr_uid == file_uid) { + if (file_mode & S_IXUSR) + return (0); + return (EAGAIN); + } + + /* Otherwise, check the groups (first match) */ + if (groupmember(file_gid, cred)) { + if (file_mode & S_IXGRP) + return (0); + return (EAGAIN); + } + + /* Otherwise, check everyone else. */ + if (file_mode & S_IXOTH) + return (0); + return (EAGAIN); +} + +/* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() @@ -5537,6 +5596,20 @@ vop_rename_pre(void *ap) } #ifdef DEBUG_VFS_LOCKS +void +vop_fplookup_vexec_pre(void *ap __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + +void +vop_fplookup_vexec_post(void *ap __unused, int rc __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + void vop_strategy_pre(void *ap) { Modified: head/sys/kern/vnode_if.src ============================================================================== --- head/sys/kern/vnode_if.src Sat Jul 25 10:31:52 2020 (r363517) +++ head/sys/kern/vnode_if.src Sat Jul 25 10:32:45 2020 (r363518) @@ -146,6 +146,17 @@ vop_close { }; +%% fplookup_vexec vp - - - +%! fplookup_vexec pre vop_fplookup_vexec_pre +%! fplookup_vexec post vop_fplookup_vexec_post + +vop_fplookup_vexec { + IN struct vnode *vp; + IN struct ucred *cred; + IN struct thread *td; +}; + + %% access vp L L L vop_access { Modified: head/sys/security/mac/mac_framework.h ============================================================================== --- head/sys/security/mac/mac_framework.h Sat Jul 25 10:31:52 2020 (r363517) +++ head/sys/security/mac/mac_framework.h Sat Jul 25 10:32:45 2020 (r363518) @@ -422,13 +422,14 @@ int mac_vnode_check_listextattr(struct ucred *cred, st int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp, struct componentname *cnp); extern bool mac_vnode_check_lookup_fp_flag; +#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag) static inline int mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp, struct componentname *cnp) { mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup"); - if (__predict_false(mac_vnode_check_lookup_fp_flag)) + if (mac_vnode_check_lookup_enabled()) return (mac_vnode_check_lookup_impl(cred, dvp, cnp)); return (0); } Modified: head/sys/sys/filedesc.h ============================================================================== --- head/sys/sys/filedesc.h Sat Jul 25 10:31:52 2020 (r363517) +++ head/sys/sys/filedesc.h Sat Jul 25 10:32:45 2020 (r363518) @@ -311,6 +311,7 @@ pwd_set(struct filedesc *fdp, struct pwd *newpwd) smr_serialized_store(&fdp->fd_pwd, newpwd, (FILEDESC_XLOCK_ASSERT(fdp), true)); } +struct pwd *pwd_get_smr(void); #endif /* _KERNEL */ Modified: head/sys/sys/mount.h ============================================================================== --- head/sys/sys/mount.h Sat Jul 25 10:31:52 2020 (r363517) +++ head/sys/sys/mount.h Sat Jul 25 10:32:45 2020 (r363518) @@ -420,6 +420,7 @@ void __mnt_vnode_markerfree_lazy(struct vnode #define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */ #define MNTK_VMSETSIZE_BUG 0x00010000 #define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */ +#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */ #define MNTK_NOASYNC 0x00800000 /* disable async */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ Modified: head/sys/sys/vnode.h ============================================================================== --- head/sys/sys/vnode.h Sat Jul 25 10:31:52 2020 (r363517) +++ head/sys/sys/vnode.h Sat Jul 25 10:32:45 2020 (r363518) @@ -666,6 +666,8 @@ int vn_path_to_global_path(struct thread *td, struct v int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, accmode_t accmode, struct ucred *cred, int *privused); +int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, + struct ucred *cred); int vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid, struct acl *aclp, accmode_t accmode, struct ucred *cred, int *privused); @@ -682,6 +684,8 @@ int vget(struct vnode *vp, int flags, struct thread *t enum vgetstate vget_prep_smr(struct vnode *vp); enum vgetstate vget_prep(struct vnode *vp); int vget_finish(struct vnode *vp, int flags, enum vgetstate vs); +void vget_finish_ref(struct vnode *vp, enum vgetstate vs); +void vget_abort(struct vnode *vp, enum vgetstate vs); void vgone(struct vnode *vp); void vhold(struct vnode *); void vholdl(struct vnode *); @@ -865,6 +869,8 @@ void vop_symlink_post(void *a, int rc); int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a); #ifdef DEBUG_VFS_LOCKS +void vop_fplookup_vexec_pre(void *a); +void vop_fplookup_vexec_post(void *a, int rc); void vop_strategy_pre(void *a); void vop_lock_pre(void *a); void vop_lock_post(void *a, int rc); @@ -872,6 +878,8 @@ void vop_unlock_pre(void *a); void vop_need_inactive_pre(void *a); void vop_need_inactive_post(void *a, int rc); #else +#define vop_fplookup_vexec_pre(x) do { } while (0) +#define vop_fplookup_vexec_post(x, y) do { } while (0) #define vop_strategy_pre(x) do { } while (0) #define vop_lock_pre(x) do { } while (0) #define vop_lock_post(x, y) do { } while (0) @@ -1025,9 +1033,17 @@ int vn_dir_check_exec(struct vnode *vp, struct compone #define VFS_SMR() vfs_smr #define vfs_smr_enter() smr_enter(VFS_SMR()) #define vfs_smr_exit() smr_exit(VFS_SMR()) +#define vfs_smr_entered_load(ptr) smr_entered_load((ptr), VFS_SMR()) #define VFS_SMR_ASSERT_ENTERED() SMR_ASSERT_ENTERED(VFS_SMR()) #define VFS_SMR_ASSERT_NOT_ENTERED() SMR_ASSERT_NOT_ENTERED(VFS_SMR()) #define VFS_SMR_ZONE_SET(zone) uma_zone_set_smr((zone), VFS_SMR()) + +#define vn_load_v_data_smr(vp) ({ \ + struct vnode *_vp = (vp); \ + \ + VFS_SMR_ASSERT_ENTERED(); \ + atomic_load_ptr(&(_vp)->v_data); \ +}) #endif /* _KERNEL */