Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 25 Jul 2020 10:32:45 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r363518 - in head/sys: kern security/mac sys
Message-ID:  <202007251032.06PAWjP1043373@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Sat Jul 25 10:32:45 2020
New Revision: 363518
URL: https://svnweb.freebsd.org/changeset/base/363518

Log:
  vfs: add the infrastructure for lockless lookup
  
  Reviewed by:    kib
  Tested by:      pho (in a patchset)
  Differential Revision:	https://reviews.freebsd.org/D25577

Modified:
  head/sys/kern/kern_descrip.c
  head/sys/kern/vfs_subr.c
  head/sys/kern/vnode_if.src
  head/sys/security/mac/mac_framework.h
  head/sys/sys/filedesc.h
  head/sys/sys/mount.h
  head/sys/sys/vnode.h

Modified: head/sys/kern/kern_descrip.c
==============================================================================
--- head/sys/kern/kern_descrip.c	Sat Jul 25 10:31:52 2020	(r363517)
+++ head/sys/kern/kern_descrip.c	Sat Jul 25 10:32:45 2020	(r363518)
@@ -102,8 +102,8 @@ MALLOC_DECLARE(M_FADVISE);
 
 static __read_mostly uma_zone_t file_zone;
 static __read_mostly uma_zone_t filedesc0_zone;
-static __read_mostly uma_zone_t pwd_zone;
-static __read_mostly smr_t pwd_smr;
+__read_mostly uma_zone_t pwd_zone;
+VFS_SMR_DECLARE;
 
 static int	closefp(struct filedesc *fdp, int fd, struct file *fp,
 		    struct thread *td, int holdleaders);
@@ -3343,21 +3343,30 @@ pwd_hold(struct thread *td)
 
 	fdp = td->td_proc->p_fd;
 
-	smr_enter(pwd_smr);
-	pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr);
+	vfs_smr_enter();
+	pwd = vfs_smr_entered_load(&fdp->fd_pwd);
 	MPASS(pwd != NULL);
 	if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) {
-		smr_exit(pwd_smr);
+		vfs_smr_exit();
 		return (pwd);
 	}
-	smr_exit(pwd_smr);
+	vfs_smr_exit();
 	FILEDESC_SLOCK(fdp);
 	pwd = pwd_hold_filedesc(fdp);
 	MPASS(pwd != NULL);
 	FILEDESC_SUNLOCK(fdp);
 	return (pwd);
 }
 
+struct pwd *
+pwd_get_smr(void)
+{
+	struct pwd *pwd;
+
+	pwd = vfs_smr_entered_load(&curproc->p_fd->fd_pwd);
+	MPASS(pwd != NULL);
+	return (pwd);
+}
+
 static struct pwd *
 pwd_alloc(void)
 {
@@ -4368,7 +4377,11 @@ filelistinit(void *dummy)
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
-	pwd_smr = uma_zone_get_smr(pwd_zone);
+	/*
+	 * XXXMJG this is a temporary hack due to boot ordering issues against
+	 * the vnode zone.
+	 */
+	vfs_smr = uma_zone_get_smr(pwd_zone);
 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
 }
 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c	Sat Jul 25 10:31:52 2020	(r363517)
+++ head/sys/kern/vfs_subr.c	Sat Jul 25 10:32:45 2020	(r363518)
@@ -664,8 +664,8 @@ vntblinit(void *dummy __unused)
 	vnode_list_reclaim_marker = vn_alloc_marker(NULL);
 	TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
 	vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
-	    vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR);
-	vfs_smr = uma_zone_get_smr(vnode_zone);
+	    vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
+	uma_zone_set_smr(vnode_zone, vfs_smr);
 	vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 	/*
@@ -2914,6 +2914,22 @@ vget_prep(struct vnode *vp)
 	return (vs);
 }
 
+void
+vget_abort(struct vnode *vp, enum vgetstate vs)
+{
+
+	switch (vs) {
+	case VGET_USECOUNT:
+		vrele(vp);
+		break;
+	case VGET_HOLDCNT:
+		vdrop(vp);
+		break;
+	default:
+		__assert_unreachable();
+	}
+}
+
 int
 vget(struct vnode *vp, int flags, struct thread *td)
 {
@@ -2925,7 +2941,7 @@ vget(struct vnode *vp, int flags, struct thread *td)
 	return (vget_finish(vp, flags, vs));
 }
 
-static int __noinline
+static void __noinline
 vget_finish_vchr(struct vnode *vp)
 {
 
@@ -2941,7 +2957,7 @@ vget_finish_vchr(struct vnode *vp)
 #else
 		refcount_release(&vp->v_holdcnt);
 #endif
-		return (0);
+		return;
 	}
 
 	VI_LOCK(vp);
@@ -2953,18 +2969,17 @@ vget_finish_vchr(struct vnode *vp)
 		refcount_release(&vp->v_holdcnt);
 #endif
 		VI_UNLOCK(vp);
-		return (0);
+		return;
 	}
 	v_incr_devcount(vp);
 	refcount_acquire(&vp->v_usecount);
 	VI_UNLOCK(vp);
-	return (0);
 }
 
 int
 vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
 {
-	int error, old;
+	int error;
 
 	if ((flags & LK_INTERLOCK) != 0)
 		ASSERT_VI_LOCKED(vp, __func__);
@@ -2976,20 +2991,32 @@ vget_finish(struct vnode *vp, int flags, enum vgetstat
 
 	error = vn_lock(vp, flags);
 	if (__predict_false(error != 0)) {
-		if (vs == VGET_USECOUNT)
-			vrele(vp);
-		else
-			vdrop(vp);
+		vget_abort(vp, vs);
 		CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
 		    vp);
 		return (error);
 	}
 
+	vget_finish_ref(vp, vs);
+	return (0);
+}
+
+void
+vget_finish_ref(struct vnode *vp, enum vgetstate vs)
+{
+	int old;
+
+	VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp);
+	VNPASS(vp->v_holdcnt > 0, vp);
+	VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp);
+
 	if (vs == VGET_USECOUNT)
-		return (0);
+		return;
 
-	if (__predict_false(vp->v_type == VCHR))
-		return (vget_finish_vchr(vp));
+	if (__predict_false(vp->v_type == VCHR)) {
+		vget_finish_vchr(vp);
+		return;
+	}
 
 	/*
 	 * We hold the vnode. If the usecount is 0 it will be utilized to keep
@@ -3006,7 +3033,6 @@ vget_finish(struct vnode *vp, int flags, enum vgetstat
 		refcount_release(&vp->v_holdcnt);
 #endif
 	}
-	return (0);
 }
 
 /*
@@ -4424,6 +4450,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
 	MNT_KERN_FLAG(MNTK_MARKER);
 	MNT_KERN_FLAG(MNTK_USES_BCACHE);
+	MNT_KERN_FLAG(MNTK_FPLOOKUP);
 	MNT_KERN_FLAG(MNTK_NOASYNC);
 	MNT_KERN_FLAG(MNTK_UNMOUNT);
 	MNT_KERN_FLAG(MNTK_MWAIT);
@@ -5240,6 +5267,38 @@ out:
 }
 
 /*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ *
+ * We never deny as priv_check_cred calls are not yet supported, see vaccess.
+ */
+int
+vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred)
+{
+
+	VFS_SMR_ASSERT_ENTERED();
+
+	/* Check the owner. */
+	if (cred->cr_uid == file_uid) {
+		if (file_mode & S_IXUSR)
+			return (0);
+		return (EAGAIN);
+	}
+
+	/* Otherwise, check the groups (first match) */
+	if (groupmember(file_gid, cred)) {
+		if (file_mode & S_IXGRP)
+			return (0);
+		return (EAGAIN);
+	}
+
+	/* Otherwise, check everyone else. */
+	if (file_mode & S_IXOTH)
+		return (0);
+	return (EAGAIN);
+}
+
+/*
  * Common filesystem object access control check routine.  Accepts a
  * vnode's type, "mode", uid and gid, requested access mode, credentials,
  * and optional call-by-reference privused argument allowing vaccess()
@@ -5537,6 +5596,20 @@ vop_rename_pre(void *ap)
 }
 
 #ifdef DEBUG_VFS_LOCKS
+void
+vop_fplookup_vexec_pre(void *ap __unused)
+{
+
+	VFS_SMR_ASSERT_ENTERED();
+}
+
+void
+vop_fplookup_vexec_post(void *ap __unused, int rc __unused)
+{
+
+	VFS_SMR_ASSERT_ENTERED();
+}
+
 void
 vop_strategy_pre(void *ap)
 {

Modified: head/sys/kern/vnode_if.src
==============================================================================
--- head/sys/kern/vnode_if.src	Sat Jul 25 10:31:52 2020	(r363517)
+++ head/sys/kern/vnode_if.src	Sat Jul 25 10:32:45 2020	(r363518)
@@ -146,6 +146,17 @@ vop_close {
 };
 
 
+%% fplookup_vexec	vp	- - -
+%! fplookup_vexec	pre	vop_fplookup_vexec_pre
+%! fplookup_vexec	post	vop_fplookup_vexec_post
+
+vop_fplookup_vexec {
+	IN struct vnode *vp;
+	IN struct ucred *cred;
+	IN struct thread *td;
+};
+
+
 %% access	vp	L L L
 
 vop_access {

Modified: head/sys/security/mac/mac_framework.h
==============================================================================
--- head/sys/security/mac/mac_framework.h	Sat Jul 25 10:31:52 2020	(r363517)
+++ head/sys/security/mac/mac_framework.h	Sat Jul 25 10:32:45 2020	(r363518)
@@ -422,13 +422,14 @@ int	mac_vnode_check_listextattr(struct ucred *cred, st
 int	mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp,
  	    struct componentname *cnp);
 extern bool mac_vnode_check_lookup_fp_flag;
+#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag)
 static inline int
 mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp,
     struct componentname *cnp)
 {
 
 	mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup");
-	if (__predict_false(mac_vnode_check_lookup_fp_flag))
+	if (mac_vnode_check_lookup_enabled())
                 return (mac_vnode_check_lookup_impl(cred, dvp, cnp));
 	return (0);
 }

Modified: head/sys/sys/filedesc.h
==============================================================================
--- head/sys/sys/filedesc.h	Sat Jul 25 10:31:52 2020	(r363517)
+++ head/sys/sys/filedesc.h	Sat Jul 25 10:32:45 2020	(r363518)
@@ -311,6 +311,7 @@ pwd_set(struct filedesc *fdp, struct pwd *newpwd)
 	smr_serialized_store(&fdp->fd_pwd, newpwd,
 	    (FILEDESC_XLOCK_ASSERT(fdp), true));
 }
+struct pwd *pwd_get_smr(void);
 
 #endif /* _KERNEL */
 

Modified: head/sys/sys/mount.h
==============================================================================
--- head/sys/sys/mount.h	Sat Jul 25 10:31:52 2020	(r363517)
+++ head/sys/sys/mount.h	Sat Jul 25 10:32:45 2020	(r363518)
@@ -420,6 +420,7 @@ void          __mnt_vnode_markerfree_lazy(struct vnode
 #define	MNTK_TEXT_REFS		0x00008000 /* Keep use ref for text */
 #define	MNTK_VMSETSIZE_BUG	0x00010000
 #define	MNTK_UNIONFS	0x00020000	/* A hack for F_ISUNIONSTACK */
+#define	MNTK_FPLOOKUP	0x00040000	/* fast path lookup is supported */
 #define MNTK_NOASYNC	0x00800000	/* disable async */
 #define MNTK_UNMOUNT	0x01000000	/* unmount in progress */
 #define	MNTK_MWAIT	0x02000000	/* waiting for unmount to finish */

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h	Sat Jul 25 10:31:52 2020	(r363517)
+++ head/sys/sys/vnode.h	Sat Jul 25 10:32:45 2020	(r363518)
@@ -666,6 +666,8 @@ int	vn_path_to_global_path(struct thread *td, struct v
 int	vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
 	    gid_t file_gid, accmode_t accmode, struct ucred *cred,
 	    int *privused);
+int	vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid,
+	    struct ucred *cred);
 int	vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
 	    struct acl *aclp, accmode_t accmode, struct ucred *cred,
 	    int *privused);
@@ -682,6 +684,8 @@ int	vget(struct vnode *vp, int flags, struct thread *t
 enum vgetstate	vget_prep_smr(struct vnode *vp);
 enum vgetstate	vget_prep(struct vnode *vp);
 int	vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
+void	vget_finish_ref(struct vnode *vp, enum vgetstate vs);
+void	vget_abort(struct vnode *vp, enum vgetstate vs);
 void	vgone(struct vnode *vp);
 void	vhold(struct vnode *);
 void	vholdl(struct vnode *);
@@ -865,6 +869,8 @@ void	vop_symlink_post(void *a, int rc);
 int	vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
 
 #ifdef DEBUG_VFS_LOCKS
+void	vop_fplookup_vexec_pre(void *a);
+void	vop_fplookup_vexec_post(void *a, int rc);
 void	vop_strategy_pre(void *a);
 void	vop_lock_pre(void *a);
 void	vop_lock_post(void *a, int rc);
@@ -872,6 +878,8 @@ void	vop_unlock_pre(void *a);
 void	vop_need_inactive_pre(void *a);
 void	vop_need_inactive_post(void *a, int rc);
 #else
+#define	vop_fplookup_vexec_pre(x)	do { } while (0)
+#define	vop_fplookup_vexec_post(x, y)	do { } while (0)
 #define	vop_strategy_pre(x)	do { } while (0)
 #define	vop_lock_pre(x)		do { } while (0)
 #define	vop_lock_post(x, y)	do { } while (0)
@@ -1025,9 +1033,17 @@ int vn_dir_check_exec(struct vnode *vp, struct compone
 #define VFS_SMR()	vfs_smr
 #define vfs_smr_enter()	smr_enter(VFS_SMR())
 #define vfs_smr_exit()	smr_exit(VFS_SMR())
+#define vfs_smr_entered_load(ptr)	smr_entered_load((ptr), VFS_SMR())
 #define VFS_SMR_ASSERT_ENTERED()	SMR_ASSERT_ENTERED(VFS_SMR())
 #define VFS_SMR_ASSERT_NOT_ENTERED()	SMR_ASSERT_NOT_ENTERED(VFS_SMR())
 #define VFS_SMR_ZONE_SET(zone)	uma_zone_set_smr((zone), VFS_SMR())
+
+#define vn_load_v_data_smr(vp)	({		\
+	struct vnode *_vp = (vp);		\
+						\
+	VFS_SMR_ASSERT_ENTERED();		\
+	atomic_load_ptr(&(_vp)->v_data);	\
+})
 
 #endif /* _KERNEL */
 



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202007251032.06PAWjP1043373>