Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 25 Jul 2020 10:38:05 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r363520 - in head/sys/ufs: ffs ufs
Message-ID:  <202007251038.06PAc5VH043965@repo.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: mjg
Date: Sat Jul 25 10:38:05 2020
New Revision: 363520
URL: https://svnweb.freebsd.org/changeset/base/363520

Log:
  ufs: add support for lockless lookup
  
  ACLs are not supported, meaning their presence will force the use of the old lookup.
  
  Reviewed by:    kib
  Tested by:      pho (in a patchset)
  Differential Revision:	https://reviews.freebsd.org/D25579

Modified:
  head/sys/ufs/ffs/ffs_vfsops.c
  head/sys/ufs/ffs/ffs_vnops.c
  head/sys/ufs/ufs/inode.h
  head/sys/ufs/ufs/ufs_acl.c
  head/sys/ufs/ufs/ufs_vnops.c

Modified: head/sys/ufs/ffs/ffs_vfsops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vfsops.c	Sat Jul 25 10:37:15 2020	(r363519)
+++ head/sys/ufs/ffs/ffs_vfsops.c	Sat Jul 25 10:38:05 2020	(r363520)
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
 #include <ddb/ddb.h>
 
 static uma_zone_t uma_inode, uma_ufs1, uma_ufs2;
+VFS_SMR_DECLARE;
 
 static int	ffs_mountfs(struct vnode *, struct mount *, struct thread *);
 static void	ffs_oldfscompat_read(struct fs *, struct ufsmount *,
@@ -393,6 +394,7 @@ ffs_mount(struct mount *mp)
 		uma_ufs2 = uma_zcreate("FFS2 dinode",
 		    sizeof(struct ufs2_dinode), NULL, NULL, NULL, NULL,
 		    UMA_ALIGN_PTR, 0);
+		VFS_SMR_ZONE_SET(uma_inode);
 	}
 
 	vfs_deleteopt(mp->mnt_optnew, "groupquota");
@@ -455,6 +457,7 @@ ffs_mount(struct mount *mp)
 	}
 
 	MNT_ILOCK(mp);
+	mp->mnt_kern_flag &= ~MNTK_FPLOOKUP;
 	mp->mnt_flag |= mntorflags;
 	MNT_IUNLOCK(mp);
 	/*
@@ -795,6 +798,17 @@ ffs_mount(struct mount *mp)
 			}
 		}
 	}
+
+	MNT_ILOCK(mp);
+	/*
+	 * This is racy versus lookup, see ufs_fplookup_vexec for details.
+	 */
+	if ((mp->mnt_kern_flag & MNTK_FPLOOKUP) != 0)
+		panic("MNTK_FPLOOKUP set on mount %p when it should not be", mp);
+	if ((mp->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) == 0)
+		mp->mnt_kern_flag |= MNTK_FPLOOKUP;
+	MNT_IUNLOCK(mp);
+
 	vfs_mountedfrom(mp, fspec);
 	return (0);
 }
@@ -1968,14 +1982,14 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
 
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
-	ip = uma_zalloc(uma_inode, M_WAITOK | M_ZERO);
+	ip = uma_zalloc_smr(uma_inode, M_WAITOK | M_ZERO);
 
 	/* Allocate a new vnode/inode. */
 	error = getnewvnode("ufs", mp, fs->fs_magic == FS_UFS1_MAGIC ?
 	    &ffs_vnodeops1 : &ffs_vnodeops2, &vp);
 	if (error) {
 		*vpp = NULL;
-		uma_zfree(uma_inode, ip);
+		uma_zfree_smr(uma_inode, ip);
 		return (error);
 	}
 	/*
@@ -2004,7 +2018,7 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
 		vp->v_vflag |= VV_FORCEINSMQ;
 	error = insmntque(vp, mp);
 	if (error != 0) {
-		uma_zfree(uma_inode, ip);
+		uma_zfree_smr(uma_inode, ip);
 		*vpp = NULL;
 		return (error);
 	}
@@ -2327,7 +2341,7 @@ ffs_ifree(struct ufsmount *ump, struct inode *ip)
 		uma_zfree(uma_ufs1, ip->i_din1);
 	else if (ip->i_din2 != NULL)
 		uma_zfree(uma_ufs2, ip->i_din2);
-	uma_zfree(uma_inode, ip);
+	uma_zfree_smr(uma_inode, ip);
 }
 
 static int dobkgrdwrite = 1;

Modified: head/sys/ufs/ffs/ffs_vnops.c
==============================================================================
--- head/sys/ufs/ffs/ffs_vnops.c	Sat Jul 25 10:37:15 2020	(r363519)
+++ head/sys/ufs/ffs/ffs_vnops.c	Sat Jul 25 10:38:05 2020	(r363520)
@@ -905,8 +905,10 @@ ffs_write(ap)
 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid &&
 	    ap->a_cred) {
 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID)) {
-			ip->i_mode &= ~(ISUID | ISGID);
+			vn_seqc_write_begin(vp);
+			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
 			DIP_SET(ip, i_mode, ip->i_mode);
+			vn_seqc_write_end(vp);
 		}
 	}
 	if (error) {
@@ -1152,8 +1154,10 @@ ffs_extwrite(struct vnode *vp, struct uio *uio, int io
 	 */
 	if ((ip->i_mode & (ISUID | ISGID)) && resid > uio->uio_resid && ucred) {
 		if (priv_check_cred(ucred, PRIV_VFS_RETAINSUGID)) {
-			ip->i_mode &= ~(ISUID | ISGID);
+			vn_seqc_write_begin(vp);
+			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
 			dp->di_mode = ip->i_mode;
+			vn_seqc_write_end(vp);
 		}
 	}
 	if (error) {

Modified: head/sys/ufs/ufs/inode.h
==============================================================================
--- head/sys/ufs/ufs/inode.h	Sat Jul 25 10:37:15 2020	(r363519)
+++ head/sys/ufs/ufs/inode.h	Sat Jul 25 10:38:05 2020	(r363520)
@@ -43,6 +43,7 @@
 #include <sys/lock.h>
 #include <sys/queue.h>
 #include <ufs/ufs/dinode.h>
+#include <sys/seqc.h>
 
 /*
  * This must agree with the definition in <ufs/ufs/dir.h>.
@@ -149,6 +150,14 @@ struct inode {
 #define UFS_INODE_FLAG_LAZY_MASK_ASSERTABLE \
 	(UFS_INODE_FLAG_LAZY_MASK & ~(IN_LAZYMOD | IN_LAZYACCESS))
 
+#define UFS_INODE_SET_MODE(ip, mode) do {			\
+	struct inode *_ip = (ip);				\
+	int _mode = (mode);					\
+								\
+	ASSERT_VOP_IN_SEQC(ITOV(_ip));				\
+	atomic_store_short(&(_ip)->i_mode, _mode);		\
+} while (0)
+
 #define UFS_INODE_SET_FLAG(ip, flags) do {			\
 	struct inode *_ip = (ip);				\
 	struct vnode *_vp = ITOV(_ip);				\
@@ -229,6 +238,7 @@ struct indir {
 
 /* Convert between inode pointers and vnode pointers. */
 #define	VTOI(vp)	((struct inode *)(vp)->v_data)
+#define	VTOI_SMR(vp)	((struct inode *)vn_load_v_data_smr(vp))
 #define	ITOV(ip)	((ip)->i_vnode)
 
 /* Determine if soft dependencies are being done */

Modified: head/sys/ufs/ufs/ufs_acl.c
==============================================================================
--- head/sys/ufs/ufs/ufs_acl.c	Sat Jul 25 10:37:15 2020	(r363519)
+++ head/sys/ufs/ufs/ufs_acl.c	Sat Jul 25 10:38:05 2020	(r363520)
@@ -139,9 +139,11 @@ ufs_sync_acl_from_inode(struct inode *ip, struct acl *
 void
 ufs_sync_inode_from_acl(struct acl *acl, struct inode *ip)
 {
+	int newmode;
 
-	ip->i_mode &= ACL_PRESERVE_MASK;
-	ip->i_mode |= acl_posix1e_acl_to_mode(acl);
+	newmode = ip->i_mode & ACL_PRESERVE_MASK;
+	newmode |= acl_posix1e_acl_to_mode(acl);
+	UFS_INODE_SET_MODE(ip, newmode);
 	DIP_SET(ip, i_mode, ip->i_mode);
 }
 
@@ -381,7 +383,7 @@ int
 ufs_setacl_nfs4_internal(struct vnode *vp, struct acl *aclp, struct thread *td)
 {
 	int error;
-	mode_t mode;
+	mode_t mode, newmode;
 	struct inode *ip = VTOI(vp);
 
 	KASSERT(acl_nfs4_check(aclp, vp->v_type == VDIR) == 0,
@@ -418,8 +420,9 @@ ufs_setacl_nfs4_internal(struct vnode *vp, struct acl 
 
 	acl_nfs4_sync_mode_from_acl(&mode, aclp);
 
-	ip->i_mode &= ACL_PRESERVE_MASK;
-	ip->i_mode |= mode;
+	newmode = ip->i_mode & ACL_PRESERVE_MASK;
+	newmode |= mode;
+	UFS_INODE_SET_MODE(ip, newmode);
 	DIP_SET(ip, i_mode, ip->i_mode);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 

Modified: head/sys/ufs/ufs/ufs_vnops.c
==============================================================================
--- head/sys/ufs/ufs/ufs_vnops.c	Sat Jul 25 10:37:15 2020	(r363519)
+++ head/sys/ufs/ufs/ufs_vnops.c	Sat Jul 25 10:38:05 2020	(r363520)
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/lockf.h>
 #include <sys/conf.h>
 #include <sys/acl.h>
+#include <sys/smr.h>
 
 #include <security/mac/mac_framework.h>
 
@@ -96,10 +97,12 @@ FEATURE(suiddir,
     "Give all new files in directory the same ownership as the directory");
 #endif
 
+VFS_SMR_DECLARE;
 
 #include <ufs/ffs/ffs_extern.h>
 
 static vop_accessx_t	ufs_accessx;
+static vop_fplookup_vexec_t ufs_fplookup_vexec;
 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
 static vop_close_t	ufs_close;
@@ -422,6 +425,46 @@ ufs_accessx(ap)
 	return (error);
 }
 
+/*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ */
+static int
+ufs_fplookup_vexec(ap)
+	struct vop_fplookup_vexec_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		struct thread *a_td;
+	} */ *ap;
+{
+	struct vnode *vp;
+	struct inode *ip;
+	struct ucred *cred;
+	mode_t all_x, mode;
+
+	vp = ap->a_vp;
+	ip = VTOI_SMR(vp);
+	if (__predict_false(ip == NULL))
+		return (EAGAIN);
+
+	/*
+	 * XXX ACL race
+	 *
+	 * ACLs are not supported and UFS clears/sets this flag on mount and
+	 * remount. However, we may still be racing with seeing them and there
+	 * is no provision to make sure they were accounted for. This matches
+	 * the behavior of the locked case, since the lookup there is also
+	 * racy: mount takes no measures to block anyone from progressing.
+	 */
+	all_x = S_IXUSR | S_IXGRP | S_IXOTH;
+	mode = atomic_load_short(&ip->i_mode);
+	if (__predict_true((mode & all_x) == all_x))
+		return (0);
+
+	cred = ap->a_cred;
+	return (vaccess_vexec_smr(mode, ip->i_uid, ip->i_gid, cred));
+}
+
 /* ARGSUSED */
 static int
 ufs_getattr(ap)
@@ -711,7 +754,7 @@ ufs_chmod(vp, mode, cred, td)
 	struct thread *td;
 {
 	struct inode *ip = VTOI(vp);
-	int error;
+	int newmode, error;
 
 	/*
 	 * To modify the permissions on a file, must possess VADMIN
@@ -744,8 +787,9 @@ ufs_chmod(vp, mode, cred, td)
 			return (error);
 	}
 
-	ip->i_mode &= ~ALLPERMS;
-	ip->i_mode |= (mode & ALLPERMS);
+	newmode = ip->i_mode & ~ALLPERMS;
+	newmode |= (mode & ALLPERMS);
+	UFS_INODE_SET_MODE(ip, newmode);
 	DIP_SET(ip, i_mode, ip->i_mode);
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 #ifdef UFS_ACL
@@ -869,7 +913,7 @@ good:
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID)) {
-			ip->i_mode &= ~(ISUID | ISGID);
+			UFS_INODE_SET_MODE(ip, ip->i_mode & ~(ISUID | ISGID));
 			DIP_SET(ip, i_mode, ip->i_mode);
 		}
 	}
@@ -1111,7 +1155,10 @@ ufs_rename(ap)
 	int error = 0;
 	struct mount *mp;
 	ino_t ino;
+	bool want_seqc_end;
 
+	want_seqc_end = false;
+
 #ifdef INVARIANTS
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
 	    (fcnp->cn_flags & HASBUF) == 0)
@@ -1315,6 +1362,13 @@ relock:
 	    tdp->i_effnlink == 0)
 		panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
 
+	if (tvp != NULL)
+		vn_seqc_write_begin(tvp);
+	vn_seqc_write_begin(tdvp);
+	vn_seqc_write_begin(fvp);
+	vn_seqc_write_begin(fdvp);
+	want_seqc_end = true;
+
 	/*
 	 * 1) Bump link count while we're moving stuff
 	 *    around.  If we crash somewhere before
@@ -1513,6 +1567,14 @@ relock:
 	cache_purge_negative(tdvp);
 
 unlockout:
+	if (want_seqc_end) {
+		if (tvp != NULL)
+			vn_seqc_write_end(tvp);
+		vn_seqc_write_end(tdvp);
+		vn_seqc_write_end(fvp);
+		vn_seqc_write_end(fdvp);
+	}
+
 	vput(fdvp);
 	vput(fvp);
 	if (tvp)
@@ -1556,6 +1618,14 @@ bad:
 	goto unlockout;
 
 releout:
+	if (want_seqc_end) {
+		if (tvp != NULL)
+			vn_seqc_write_end(tvp);
+		vn_seqc_write_end(tdvp);
+		vn_seqc_write_end(fvp);
+		vn_seqc_write_end(fdvp);
+	}
+
 	vrele(fdvp);
 	vrele(fvp);
 	vrele(tdvp);
@@ -1590,7 +1660,7 @@ ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, 
 		 */
 		if (acl->acl_cnt != 0) {
 			dmode = acl_posix1e_newfilemode(dmode, acl);
-			ip->i_mode = dmode;
+			UFS_INODE_SET_MODE(ip, dmode);
 			DIP_SET(ip, i_mode, dmode);
 			*dacl = *acl;
 			ufs_sync_acl_from_inode(ip, acl);
@@ -1602,7 +1672,7 @@ ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, 
 		/*
 		 * Just use the mode as-is.
 		 */
-		ip->i_mode = dmode;
+		UFS_INODE_SET_MODE(ip, dmode);
 		DIP_SET(ip, i_mode, dmode);
 		error = 0;
 		goto out;
@@ -1673,7 +1743,7 @@ ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp,
 			 * the it's not defined case.
 			 */
 			mode = acl_posix1e_newfilemode(mode, acl);
-			ip->i_mode = mode;
+			UFS_INODE_SET_MODE(ip, mode);
 			DIP_SET(ip, i_mode, mode);
 			ufs_sync_acl_from_inode(ip, acl);
 			break;
@@ -1684,7 +1754,7 @@ ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp,
 		/*
 		 * Just use the mode as-is.
 		 */
-		ip->i_mode = mode;
+		UFS_INODE_SET_MODE(ip, mode);
 		DIP_SET(ip, i_mode, mode);
 		error = 0;
 		goto out;
@@ -1796,6 +1866,7 @@ ufs_mkdir(ap)
 	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
 	if (error)
 		goto out;
+	vn_seqc_write_begin(tvp);
 	ip = VTOI(tvp);
 	ip->i_gid = dp->i_gid;
 	DIP_SET(ip, i_gid, dp->i_gid);
@@ -1846,6 +1917,7 @@ ufs_mkdir(ap)
 			if (DOINGSOFTDEP(tvp))
 				softdep_revert_link(dp, ip);
 			UFS_VFREE(tvp, ip->i_number, dmode);
+			vn_seqc_write_end(tvp);
 			vgone(tvp);
 			vput(tvp);
 			return (error);
@@ -1861,6 +1933,7 @@ ufs_mkdir(ap)
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_link(dp, ip);
 		UFS_VFREE(tvp, ip->i_number, dmode);
+		vn_seqc_write_end(tvp);
 		vgone(tvp);
 		vput(tvp);
 		return (error);
@@ -1868,7 +1941,7 @@ ufs_mkdir(ap)
 #endif
 #endif	/* !SUIDDIR */
 	UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
-	ip->i_mode = dmode;
+	UFS_INODE_SET_MODE(ip, dmode);
 	DIP_SET(ip, i_mode, dmode);
 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 2;
@@ -1974,6 +2047,7 @@ ufs_mkdir(ap)
 bad:
 	if (error == 0) {
 		*ap->a_vpp = tvp;
+		vn_seqc_write_end(tvp);
 	} else {
 		dp->i_effnlink--;
 		dp->i_nlink--;
@@ -1989,6 +2063,7 @@ bad:
 		UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_mkdir(dp, ip);
+		vn_seqc_write_end(tvp);
 		vgone(tvp);
 		vput(tvp);
 	}
@@ -2637,8 +2712,9 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
 	}
 #endif
 #endif	/* !SUIDDIR */
+	vn_seqc_write_begin(tvp); /* Mostly to cover asserts */
 	UFS_INODE_SET_FLAG(ip, IN_ACCESS | IN_CHANGE | IN_UPDATE);
-	ip->i_mode = mode;
+	UFS_INODE_SET_MODE(ip, mode);
 	DIP_SET(ip, i_mode, mode);
 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 1;
@@ -2648,7 +2724,7 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
 		softdep_setup_create(VTOI(dvp), ip);
 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
 	    priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID)) {
-		ip->i_mode &= ~ISGID;
+		UFS_INODE_SET_MODE(ip, ip->i_mode & ~ISGID);
 		DIP_SET(ip, i_mode, ip->i_mode);
 	}
 
@@ -2688,6 +2764,7 @@ ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
 	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
 	if (error)
 		goto bad;
+	vn_seqc_write_end(tvp);
 	*vpp = tvp;
 	return (0);
 
@@ -2702,6 +2779,7 @@ bad:
 	UFS_INODE_SET_FLAG(ip, IN_CHANGE);
 	if (DOINGSOFTDEP(tvp))
 		softdep_revert_create(VTOI(dvp), ip);
+	vn_seqc_write_end(tvp);
 	vgone(tvp);
 	vput(tvp);
 	return (error);
@@ -2740,6 +2818,7 @@ struct vop_vector ufs_vnodeops = {
 	.vop_write =		VOP_PANIC,
 	.vop_accessx =		ufs_accessx,
 	.vop_bmap =		ufs_bmap,
+	.vop_fplookup_vexec =	ufs_fplookup_vexec,
 	.vop_cachedlookup =	ufs_lookup,
 	.vop_close =		ufs_close,
 	.vop_create =		ufs_create,



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202007251038.06PAc5VH043965>