Date: Tue, 2 Oct 2001 12:35:10 -0700 (PDT) From: Matt Dillon <dillon@earth.backplane.com> To: Yevgeniy Aleynikov <eugenea@infospace.com> Cc: peter@FreeBSD.ORG, ache@FreeBSD.ORG, mckusick@mckusick.com, Ken Pizzini <kenp@infospace.com>, Ian Dowse <iedowse@maths.tcd.ie>, hackers@FreeBSD.ORG Subject: Re: bleh. Re: ufs_rename panic Message-ID: <200110021935.f92JZAm59707@earth.backplane.com> References: <200110020506.f9256BJ55255@earth.backplane.com> <3BBA0699.29F589BE@infospace.com>
next in thread | previous in thread | raw e-mail | index | archive | help
Ok, I'm adding -hackers... another email thread got going in -committers. Here is a patch set for -stable. It isn't perfect but it does appear to solve the problem. The one case I don't handle right is if you have a hardlinked file and two renames in two different directories on the same file occur at the same time... that will (improperly) return an error code when, in fact, it's perfectly acceptable to do that. This patch appears to fix the utterly trivial crash reproduction that Yevgeniy was able to produce with a couple of simple race scripts running in the background. What I've done is add a SOFTLOCKLEAF capability to namei(). If set, and the file/directory exists, namei() will generate an extra VREF() on the vnode and set the VSOFTLOCK flag in vp->v_flag. If the vnode already has VSOFTLOCK set, namei() will return EINVAL. Then in rename() and rmdir() I set SOFTLOCKLEAF for the namei resolution and, of course, clean things up when everything is done. The ufs_rename() and ufs_rmdir() code no longer have to do the IN_RENAME hack at all, because it's now handled. This patch set does not yet include fixes to unionfs or the nfs server and is for informational purposes only. Comments? -Matt Index: kern/vfs_lookup.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_lookup.c,v retrieving revision 1.38.2.3 diff -u -r1.38.2.3 vfs_lookup.c --- kern/vfs_lookup.c 2001/08/31 19:36:49 1.38.2.3 +++ kern/vfs_lookup.c 2001/10/02 19:04:21 @@ -372,11 +372,20 @@ error = EISDIR; goto bad; } + if ((cnp->cn_flags & SOFTLOCKLEAF) && + (dp->v_flag & VSOFTLOCK)) { + error = EINVAL; + goto bad; + } if (wantparent) { ndp->ni_dvp = dp; VREF(dp); } ndp->ni_vp = dp; + if (cnp->cn_flags & SOFTLOCKLEAF) { + VREF(dp); + vsetsoftlock(dp); + } if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) VOP_UNLOCK(dp, 0, p); /* XXX This should probably move to the top of function. */ @@ -565,13 +574,20 @@ error = EROFS; goto bad2; } + if ((cnp->cn_flags & SOFTLOCKLEAF) && (dp->v_flag & VSOFTLOCK)) { + error = EINVAL; + goto bad2; + } if (cnp->cn_flags & SAVESTART) { ndp->ni_startdir = ndp->ni_dvp; VREF(ndp->ni_startdir); } if (!wantparent) vrele(ndp->ni_dvp); - + if (cnp->cn_flags & SOFTLOCKLEAF) { + VREF(dp); + vsetsoftlock(dp); + } if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, p); return (0); @@ -654,6 +670,15 @@ error = ENOTDIR; goto bad; } + if ((cnp->cn_flags & SOFTLOCKLEAF) && + (dp->v_flag & VSOFTLOCK)) { + error = EINVAL; + goto bad; + } + if (cnp->cn_flags & SOFTLOCKLEAF) { + VREF(dp); + vsetsoftlock(dp); + } if (!(cnp->cn_flags & LOCKLEAF)) VOP_UNLOCK(dp, 0, p); *vpp = dp; @@ -707,6 +732,10 @@ error = EROFS; goto bad2; } + if ((cnp->cn_flags & SOFTLOCKLEAF) && (dp->v_flag & VSOFTLOCK)) { + error = EINVAL; + goto bad2; + } /* ASSERT(dvp == ndp->ni_startdir) */ if (cnp->cn_flags & SAVESTART) VREF(dvp); @@ -718,6 +747,10 @@ ((cnp->cn_flags & (NOOBJ|LOCKLEAF)) == LOCKLEAF)) vfs_object_create(dp, cnp->cn_proc, cnp->cn_cred); + if (cnp->cn_flags & SOFTLOCKLEAF) { + VREF(dp); + vsetsoftlock(dp); + } if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, p); return (0); Index: kern/vfs_subr.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v retrieving revision 1.249.2.11 diff -u -r1.249.2.11 vfs_subr.c --- kern/vfs_subr.c 2001/09/11 09:49:53 1.249.2.11 +++ kern/vfs_subr.c 2001/10/02 18:45:55 @@ -2927,6 +2961,12 @@ struct nameidata *ndp; const uint flags; { + if (!(flags & NDF_NO_FREE_SOFTLOCKLEAF) && + (ndp->ni_cnd.cn_flags & SOFTLOCKLEAF) && + ndp->ni_vp) { + vclearsoftlock(ndp->ni_vp); + vrele(ndp->ni_vp); + } if (!(flags & NDF_NO_FREE_PNBUF) && (ndp->ni_cnd.cn_flags & HASBUF)) { zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); @@ -2955,3 +2995,24 @@ ndp->ni_startdir = NULL; } } + +void +vsetsoftlock(struct vnode *vp) +{ + int s; + + s = splbio(); + vp->v_flag |= VSOFTLOCK; + splx(s); +} + +void +vclearsoftlock(struct vnode *vp) +{ + int s; + + s = splbio(); + vp->v_flag &= ~VSOFTLOCK; + splx(s); +} + Index: kern/vfs_syscalls.c =================================================================== RCS file: /home/ncvs/src/sys/kern/vfs_syscalls.c,v retrieving revision 1.151.2.9 diff -u -r1.151.2.9 vfs_syscalls.c --- kern/vfs_syscalls.c 2001/08/12 10:48:00 1.151.2.9 +++ kern/vfs_syscalls.c 2001/10/02 19:18:06 @@ -2633,8 +2633,8 @@ int error; bwillwrite(); - NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, - SCARG(uap, from), p); + NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART | SOFTLOCKLEAF, + UIO_USERSPACE, SCARG(uap, from), p); if ((error = namei(&fromnd)) != 0) return (error); fvp = fromnd.ni_vp; @@ -2646,7 +2646,7 @@ /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; - NDFREE(&fromnd, NDF_ONLY_PNBUF); + NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; @@ -2683,12 +2683,14 @@ if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } + fromnd.ni_cnd.cn_flags &= ~SOFTLOCKLEAF; /* XXX hack */ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); - NDFREE(&fromnd, NDF_ONLY_PNBUF); + fromnd.ni_cnd.cn_flags |= SOFTLOCKLEAF; + NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF); NDFREE(&tond, NDF_ONLY_PNBUF); } else { - NDFREE(&fromnd, NDF_ONLY_PNBUF); + NDFREE(&fromnd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF); NDFREE(&tond, NDF_ONLY_PNBUF); if (tdvp == tvp) vrele(tdvp); @@ -2785,8 +2787,8 @@ struct nameidata nd; bwillwrite(); - NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, - SCARG(uap, path), p); + NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF | SOFTLOCKLEAF, + UIO_USERSPACE, SCARG(uap, path), p); if ((error = namei(&nd)) != 0) return (error); vp = nd.ni_vp; @@ -2812,7 +2814,7 @@ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } out: - NDFREE(&nd, NDF_ONLY_PNBUF); + NDFREE(&nd, NDF_ONLY_PNBUF & NDF_ONLY_SOFTLOCKLEAF); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else Index: sys/namei.h =================================================================== RCS file: /home/ncvs/src/sys/sys/namei.h,v retrieving revision 1.29.2.2 diff -u -r1.29.2.2 namei.h --- sys/namei.h 2001/09/30 21:12:54 1.29.2.2 +++ sys/namei.h 2001/10/02 18:43:34 @@ -114,7 +114,7 @@ #define FOLLOW 0x0040 /* follow symbolic links */ #define NOOBJ 0x0080 /* don't create object */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ -#define MODMASK 0x00fc /* mask of operational modifiers */ +#define MODMASK (0x00fc|SOFTLOCKLEAF) /* * Namei parameter descriptors. * @@ -143,7 +143,7 @@ #define WILLBEDIR 0x080000 /* new files will be dirs; allow trailing / */ #define ISUNICODE 0x100000 /* current component name is unicode*/ #define PDIRUNLOCK 0x200000 /* file system lookup() unlocked parent dir */ -#define PARAMASK 0x1fff00 /* mask of parameter descriptors */ +#define SOFTLOCKLEAF 0x400000 /* set VSOFTLOCK in vnode */ /* * Initialization of an nameidata structure. */ @@ -180,7 +180,9 @@ #define NDF_NO_VP_PUT 0x0000000c #define NDF_NO_STARTDIR_RELE 0x00000010 #define NDF_NO_FREE_PNBUF 0x00000020 +#define NDF_NO_FREE_SOFTLOCKLEAF 0x00000040 #define NDF_ONLY_PNBUF (~NDF_NO_FREE_PNBUF) +#define NDF_ONLY_SOFTLOCKLEAF (~NDF_NO_FREE_SOFTLOCKLEAF) void NDFREE __P((struct nameidata *, const uint)); Index: sys/vnode.h =================================================================== RCS file: /home/ncvs/src/sys/sys/vnode.h,v retrieving revision 1.111.2.12 diff -u -r1.111.2.12 vnode.h --- sys/vnode.h 2001/09/22 09:21:48 1.111.2.12 +++ sys/vnode.h 2001/10/02 18:46:10 @@ -169,6 +169,7 @@ #define VTBFREE 0x100000 /* This vnode is on the to-be-freelist */ #define VONWORKLST 0x200000 /* On syncer work-list */ #define VMOUNT 0x400000 /* Mount in progress */ +#define VSOFTLOCK 0x800000 /* Soft lock on vnode (directory entry) */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value @@ -630,6 +632,8 @@ void vrele __P((struct vnode *vp)); void vref __P((struct vnode *vp)); void vbusy __P((struct vnode *vp)); +void vsetsoftlock __P((struct vnode *vp)); +void vclearsoftlock __P((struct vnode *vp)); extern vop_t **default_vnodeop_p; extern vop_t **spec_vnodeop_p; Index: ufs/ufs/ufs_vnops.c =================================================================== RCS file: /home/ncvs/src/sys/ufs/ufs/ufs_vnops.c,v retrieving revision 1.131.2.4 diff -u -r1.131.2.4 ufs_vnops.c --- ufs/ufs/ufs_vnops.c 2001/08/28 17:28:49 1.131.2.4 +++ ufs/ufs/ufs_vnops.c 2001/10/02 19:16:40 @@ -997,13 +997,11 @@ * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || - dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || - (ip->i_flag & IN_RENAME)) { + dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } - ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory = 1; } @@ -1224,13 +1222,12 @@ return (0); } /* - * Ensure that the directory entry still exists and has not - * changed while the new name has been entered. If the source is - * a file then the entry may have been unlinked or renamed. In - * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; the IN_RENAME - * flag ensures that it cannot be moved by another rename or removed - * by a rmdir. + * The source has been VSOFTLOCKd by the caller, preventing rename + * or rmdir, but not prevent unlink. At the moment we allow the unlink + * race and so do not panic if relookup fails or the inodes do not + * match, because files can be hardlinked and our flag is on the vnode + * rather then the directory entry in the namei cache. But source + * directories must still exist. */ if (xp != ip) { if (doingdirectory) @@ -1248,7 +1245,6 @@ cache_purge(fdvp); } error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); - xp->i_flag &= ~IN_RENAME; } VN_KNOTE(fvp, NOTE_RENAME); if (dp) @@ -1263,13 +1259,10 @@ vput(ITOV(xp)); vput(ITOV(dp)); out: - if (doingdirectory) - ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; - ip->i_flag &= ~IN_RENAME; if (DOINGSOFTDEP(fvp)) softdep_change_linkcnt(ip); vput(fvp); @@ -1503,18 +1496,16 @@ dp = VTOI(dvp); /* - * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. Do not allow the removal of mounted on * directories (this can happen when an NFS exported filesystem * tries to remove a locally mounted on directory). + * + * The syscall wrapper already protects us from attempting to remove + * a directory that is undergoing a rename. */ error = 0; - if (ip->i_flag & IN_RENAME) { - error = EINVAL; - goto out; - } if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-hackers" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200110021935.f92JZAm59707>