Date: Wed, 24 Sep 2014 18:57:28 +0300 From: Konstantin Belousov <kostikbel@gmail.com> To: Peter Holm <peter@holm.cc> Cc: FreeBSD FS <freebsd-fs@freebsd.org> Subject: Re: Deadlock with umount -f involving tmpfs on top of ZFS on r271170 Message-ID: <20140924155728.GN8870@kib.kiev.ua> In-Reply-To: <20140924153045.GA15685@x2.osted.lan> References: <5420D5FC.4030600@FreeBSD.org> <20140923131244.GC8870@kib.kiev.ua> <5422240F.4080003@FreeBSD.org> <20140924102758.GH8870@kib.kiev.ua> <20140924132605.GA11772@x2.osted.lan> <20140924134725.GI8870@kib.kiev.ua> <20140924153045.GA15685@x2.osted.lan>
next in thread | previous in thread | raw e-mail | index | archive | help
On Wed, Sep 24, 2014 at 05:30:45PM +0200, Peter Holm wrote: > On Wed, Sep 24, 2014 at 04:47:25PM +0300, Konstantin Belousov wrote: > > On Wed, Sep 24, 2014 at 03:26:05PM +0200, Peter Holm wrote: > > > The patch is an improvement, but: > > > > > > http://people.freebsd.org/~pho/stress/log/kostik718.txt > > > > Does you load included both rename and link, or only one of those > > syscalls ? I see a bug in the rename part of the patch, below is > > the update. > > > > Both. I have split the tests in two now. Uptime is by now one hour. > I'll let that run for a few hours more, before switching to random > tests. > > I did get this page fault once: > http://people.freebsd.org/~pho/stress/log/kostik719.txt > but I guess it's unrelated? I have recompiled uma_core.c and > vm_pageout with "-O0" in case it shows up again. This looks unrelated. But, in the log, I see user-controllable LOR caused by my patch. Please use the following update instead. diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index b3b7ed5..a4aa19e 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1551,10 +1551,10 @@ kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, cap_rights_t rights; int error; +again: bwillwrite(); NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); -again: if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1563,50 +1563,65 @@ again: vrele(vp); return (EPERM); /* POSIX */ } - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { - vrele(vp); - return (error); - } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); - error = EEXIST; - } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { + vrele(vp); + return (EEXIST); + } else if (nd.ni_dvp->v_mount != vp->v_mount) { /* - * Check for cross-device links. No need to - * recheck vp->v_type, since it cannot change - * for non-doomed vnode. + * Cross-device link. No need to recheck + * vp->v_type, since it cannot change, except + * to VBAD. */ - if (nd.ni_dvp->v_mount != vp->v_mount) - error = EXDEV; - else - error = can_hardlink(vp, td->td_ucred); - if (error == 0) + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vrele(vp); + return (EXDEV); + } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { + error = can_hardlink(vp, td->td_ucred); #ifdef MAC + if (error == 0) error = mac_vnode_check_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); - if (error == 0) #endif - error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + if (error != 0) { + vput(vp); + vput(nd.ni_dvp); + NDFREE(&nd, NDF_ONLY_PNBUF); + return (error); + } + error = vn_start_write(vp, &mp, V_NOWAIT); + if (error != 0) { + vput(vp); + vput(nd.ni_dvp); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_start_write(NULL, &mp, + V_XSLEEP | PCATCH); + if (error != 0) + return (error); + goto again; + } + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0); vput(nd.ni_dvp); + vn_finished_write(mp); + NDFREE(&nd, NDF_ONLY_PNBUF); } else { vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); - vn_finished_write(mp); goto again; } - NDFREE(&nd, NDF_ONLY_PNBUF); } vrele(vp); - vn_finished_write(mp); return (error); } @@ -3519,6 +3534,7 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, cap_rights_t rights; int error; +again: bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | @@ -3539,14 +3555,6 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, VOP_UNLOCK(fromnd.ni_vp, 0); #endif fvp = fromnd.ni_vp; - if (error == 0) - error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); - if (error != 0) { - NDFREE(&fromnd, NDF_ONLY_PNBUF); - vrele(fromnd.ni_dvp); - vrele(fvp); - goto out1; - } NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2, pathseg, new, newfd, cap_rights_init(&rights, CAP_LINKAT), td); @@ -3559,11 +3567,28 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); - vn_finished_write(mp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; + error = vn_start_write(fvp, &mp, V_NOWAIT); + if (error != 0) { + NDFREE(&fromnd, NDF_ONLY_PNBUF); + NDFREE(&tond, NDF_ONLY_PNBUF); + if (tvp != NULL) + vput(tvp); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + vrele(fromnd.ni_dvp); + vrele(fvp); + vrele(tond.ni_startdir); + error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); + if (error != 0) + return (error); + goto again; + } if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140924155728.GN8870>