Date: Wed, 24 Sep 2014 13:27:58 +0300 From: Konstantin Belousov <kostikbel@gmail.com> To: Bryan Drewery <bdrewery@FreeBSD.org> Cc: FreeBSD FS <freebsd-fs@FreeBSD.org> Subject: Re: Deadlock with umount -f involving tmpfs on top of ZFS on r271170 Message-ID: <20140924102758.GH8870@kib.kiev.ua> In-Reply-To: <5422240F.4080003@FreeBSD.org> References: <5420D5FC.4030600@FreeBSD.org> <20140923131244.GC8870@kib.kiev.ua> <5422240F.4080003@FreeBSD.org>
next in thread | previous in thread | raw e-mail | index | archive | help
On Tue, Sep 23, 2014 at 08:53:19PM -0500, Bryan Drewery wrote: > I tried your patch and still ran into the deadlock. Here is the debug > information: https://people.freebsd.org/~bdrewery/vfs_deadlock.2.txt I see what is going on. Previous patch cannot help there. The problem is that kern_linkat() starts write with vn_start_write(9), and then tries to execute namei(9) with a path potentially crossing the mount point. If the mount point is being unmounted, unmount cannot lay suspension on the filesystem due to writer, but it owns the covered vnode lock. On the other hand, namei(9) is unable to cross the mount point since covered vnode is locked, thus writer does not make progress. The similar issue exists in rename code. The deadlock is only possible for filesystems which suspend writes on unmount; such fs are UFS and tmpfs. Try this. diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index b3b7ed5..9775480 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1551,10 +1551,10 @@ kern_linkat(struct thread *td, int fd1, int fd2, char *path1, char *path2, cap_rights_t rights; int error; +again: bwillwrite(); NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); -again: if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1563,14 +1563,11 @@ again: vrele(vp); return (EPERM); /* POSIX */ } - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { - vrele(vp); - return (error); - } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else @@ -1587,26 +1584,41 @@ again: error = EXDEV; else error = can_hardlink(vp, td->td_ucred); - if (error == 0) #ifdef MAC + if (error == 0) error = mac_vnode_check_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); - if (error == 0) #endif - error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + if (error != 0) { + vput(vp); + vput(nd.ni_dvp); + NDFREE(&nd, NDF_ONLY_PNBUF); + return (error); + } + error = vn_start_write(vp, &mp, V_NOWAIT); + if (error != 0) { + vput(vp); + vput(nd.ni_dvp); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_start_write(NULL, &mp, + V_XSLEEP | PCATCH); + if (error != 0) + return (error); + goto again; + } + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0); vput(nd.ni_dvp); + vn_finished_write(mp); + NDFREE(&nd, NDF_ONLY_PNBUF); } else { vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); - vn_finished_write(mp); goto again; } - NDFREE(&nd, NDF_ONLY_PNBUF); } vrele(vp); - vn_finished_write(mp); return (error); } @@ -3519,6 +3531,7 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, cap_rights_t rights; int error; +again: bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | @@ -3539,14 +3552,6 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, VOP_UNLOCK(fromnd.ni_vp, 0); #endif fvp = fromnd.ni_vp; - if (error == 0) - error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); - if (error != 0) { - NDFREE(&fromnd, NDF_ONLY_PNBUF); - vrele(fromnd.ni_dvp); - vrele(fvp); - goto out1; - } NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2, pathseg, new, newfd, cap_rights_init(&rights, CAP_LINKAT), td); @@ -3559,11 +3564,28 @@ kern_renameat(struct thread *td, int oldfd, char *old, int newfd, char *new, NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); - vn_finished_write(mp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; + error = vn_start_write(fvp, &mp, V_NOWAIT); + if (error != 0) { + NDFREE(&fromnd, NDF_ONLY_PNBUF); + NDFREE(&tond, NDF_ONLY_PNBUF); + if (tvp != NULL) + vput(tvp); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + vrele(fromnd.ni_dvp); + vrele(fvp); + vrele(tond.ni_startdir); + error = vn_start_write(NULL, &mp, V_WAIT | PCATCH); + if (error != 0) + return (error); + goto again; + } if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20140924102758.GH8870>