From owner-svn-src-all@FreeBSD.ORG Thu Sep 25 20:42:26 2014 Return-Path: Delivered-To: svn-src-all@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTPS id 413B2DF9; Thu, 25 Sep 2014 20:42:26 +0000 (UTC) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 2108BD76; Thu, 25 Sep 2014 20:42:26 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.9/8.14.9) with ESMTP id s8PKgQJp087365; Thu, 25 Sep 2014 20:42:26 GMT (envelope-from kib@FreeBSD.org) Received: (from kib@localhost) by svn.freebsd.org (8.14.9/8.14.9/Submit) id s8PKgPQp087364; Thu, 25 Sep 2014 20:42:25 GMT (envelope-from kib@FreeBSD.org) Message-Id: <201409252042.s8PKgPQp087364@svn.freebsd.org> X-Authentication-Warning: svn.freebsd.org: kib set sender to kib@FreeBSD.org using -f From: Konstantin Belousov Date: Thu, 25 Sep 2014 20:42:25 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r272130 - head/sys/kern X-SVN-Group: head MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-all@freebsd.org X-Mailman-Version: 2.1.18-1 Precedence: list List-Id: "SVN commit messages for the entire src tree \(except for " user" and " projects" \)" List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 25 Sep 2014 20:42:26 -0000 Author: kib Date: Thu Sep 25 20:42:25 2014 New Revision: 272130 URL: http://svnweb.freebsd.org/changeset/base/272130 Log: In kern_linkat() and kern_renameat(), do not call namei(9) while holding a write reference on the filesystem. Try to get write reference in unblocked way after all vnodes are resolved; if failed, drop all locks and retry after waiting for suspension end. The VFS_UNMOUNT() methods for UFS and tmpfs try to establish suspension on unmount, while covered vnode is locked by VFS, which prevents namei() from stepping over the mount point. The thread doing namei() sleeps on the covered vnode lock, owning the write ref. Reported by: bdrewery Tested by: bdrewery (previous version), pho Sponsored by: The FreeBSD Foundation MFC after: 1 week Modified: head/sys/kern/vfs_syscalls.c Modified: head/sys/kern/vfs_syscalls.c ============================================================================== --- head/sys/kern/vfs_syscalls.c Thu Sep 25 20:40:24 2014 (r272129) +++ head/sys/kern/vfs_syscalls.c Thu Sep 25 20:42:25 2014 (r272130) @@ -1551,10 +1551,10 @@ kern_linkat(struct thread *td, int fd1, cap_rights_t rights; int error; +again: bwillwrite(); NDINIT_AT(&nd, LOOKUP, follow | AUDITVNODE1, segflg, path1, fd1, td); -again: if ((error = namei(&nd)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1563,50 +1563,65 @@ again: vrele(vp); return (EPERM); /* POSIX */ } - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { - vrele(vp); - return (error); - } NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE2, segflg, path2, fd2, cap_rights_init(&rights, CAP_LINKAT), td); if ((error = namei(&nd)) == 0) { if (nd.ni_vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); - error = EEXIST; - } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { + vrele(vp); + return (EEXIST); + } else if (nd.ni_dvp->v_mount != vp->v_mount) { /* - * Check for cross-device links. No need to - * recheck vp->v_type, since it cannot change - * for non-doomed vnode. + * Cross-device link. No need to recheck + * vp->v_type, since it cannot change, except + * to VBAD. */ - if (nd.ni_dvp->v_mount != vp->v_mount) - error = EXDEV; - else - error = can_hardlink(vp, td->td_ucred); - if (error == 0) + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vrele(vp); + return (EXDEV); + } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { + error = can_hardlink(vp, td->td_ucred); #ifdef MAC + if (error == 0) error = mac_vnode_check_link(td->td_ucred, nd.ni_dvp, vp, &nd.ni_cnd); - if (error == 0) #endif - error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + if (error != 0) { + vput(vp); + vput(nd.ni_dvp); + NDFREE(&nd, NDF_ONLY_PNBUF); + return (error); + } + error = vn_start_write(vp, &mp, V_NOWAIT); + if (error != 0) { + vput(vp); + vput(nd.ni_dvp); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_start_write(NULL, &mp, + V_XSLEEP | PCATCH); + if (error != 0) + return (error); + goto again; + } + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); VOP_UNLOCK(vp, 0); vput(nd.ni_dvp); + vn_finished_write(mp); + NDFREE(&nd, NDF_ONLY_PNBUF); } else { vput(nd.ni_dvp); NDFREE(&nd, NDF_ONLY_PNBUF); vrele(vp); - vn_finished_write(mp); goto again; } - NDFREE(&nd, NDF_ONLY_PNBUF); } vrele(vp); - vn_finished_write(mp); return (error); } @@ -3519,6 +3534,7 @@ kern_renameat(struct thread *td, int old cap_rights_t rights; int error; +again: bwillwrite(); #ifdef MAC NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | @@ -3539,14 +3555,6 @@ kern_renameat(struct thread *td, int old VOP_UNLOCK(fromnd.ni_vp, 0); #endif fvp = fromnd.ni_vp; - if (error == 0) - error = vn_start_write(fvp, &mp, V_WAIT | PCATCH); - if (error != 0) { - NDFREE(&fromnd, NDF_ONLY_PNBUF); - vrele(fromnd.ni_dvp); - vrele(fvp); - goto out1; - } NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2, pathseg, new, newfd, cap_rights_init(&rights, CAP_LINKAT), td); @@ -3559,11 +3567,30 @@ kern_renameat(struct thread *td, int old NDFREE(&fromnd, NDF_ONLY_PNBUF); vrele(fromnd.ni_dvp); vrele(fvp); - vn_finished_write(mp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; + error = vn_start_write(fvp, &mp, V_NOWAIT); + if (error != 0) { + NDFREE(&fromnd, NDF_ONLY_PNBUF); + NDFREE(&tond, NDF_ONLY_PNBUF); + if (tvp != NULL) + vput(tvp); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + vrele(fromnd.ni_dvp); + vrele(fvp); + vrele(tond.ni_startdir); + if (fromnd.ni_startdir != NULL) + vrele(fromnd.ni_startdir); + error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); + if (error != 0) + return (error); + goto again; + } if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR;