Date: Fri, 22 Jul 2011 08:55:10 -0400 From: John Baldwin <jhb@freebsd.org> To: freebsd-net@freebsd.org Cc: Jeremiah Lott <jlott@averesystems.com>, rmacklem@freebsd.org Subject: Re: LOR with nfsclient "sillyrename" Message-ID: <201107220855.10774.jhb@freebsd.org> In-Reply-To: <44626428-CF14-4B20-AB57-6D4E8F4678AE@averesystems.com> References: <44626428-CF14-4B20-AB57-6D4E8F4678AE@averesystems.com>
next in thread | previous in thread | raw e-mail | index | archive | help
On Thursday, July 21, 2011 4:19:59 pm Jeremiah Lott wrote: > We're seeing nfsclient deadlocks with what looks like lock order reversal after removing a "silly rename". It is fairly rare, but we've seen it happen a few times. I included relevant back traces from an occurrence. From what I can see, nfs_inactive() is called with the vnode locked. If there is a silly-rename, it will call vrele() on its parent directory, which can potentially try to lock the parent directory. Since this is the opposite order of the lock acquisition in lookup, it can deadlock. This happened in a FreeBSD7 build, but I looked through freebsd head and didn't see any change that addressed this. Anyone seen this before? I haven't seen this before, but your analysis looks correct to me. Perhaps the best fix would be to defer the actual freeing of the sillyrename to an asynchronous task? Maybe something like this (untested, uncompiled): Index: nfsclient/nfsnode.h =================================================================== --- nfsclient/nfsnode.h (revision 224254) +++ nfsclient/nfsnode.h (working copy) @@ -36,6 +36,7 @@ #ifndef _NFSCLIENT_NFSNODE_H_ #define _NFSCLIENT_NFSNODE_H_ +#include <sys/_task.h> #if !defined(_NFSCLIENT_NFS_H_) && !defined(_KERNEL) #include <nfs/nfs.h> #endif @@ -45,8 +46,10 @@ * can be removed by nfs_inactive() */ struct sillyrename { + struct task s_task; struct ucred *s_cred; struct vnode *s_dvp; + struct vnode *s_vp; int (*s_removeit)(struct sillyrename *sp); long s_namlen; char s_name[32]; Index: nfsclient/nfs_vnops.c =================================================================== --- nfsclient/nfs_vnops.c (revision 224254) +++ nfsclient/nfs_vnops.c (working copy) @@ -1757,7 +1757,6 @@ { /* * Make sure that the directory vnode is still valid. - * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); @@ -2754,8 +2753,10 @@ M_NFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; + sp->s_vp = vp; sp->s_removeit = nfs_removeit; VREF(dvp); + vhold(vp); /* * Fudge together a funny name. Index: nfsclient/nfs_node.c =================================================================== --- nfsclient/nfs_node.c (revision 224254) +++ nfsclient/nfs_node.c (working copy) @@ -47,6 +47,7 @@ #include <sys/proc.h> #include <sys/socket.h> #include <sys/sysctl.h> +#include <sys/taskqueue.h> #include <sys/vnode.h> #include <vm/uma.h> @@ -185,6 +186,26 @@ return (0); } +static void +nfs_freesillyrename(void *arg, int pending) +{ + struct sillyrename *sp; + + sp = arg; + vn_lock(sp->s_dvp, LK_SHARED | LK_RETRY); + vn_lock(sp->s_vp, LK_EXCLUSIVE | LK_RETRY); + (void)nfs_vinvalbuf(ap->a_vp, 0, td, 1); + /* + * Remove the silly file that was rename'd earlier + */ + (sp->s_removeit)(sp); + crfree(sp->s_cred); + vput(sp->s_dvp); + VOP_UNLOCK(sp->s_vp, 0); + vdrop(sp->s_vp); + free((caddr_t)sp, M_NFSREQ); +} + int nfs_inactive(struct vop_inactive_args *ap) { @@ -200,15 +221,9 @@ } else sp = NULL; if (sp) { + TASK_INIT(&sp->task, 0, nfs_freesillyrename, sp); + taskqueue_enqueue(taskqueue_thread, &sp->task); mtx_unlock(&np->n_mtx); - (void)nfs_vinvalbuf(ap->a_vp, 0, td, 1); - /* - * Remove the silly file that was rename'd earlier - */ - (sp->s_removeit)(sp); - crfree(sp->s_cred); - vrele(sp->s_dvp); - free((caddr_t)sp, M_NFSREQ); mtx_lock(&np->n_mtx); } np->n_flag &= NMODIFIED; -- John Baldwin
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201107220855.10774.jhb>