From owner-freebsd-current@FreeBSD.ORG Sun Aug 15 08:53:44 2004 Return-Path: Delivered-To: freebsd-current@freebsd.org Received: from mx1.FreeBSD.org (mx1.freebsd.org [216.136.204.125]) by hub.freebsd.org (Postfix) with ESMTP id 786CF16A4CE; Sun, 15 Aug 2004 08:53:44 +0000 (GMT) Received: from smtp.des.no (flood.des.no [217.116.83.31]) by mx1.FreeBSD.org (Postfix) with ESMTP id 91F7E43D3F; Sun, 15 Aug 2004 08:53:43 +0000 (GMT) (envelope-from des@des.no) Received: by smtp.des.no (Pony Express, from userid 666) id 6EC1C530C; Sun, 15 Aug 2004 10:53:42 +0200 (CEST) Received: from dwp.des.no (des.no [80.203.228.37]) by smtp.des.no (Pony Express) with ESMTP id 7882D5308; Sun, 15 Aug 2004 10:53:33 +0200 (CEST) Received: by dwp.des.no (Postfix, from userid 2602) id 246AAB872; Sun, 15 Aug 2004 10:53:33 +0200 (CEST) To: Robert Watson References: From: des@des.no (=?iso-8859-1?q?Dag-Erling_Sm=F8rgrav?=) Date: Sun, 15 Aug 2004 10:53:33 +0200 In-Reply-To: (Robert Watson's message of "Fri, 13 Aug 2004 11:20:00 -0400 (EDT)") Message-ID: User-Agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (berkeley-unix) MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="=-=-=" X-Spam-Checker-Version: SpamAssassin 2.63 (2004-01-11) on flood.des.no X-Spam-Level: X-Spam-Status: No, hits=0.0 required=5.0 tests=AWL autolearn=no version=2.63 cc: Martin Blapp cc: freebsd-current@freebsd.org Subject: Re: Witness panic in pseudofs_vncache X-BeenThere: freebsd-current@freebsd.org X-Mailman-Version: 2.1.1 Precedence: list List-Id: Discussions about the use of FreeBSD-current List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Sun, 15 Aug 2004 08:53:44 -0000 --=-=-= Content-Type: text/plain; charset=iso-8859-1 Content-Transfer-Encoding: quoted-printable Robert Watson writes: > This appears to be a bug in pseudofs due to calling vgone() while holding > the pfs_vncache_mutex, as vgone() can sleep due to acquiring lockmgr > locks. The fix is likely to make this looping even less efficient by > dropping the mutex before calling vgone() on a vnode, and then restarting. > I've CC'd DES because this is his baby. What do you think of the attached patch? DES --=20 Dag-Erling Sm=F8rgrav - des@des.no --=-=-= Content-Type: text/x-patch Content-Disposition: attachment; filename=pseudofs_vncache.diff Index: sys/fs/pseudofs/pseudofs_vncache.c =================================================================== RCS file: /home/ncvs/src/sys/fs/pseudofs/pseudofs_vncache.c,v retrieving revision 1.25 diff -u -r1.25 pseudofs_vncache.c --- sys/fs/pseudofs/pseudofs_vncache.c 14 Mar 2004 15:57:45 -0000 1.25 +++ sys/fs/pseudofs/pseudofs_vncache.c 15 Aug 2004 08:49:21 -0000 @@ -80,8 +80,7 @@ void pfs_vncache_load(void) { - mtx_init(&pfs_vncache_mutex, "pseudofs_vncache", NULL, - MTX_DEF | MTX_RECURSE); + mtx_init(&pfs_vncache_mutex, "pseudofs_vncache", NULL, MTX_DEF); pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL, EVENTHANDLER_PRI_ANY); } @@ -220,24 +219,35 @@ static void pfs_exit(void *arg, struct proc *p) { - struct pfs_vdata *pvd, *prev; + struct pfs_vdata *pvd; + struct vnode *vnp; mtx_lock(&Giant); - mtx_lock(&pfs_vncache_mutex); /* - * The double loop is necessary because vgone() indirectly - * calls pfs_vncache_free() which frees pvd, so we have to - * backtrace one step every time we free a vnode. + * This is extremely inefficient due to the fact that vgone() not + * only indirectly modifies the vnode cache, but may also sleep. + * We can neither hold pfs_vncache_mutex across a vgone() call, + * nor make any assumptions about the state of the cache after + * vgone() returns. In consequence, we must start over after + * every vgone() call, and keep trying until we manage to traverse + * the entire cache. + * + * The only way to improve this situation is to change the data + * structure used to implement the cache. An obvious choice in + * this particular case would be a BST sorted by PID. */ - /* XXX linear search... not very efficient */ - for (pvd = pfs_vncache; pvd != NULL; pvd = pvd->pvd_next) { - while (pvd != NULL && pvd->pvd_pid == p->p_pid) { - prev = pvd->pvd_prev; - vgone(pvd->pvd_vnode); - pvd = prev ? prev->pvd_next : pfs_vncache; + mtx_lock(&pfs_vncache_mutex); + pvd = pfs_vncache; + while (pvd != NULL) { + if (pvd->pvd_pid == p->p_pid) { + vnp = pvd->pvd_vnode; + mtx_unlock(&pfs_vncache_mutex); + vgone(vnp); + mtx_lock(&pfs_vncache_mutex); + pvd = pfs_vncache; + } else { + pvd = pvd->pvd_next; } - if (pvd == NULL) - break; } mtx_unlock(&pfs_vncache_mutex); mtx_unlock(&Giant); @@ -249,22 +259,25 @@ int pfs_disable(struct pfs_node *pn) { - struct pfs_vdata *pvd, *prev; + struct pfs_vdata *pvd; + struct vnode *vnp; if (pn->pn_flags & PFS_DISABLED) return (0); - mtx_lock(&pfs_vncache_mutex); pn->pn_flags |= PFS_DISABLED; - /* see the comment about the double loop in pfs_exit() */ - /* XXX linear search... not very efficient */ - for (pvd = pfs_vncache; pvd != NULL; pvd = pvd->pvd_next) { - while (pvd != NULL && pvd->pvd_pn == pn) { - prev = pvd->pvd_prev; - vgone(pvd->pvd_vnode); - pvd = prev ? prev->pvd_next : pfs_vncache; + /* XXX see comment above nearly identical code in pfs_exit() */ + mtx_lock(&pfs_vncache_mutex); + pvd = pfs_vncache; + while (pvd != NULL) { + if (pvd->pvd_pn == pn) { + vnp = pvd->pvd_vnode; + mtx_unlock(&pfs_vncache_mutex); + vgone(vnp); + mtx_lock(&pfs_vncache_mutex); + pvd = pfs_vncache; + } else { + pvd = pvd->pvd_next; } - if (pvd == NULL) - break; } mtx_unlock(&pfs_vncache_mutex); return (0); --=-=-=--