Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 15 Aug 2004 10:53:33 +0200
From:      des@des.no (=?iso-8859-1?q?Dag-Erling_Sm=F8rgrav?=)
To:        Robert Watson <rwatson@freebsd.org>
Cc:        freebsd-current@freebsd.org
Subject:   Re: Witness panic in pseudofs_vncache
Message-ID:  <xzpfz6ohjea.fsf@dwp.des.no>
In-Reply-To: <Pine.NEB.3.96L.1040813111746.73100B-100000@fledge.watson.org> (Robert Watson's message of "Fri, 13 Aug 2004 11:20:00 -0400 (EDT)")
References:  <Pine.NEB.3.96L.1040813111746.73100B-100000@fledge.watson.org>

next in thread | previous in thread | raw e-mail | index | archive | help
--=-=-=
Content-Type: text/plain; charset=iso-8859-1
Content-Transfer-Encoding: quoted-printable

Robert Watson <rwatson@freebsd.org> writes:
> This appears to be a bug in pseudofs due to calling vgone() while holding
> the pfs_vncache_mutex, as vgone() can sleep due to acquiring lockmgr
> locks.  The fix is likely to make this looping even less efficient by
> dropping the mutex before calling vgone() on a vnode, and then restarting.
> I've CC'd DES because this is his baby.

What do you think of the attached patch?

DES
--=20
Dag-Erling Sm=F8rgrav - des@des.no


--=-=-=
Content-Type: text/x-patch
Content-Disposition: attachment; filename=pseudofs_vncache.diff

Index: sys/fs/pseudofs/pseudofs_vncache.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/pseudofs/pseudofs_vncache.c,v
retrieving revision 1.25
diff -u -r1.25 pseudofs_vncache.c
--- sys/fs/pseudofs/pseudofs_vncache.c	14 Mar 2004 15:57:45 -0000	1.25
+++ sys/fs/pseudofs/pseudofs_vncache.c	15 Aug 2004 08:49:21 -0000
@@ -80,8 +80,7 @@
 void
 pfs_vncache_load(void)
 {
-	mtx_init(&pfs_vncache_mutex, "pseudofs_vncache", NULL,
-	    MTX_DEF | MTX_RECURSE);
+	mtx_init(&pfs_vncache_mutex, "pseudofs_vncache", NULL, MTX_DEF);
 	pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
@@ -220,24 +219,35 @@
 static void
 pfs_exit(void *arg, struct proc *p)
 {
-	struct pfs_vdata *pvd, *prev;
+	struct pfs_vdata *pvd;
+	struct vnode *vnp;
 
 	mtx_lock(&Giant);
-	mtx_lock(&pfs_vncache_mutex);
 	/*
-	 * The double loop is necessary because vgone() indirectly
-	 * calls pfs_vncache_free() which frees pvd, so we have to
-	 * backtrace one step every time we free a vnode.
+	 * This is extremely inefficient due to the fact that vgone() not
+	 * only indirectly modifies the vnode cache, but may also sleep.
+	 * We can neither hold pfs_vncache_mutex across a vgone() call,
+	 * nor make any assumptions about the state of the cache after
+	 * vgone() returns.  In consequence, we must start over after
+	 * every vgone() call, and keep trying until we manage to traverse
+	 * the entire cache.
+	 *
+	 * The only way to improve this situation is to change the data
+	 * structure used to implement the cache.  An obvious choice in
+	 * this particular case would be a BST sorted by PID.
 	 */
-	/* XXX linear search... not very efficient */
-	for (pvd = pfs_vncache; pvd != NULL; pvd = pvd->pvd_next) {
-		while (pvd != NULL && pvd->pvd_pid == p->p_pid) {
-			prev = pvd->pvd_prev;
-			vgone(pvd->pvd_vnode);
-			pvd = prev ? prev->pvd_next : pfs_vncache;
+	mtx_lock(&pfs_vncache_mutex);
+	pvd = pfs_vncache;
+	while (pvd != NULL) {
+		if (pvd->pvd_pid == p->p_pid) {
+			vnp = pvd->pvd_vnode;
+			mtx_unlock(&pfs_vncache_mutex);
+			vgone(vnp);
+			mtx_lock(&pfs_vncache_mutex);
+			pvd = pfs_vncache;
+		} else {
+			pvd = pvd->pvd_next;
 		}
-		if (pvd == NULL)
-			break;
 	}
 	mtx_unlock(&pfs_vncache_mutex);
 	mtx_unlock(&Giant);
@@ -249,22 +259,25 @@
 int
 pfs_disable(struct pfs_node *pn)
 {
-	struct pfs_vdata *pvd, *prev;
+	struct pfs_vdata *pvd;
+	struct vnode *vnp;
 
 	if (pn->pn_flags & PFS_DISABLED)
 		return (0);
-	mtx_lock(&pfs_vncache_mutex);
 	pn->pn_flags |= PFS_DISABLED;
-	/* see the comment about the double loop in pfs_exit() */
-	/* XXX linear search... not very efficient */
-	for (pvd = pfs_vncache; pvd != NULL; pvd = pvd->pvd_next) {
-		while (pvd != NULL && pvd->pvd_pn == pn) {
-			prev = pvd->pvd_prev;
-			vgone(pvd->pvd_vnode);
-			pvd = prev ? prev->pvd_next : pfs_vncache;
+	/* XXX see comment above nearly identical code in pfs_exit() */
+	mtx_lock(&pfs_vncache_mutex);
+	pvd = pfs_vncache;
+	while (pvd != NULL) {
+		if (pvd->pvd_pn == pn) {
+			vnp = pvd->pvd_vnode;
+			mtx_unlock(&pfs_vncache_mutex);
+			vgone(vnp);
+			mtx_lock(&pfs_vncache_mutex);
+			pvd = pfs_vncache;
+		} else {
+			pvd = pvd->pvd_next;
 		}
-		if (pvd == NULL)
-			break;
 	}
 	mtx_unlock(&pfs_vncache_mutex);
 	return (0);

--=-=-=--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?xzpfz6ohjea.fsf>