From owner-freebsd-stable  Thu Aug 12 12:33:54 1999
Delivered-To: freebsd-stable@freebsd.org
Received: from rover.village.org (rover.village.org [204.144.255.49])
	by hub.freebsd.org (Postfix) with ESMTP id 08CC614F80
	for <stable@freebsd.org>; Thu, 12 Aug 1999 12:33:42 -0700 (PDT)
	(envelope-from imp@harmony.village.org)
Received: from harmony.village.org (harmony.village.org [10.0.0.6])
	by rover.village.org (8.9.3/8.9.3) with ESMTP id NAA24458
	for <stable@freebsd.org>; Thu, 12 Aug 1999 13:33:49 -0600 (MDT)
	(envelope-from imp@harmony.village.org)
Received: from harmony.village.org (localhost.village.org [127.0.0.1]) by harmony.village.org (8.9.3/8.8.3) with ESMTP id NAA05527 for <stable@freebsd.org>; Thu, 12 Aug 1999 13:33:42 -0600 (MDT)
Message-Id: <199908121933.NAA05527@harmony.village.org>
To: stable@freebsd.org
Subject: Kludge-o-round for 1 3.2 STABLE NFS problem
Date: Thu, 12 Aug 1999 13:33:41 -0600
From: Warner Losh <imp@village.org>
Sender: owner-freebsd-stable@FreeBSD.ORG
Precedence: bulk
X-Loop: FreeBSD.ORG


We were having all kinds of problems using NFS on locally mounted file
systems (that is, the client and server were on the same machine).  It
turns out that this can cause deadlock when large amounts of data are
processed (a simple cat /tmp/11M-file /local-nfa/tmp would cause it).
After working with Matt Dillon for a while, I came up with the
following kludge-o-round.  It messes with the very performance
sensitive getnewbuffer() routine in vfs_bio.c, and generally works
hard to prevent the deadlock from happening.  It isn't a suitable fix
for -stable, so I'm posting it here for people to use in the
interrum.  It only impacts locally mounted nfs file systems and note
remotely mounted ones.  The only reason that we hit this was due to
our heavy use of the automounter...

It also is relevant only for -stable.  Matt's changes in -current
(which can't easily be merged into -stable) make this deadlock
impossible.

I'm working with Matt to come up with a committable fix, but until
then I thought I'd post this for people experiencing hangs on their
-current systems with locally mounted file systems.  It is provided
as-is with no warrantee or support what so ever.

Warner

Index: kern/vfs_bio.c
===================================================================
RCS file: /base/FreeBSD-CVS/src/sys/kern/vfs_bio.c,v
retrieving revision 1.193.2.6
diff -u -r1.193.2.6 vfs_bio.c
--- vfs_bio.c	1999/08/09 00:48:50	1.193.2.6
+++ vfs_bio.c	1999/08/11 18:52:02
@@ -51,6 +51,8 @@
 #include <sys/mount.h>
 #include <sys/malloc.h>
 #include <sys/resourcevar.h>
+/* XXX this is bogus, but needed */
+#include <sys/proc.h>
 
 static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer");
 
@@ -760,8 +762,8 @@
 			--numdirtybuffers;
 			bp->b_flags &= ~B_DELWRI;
 		}
-		vfs_bio_need_satisfy();
 	}
+	vfs_bio_need_satisfy();
 
 	/* unlock */
 	bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY |
@@ -979,20 +981,65 @@
 	 * We keep the file I/O from hogging metadata I/O
 	 * This is desirable because file data is cached in the
 	 * VM/Buffer cache even if a buffer is freed.
+	 *
+	 * If the NFS server is causing this potential flush out, then don't
+	 * flush NFS buffers.  This avoids a deadlock situation where the
+	 * nfs server needs a buffer and indirectly causes the nfs client
+	 * to try to flush a buffer out, which never completes because the
+	 * nfs server is locking the receive queue.
 	 */
+queue_age:
 	if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) {
 #if !defined(MAX_PERF)
 		if (bp->b_qindex != QUEUE_AGE)
 			panic("getnewbuf: inconsistent AGE queue, qindex=%d",
 			    bp->b_qindex);
 #endif
-	} else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) {
+		if ((curproc->p_flag & P_NFSSERV) && 
+		    bp->b_vp->v_tag == VT_NFS && (bp->b_flags & B_DELWRI)) {
+			if (bp->b_usecount == 0) {
+				bp = NULL;
+				goto queue_lru;
+			}
+			bp->b_usecount--;
+			TAILQ_REMOVE(&bufqueues[QUEUE_AGE], bp, b_freelist);
+			if (TAILQ_FIRST(&bufqueues[QUEUE_AGE]) != NULL) {
+				TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp,
+				    b_freelist);
+				goto queue_age;
+			}
+			TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp,
+			    b_freelist);
+			bp = NULL;
+		}
+	}
+queue_lru:
+	if (!bp && (bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) {
 #if !defined(MAX_PERF)
 		if (bp->b_qindex != QUEUE_LRU)
 			panic("getnewbuf: inconsistent LRU queue, qindex=%d",
 			    bp->b_qindex);
 #endif
+		if ((curproc->p_flag & P_NFSSERV) && 
+		    bp->b_vp->v_tag == VT_NFS && (bp->b_flags & B_DELWRI)) {
+			if (bp->b_usecount == 0) {
+				bp = NULL;
+				goto waitbuf;
+			}
+			bp->b_usecount--;
+			TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist);
+			if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) {
+				TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp,
+				    b_freelist);
+				bp = NULL;
+				goto queue_lru;
+			}
+			TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, 
+			    b_freelist);
+			bp = NULL;
+		}
 	}
+waitbuf:	
 	if (!bp) {
 		/* wait for a free buffer of any kind */
 		needsbuffer |= VFS_BIO_NEED_ANY;
@@ -1022,7 +1069,6 @@
 			TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist);
 		}
 	}
-
 
 	/* if we are a delayed write, convert to an async write */
 	if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) {
Index: nfs/nfs_syscalls.c
===================================================================
RCS file: /base/FreeBSD-CVS/src/sys/nfs/nfs_syscalls.c,v
retrieving revision 1.44.2.1
diff -u -r1.44.2.1 nfs_syscalls.c
--- nfs_syscalls.c	1999/06/30 22:05:18	1.44.2.1
+++ nfs_syscalls.c	1999/08/11 16:21:58
@@ -483,6 +483,7 @@
 		bzero((caddr_t)nfsd, sizeof (struct nfsd));
 		s = splnet();
 		nfsd->nfsd_procp = p;
+		p->p_flag |= P_NFSSERV;
 		TAILQ_INSERT_TAIL(&nfsd_head, nfsd, nfsd_chain);
 		nfs_numnfsd++;
 	} else
Index: sys/proc.h
===================================================================
RCS file: /base/FreeBSD-CVS/src/sys/sys/proc.h,v
retrieving revision 1.66.2.4
diff -u -r1.66.2.4 proc.h
--- proc.h	1999/05/14 06:32:41	1.66.2.4
+++ proc.h	1999/08/11 16:21:41
@@ -268,6 +268,7 @@
 
 #define	P_NOCLDWAIT	0x400000 /* No zombies if child dies */
 
+#define P_NFSSERV	0x2000000	/* Stamp NFS servers */
 
 /*
  * MOVE TO ucred.h?


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-stable" in the body of the message