From owner-freebsd-current  Mon Jul 23  9:12:27 2001
Delivered-To: freebsd-current@freebsd.org
Received: from salmon.maths.tcd.ie (salmon.maths.tcd.ie [134.226.81.11])
	by hub.freebsd.org (Postfix) with SMTP
	id E9A8037B407; Mon, 23 Jul 2001 09:12:14 -0700 (PDT)
	(envelope-from iedowse@maths.tcd.ie)
Received: from walton.maths.tcd.ie by salmon.maths.tcd.ie with SMTP
          id <aa22684@salmon>; 23 Jul 2001 17:12:13 +0100 (BST)
To: Maxim Sobolev <sobomax@FreeBSD.org>
Cc: current@FreeBSD.org
Subject: Re: NFS client unable to recover from server crash 
In-Reply-To: Your message of "Mon, 23 Jul 2001 18:53:05 +0300."
             <200107231554.f6NFsLk65848@vega.vega.com> 
Date: Mon, 23 Jul 2001 17:12:13 +0100
From: Ian Dowse <iedowse@maths.tcd.ie>
Message-ID:  <200107231712.aa22684@salmon.maths.tcd.ie>
Sender: owner-freebsd-current@FreeBSD.ORG
Precedence: bulk
List-ID: <freebsd-current.FreeBSD.ORG>
List-Archive: <http://docs.freebsd.org/mail/> (Web Archive)
List-Help: <mailto:majordomo@FreeBSD.ORG?subject=help> (List Instructions)
List-Subscribe: <mailto:majordomo@FreeBSD.ORG?subject=subscribe%20freebsd-current>
List-Unsubscribe: <mailto:majordomo@FreeBSD.ORG?subject=unsubscribe%20freebsd-current>
X-Loop: FreeBSD.ORG

In message <200107231554.f6NFsLk65848@vega.vega.com>, Maxim Sobolev writes:
>I found that after the introduction of the new RPC, the NFS client is no
>longer able to recover from a server crash (both client and server are
>5-CURRENT systems). After the well-known `nfs server not responding'
>message, the client hangs, and even though the server comes back in a
>minute or two, it doesn't recover and just sits in this state forever.
>All unmount requests get stuck in the kernel, as do any processes that
>are accessing files from that mount point. This doesn't look like the
>right thing and obviously should be fixed before 5.0-RELEASE.

I've seen some similar effects, but I don't think it has anything
to do with the new RPC code, as that only runs at mount time. It
would be useful if you could use tcpdump to see if any requests
are being transmitted, and if they are getting responses. Also
try running kgdb on the client to get a kernel backtrace of the
stuck processes.

Is this a UDP or TCP based mount?

If you are feeling brave, you could also try the patch below. It
is a selection of changes to the kernel NFS code that I have built
up over the last few months. I don't think it could solve the hangs,
but it should improve the chance of interruptible mounts accepting
^C while waiting, and (just added the other day) umount -f should
work while the server is down even if processes are hung.

Ian


Index: nfs.h
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs.h,v
retrieving revision 1.59
diff -u -r1.59 nfs.h
--- nfs.h	2001/04/17 20:45:21	1.59
+++ nfs.h	2001/07/20 13:19:51
@@ -633,6 +633,7 @@
 			      struct mbuf *));
 int	nfs_adv __P((struct mbuf **, caddr_t *, int, int));
 void	nfs_nhinit __P((void));
+void	nfs_nmcancelreqs __P((struct nfsmount *));
 void	nfs_timer __P((void*));
 int	nfsrv_dorec __P((struct nfssvc_sock *, struct nfsd *, 
 			 struct nfsrv_descript **));
Index: nfs_nqlease.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_nqlease.c,v
retrieving revision 1.59
diff -u -r1.59 nfs_nqlease.c
--- nfs_nqlease.c	2001/05/01 08:13:14	1.59
+++ nfs_nqlease.c	2001/05/01 14:29:22
@@ -952,7 +952,9 @@
 }
 
 /*
- * Called for client side callbacks
+ * Called for client side callbacks.
+ * NB: We are responsible for freeing `mrep' in all cases, but note
+ * that anything that does a 'goto nfsmout' frees it for us.
  */
 int
 nqnfs_callback(nmp, mrep, md, dpos)
@@ -982,8 +984,10 @@
 	nfsd->nd_md = md;
 	nfsd->nd_dpos = dpos;
 	error = nfs_getreq(nfsd, &tnfsd, FALSE);
-	if (error)
+	if (error) {
+		m_freem(mrep);
 		return (error);
+	}
 	md = nfsd->nd_md;
 	dpos = nfsd->nd_dpos;
 	if (nfsd->nd_procnum != NQNFSPROC_EVICTED) {
Index: nfs_socket.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_socket.c,v
retrieving revision 1.66
diff -u -r1.66 nfs_socket.c
--- nfs_socket.c	2001/05/01 08:13:14	1.66
+++ nfs_socket.c	2001/07/20 13:45:01
@@ -144,7 +144,8 @@
  */
 #define	NFS_CWNDSCALE	256
 #define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
-static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+#define NFS_NBACKOFF	8
+static int nfs_backoff[NFS_NBACKOFF] = { 2, 4, 8, 16, 32, 64, 128, 256, };
 int nfsrtton = 0;
 struct nfsrtt nfsrtt;
 struct callout_handle	nfs_timer_handle;
@@ -299,11 +300,17 @@
 		splx(s);
 	}
 	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
-		so->so_rcv.sb_timeo = (5 * hz);
-		so->so_snd.sb_timeo = (5 * hz);
+		so->so_rcv.sb_timeo = (2 * hz);
+		so->so_snd.sb_timeo = (2 * hz);
 	} else {
-		so->so_rcv.sb_timeo = 0;
-		so->so_snd.sb_timeo = 0;
+		/*
+		 * We would normally set the timeouts to 0 (never time out)
+		 * for non-interruptible mounts. However, nfs_nmcancelreqs()
+		 * can still prematurely terminate requests, so avoid
+		 * waiting forever.
+		 */
+		so->so_rcv.sb_timeo = 10 * hz;
+		so->so_snd.sb_timeo = 10 * hz;
 	}
 
 	/*
@@ -1400,10 +1407,18 @@
 	for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) {
 		nmp = rep->r_nmp;
 		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
-			continue;
-		if (nfs_sigintr(nmp, rep, rep->r_procp)) {
-			nfs_softterm(rep);
 			continue;
+		/*
+		 * Test for signals on interruptible mounts. We try to
+		 * maintain normal (uninterruptible) semantics while the
+		 * server is up, but respond quickly to signals when it
+		 * is down.
+		 */
+		if (nmp->nm_timeouts >= NFS_NBACKOFF / 2) {
+			if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+				nfs_softterm(rep);
+				continue;
+			}
 		}
 		if (rep->r_rtt >= 0) {
 			rep->r_rtt++;
@@ -1415,7 +1430,7 @@
 				timeo *= nfs_backoff[nmp->nm_timeouts - 1];
 			if (rep->r_rtt <= timeo)
 				continue;
-			if (nmp->nm_timeouts < 8)
+			if (nmp->nm_timeouts < NFS_NBACKOFF)
 				nmp->nm_timeouts++;
 		}
 		/*
@@ -1438,8 +1453,6 @@
 				rep->r_rexmit = NFS_MAXREXMIT;
 			continue;
 		}
-		if ((so = nmp->nm_so) == NULL)
-			continue;
 
 		/*
 		 * If there is enough space and the window allows..
@@ -1447,6 +1460,8 @@
 		 * Set r_rtt to -1 in case we fail to send it now.
 		 */
 		rep->r_rtt = -1;
+		if ((so = nmp->nm_so) == NULL)
+			continue;
 		if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
 		   ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
 		    (rep->r_flags & R_SENT) ||
@@ -1510,6 +1525,27 @@
 }
 
 /*
+ * Mark all outstanding requests pertaining to a nfs mount with R_SOFTTERM.
+ * This is used by forced unmounts to terminate any outstanding RPCs.
+ */
+void
+nfs_nmcancelreqs(nmp)
+	struct nfsmount *nmp;
+{
+	struct nfsreq *req;
+	int s;
+
+	s = splnet();
+	for (req = nfs_reqq.tqh_first; req != 0; req = req->r_chain.tqe_next) {
+		if (nmp != req->r_nmp || req->r_mrep != NULL ||
+		    (req->r_flags & R_SOFTTERM))
+			continue;
+		nfs_softterm(req);
+	}
+	splx(s);
+}
+
+/*
  * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT).
  * The nm_send count is decremented now to avoid deadlocks when the process in
  * soreceive() hasn't yet managed to send its own request.
@@ -1576,7 +1612,7 @@
 	} else
 		p = (struct proc *)0;
 	while (*statep & NFSSTA_SNDLOCK) {
-		if (nfs_sigintr(rep->r_nmp, rep, p))
+		if (rep != NULL && (rep->r_flags & R_SOFTTERM))
 			return (EINTR);
 		*statep |= NFSSTA_WANTSND;
 		(void) tsleep((caddr_t)statep, slpflag | (PZERO - 1),
@@ -1620,7 +1656,7 @@
 	else
 		slpflag = 0;
 	while (*statep & NFSSTA_RCVLOCK) {
-		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+		if (rep != NULL && (rep->r_flags & R_SOFTTERM))
 			return (EINTR);
 		*statep |= NFSSTA_WANTRCV;
 		(void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk",
@@ -1638,6 +1674,9 @@
 			slptimeo = 2 * hz;
 		}
 	}
+	/* Always fail if our request has been cancelled. */
+	if (rep != NULL && (rep->r_flags & R_SOFTTERM))
+		return (EINTR);
 	*statep |= NFSSTA_RCVLOCK;
 	return (0);
 }
Index: nfs_subs.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_subs.c,v
retrieving revision 1.103
diff -u -r1.103 nfs_subs.c
--- nfs_subs.c	2001/07/04 16:20:16	1.103
+++ nfs_subs.c	2001/07/10 21:46:16
@@ -1120,7 +1120,7 @@
 	nfs_true = txdr_unsigned(TRUE);
 	nfs_false = txdr_unsigned(FALSE);
 	nfs_xdrneg1 = txdr_unsigned(-1);
-	nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000;
+	nfs_ticks = (hz * NFS_TICKINTVL + 999) / 1000;
 	if (nfs_ticks < 1)
 		nfs_ticks = 1;
 	/* Ensure async daemons disabled */
Index: nfs_vfsops.c
===================================================================
RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_vfsops.c,v
retrieving revision 1.100
diff -u -r1.100 nfs_vfsops.c
--- nfs_vfsops.c	2001/06/28 04:10:07	1.100
+++ nfs_vfsops.c	2001/07/20 13:47:21
@@ -624,7 +624,7 @@
 	splx(s);
 
 	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
-		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+		nmp->nm_timeo = (argp->timeo * NFS_HZ + 9) / 10;
 		if (nmp->nm_timeo < NFS_MINTIMEO)
 			nmp->nm_timeo = NFS_MINTIMEO;
 		else if (nmp->nm_timeo > NFS_MAXTIMEO)
@@ -970,6 +970,10 @@
 	nmp->nm_state |= NFSSTA_DISMINPROG;
 	while (nmp->nm_inprog != NULLVP)
 		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+
+	/* In the forced case, cancel any outstanding requests. */
+	if (flags & FORCECLOSE)
+		nfs_nmcancelreqs(nmp);
 
 	/* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */
 	error = vflush(mp, 1, flags);

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message