From owner-freebsd-current Mon Jul 23 9:12:27 2001 Delivered-To: freebsd-current@freebsd.org Received: from salmon.maths.tcd.ie (salmon.maths.tcd.ie [134.226.81.11]) by hub.freebsd.org (Postfix) with SMTP id E9A8037B407; Mon, 23 Jul 2001 09:12:14 -0700 (PDT) (envelope-from iedowse@maths.tcd.ie) Received: from walton.maths.tcd.ie by salmon.maths.tcd.ie with SMTP id ; 23 Jul 2001 17:12:13 +0100 (BST) To: Maxim Sobolev Cc: current@FreeBSD.org Subject: Re: NFS client unable to recover from server crash In-Reply-To: Your message of "Mon, 23 Jul 2001 18:53:05 +0300." <200107231554.f6NFsLk65848@vega.vega.com> Date: Mon, 23 Jul 2001 17:12:13 +0100 From: Ian Dowse Message-ID: <200107231712.aa22684@salmon.maths.tcd.ie> Sender: owner-freebsd-current@FreeBSD.ORG Precedence: bulk List-ID: List-Archive: (Web Archive) List-Help: (List Instructions) List-Subscribe: List-Unsubscribe: X-Loop: FreeBSD.ORG In message <200107231554.f6NFsLk65848@vega.vega.com>, Maxim Sobolev writes: >I found that after introduction of the new RPC NFS client is no longer >able to recover from server crash (both client and server are 5-CURRENT >systems). After a well known `nfs server not responding' message, client >hangs and even though server comes back in a minute or two it doesn't >recover and just sits in this state forever. All unmount requests gets >stuck in the kernel, so as a processes that accessing files from that >mount point. This doesn't looks like a right thing and obviously should >be fixed before 5.0-RELEASE. I've seen some similar effects, but I don't think it has anything to do with the new RPC code, as that only runs at mount time. It would be useful if you could use tcpdump to see if any requests are being transmitted, and if they are getting responses. Also try running kgdb on the client to get a kernel backtrace of the stuck processes. Is this a UDP or TCP based mount? If you are feeling brave, you could also try the patch below. 
It is a selection of changes to the kernel NFS code that I have built up over the last few months. I don't think it could solve the hangs, but it should improve the chance of interruptible mounts accepting ^C while waiting, and (just added the other day) umount -f should work while the server is down even if processes are hung. Ian Index: nfs.h =================================================================== RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs.h,v retrieving revision 1.59 diff -u -r1.59 nfs.h --- nfs.h 2001/04/17 20:45:21 1.59 +++ nfs.h 2001/07/20 13:19:51 @@ -633,6 +633,7 @@ struct mbuf *)); int nfs_adv __P((struct mbuf **, caddr_t *, int, int)); void nfs_nhinit __P((void)); +void nfs_nmcancelreqs __P((struct nfsmount *)); void nfs_timer __P((void*)); int nfsrv_dorec __P((struct nfssvc_sock *, struct nfsd *, struct nfsrv_descript **)); Index: nfs_nqlease.c =================================================================== RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_nqlease.c,v retrieving revision 1.59 diff -u -r1.59 nfs_nqlease.c --- nfs_nqlease.c 2001/05/01 08:13:14 1.59 +++ nfs_nqlease.c 2001/05/01 14:29:22 @@ -952,7 +952,9 @@ } /* - * Called for client side callbacks + * Called for client side callbacks. + * NB: We are responsible for freeing `mrep' in all cases, but note + * that anything that does a 'goto nfsmout' frees it for us. 
*/ int nqnfs_callback(nmp, mrep, md, dpos) @@ -982,8 +984,10 @@ nfsd->nd_md = md; nfsd->nd_dpos = dpos; error = nfs_getreq(nfsd, &tnfsd, FALSE); - if (error) + if (error) { + m_freem(mrep); return (error); + } md = nfsd->nd_md; dpos = nfsd->nd_dpos; if (nfsd->nd_procnum != NQNFSPROC_EVICTED) { Index: nfs_socket.c =================================================================== RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_socket.c,v retrieving revision 1.66 diff -u -r1.66 nfs_socket.c --- nfs_socket.c 2001/05/01 08:13:14 1.66 +++ nfs_socket.c 2001/07/20 13:45:01 @@ -144,7 +144,8 @@ */ #define NFS_CWNDSCALE 256 #define NFS_MAXCWND (NFS_CWNDSCALE * 32) -static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, }; +#define NFS_NBACKOFF 8 +static int nfs_backoff[NFS_NBACKOFF] = { 2, 4, 8, 16, 32, 64, 128, 256, }; int nfsrtton = 0; struct nfsrtt nfsrtt; struct callout_handle nfs_timer_handle; @@ -299,11 +300,17 @@ splx(s); } if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) { - so->so_rcv.sb_timeo = (5 * hz); - so->so_snd.sb_timeo = (5 * hz); + so->so_rcv.sb_timeo = (2 * hz); + so->so_snd.sb_timeo = (2 * hz); } else { - so->so_rcv.sb_timeo = 0; - so->so_snd.sb_timeo = 0; + /* + * We would normally set the timeouts to 0 (never time out) + * for non-interruptible mounts. However, nfs_nmcancelreqs() + * can still prematurely terminate requests, so avoid + * waiting forever. + */ + so->so_rcv.sb_timeo = 10 * hz; + so->so_snd.sb_timeo = 10 * hz; } /* @@ -1400,10 +1407,18 @@ for (rep = nfs_reqq.tqh_first; rep != 0; rep = rep->r_chain.tqe_next) { nmp = rep->r_nmp; if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) - continue; - if (nfs_sigintr(nmp, rep, rep->r_procp)) { - nfs_softterm(rep); continue; + /* + * Test for signals on interruptible mounts. We try to + * maintain normal (uninterruptible) semantics while the + * server is up, but respond quickly to signals when it + * is down. 
+ */ + if (nmp->nm_timeouts >= NFS_NBACKOFF / 2) { + if (nfs_sigintr(nmp, rep, rep->r_procp)) { + nfs_softterm(rep); + continue; + } } if (rep->r_rtt >= 0) { rep->r_rtt++; @@ -1415,7 +1430,7 @@ timeo *= nfs_backoff[nmp->nm_timeouts - 1]; if (rep->r_rtt <= timeo) continue; - if (nmp->nm_timeouts < 8) + if (nmp->nm_timeouts < NFS_NBACKOFF) nmp->nm_timeouts++; } /* @@ -1438,8 +1453,6 @@ rep->r_rexmit = NFS_MAXREXMIT; continue; } - if ((so = nmp->nm_so) == NULL) - continue; /* * If there is enough space and the window allows.. @@ -1447,6 +1460,8 @@ * Set r_rtt to -1 in case we fail to send it now. */ rep->r_rtt = -1; + if ((so = nmp->nm_so) == NULL) + continue; if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len && ((nmp->nm_flag & NFSMNT_DUMBTIMR) || (rep->r_flags & R_SENT) || @@ -1510,6 +1525,27 @@ } /* + * Mark all outstanding requests pertaining to a nfs mount with R_SOFTTERM. + * This is used by forced unmounts to terminate any outstanding RPCs. + */ +void +nfs_nmcancelreqs(nmp) + struct nfsmount *nmp; +{ + struct nfsreq *req; + int s; + + s = splnet(); + for (req = nfs_reqq.tqh_first; req != 0; req = req->r_chain.tqe_next) { + if (nmp != req->r_nmp || req->r_mrep != NULL || + (req->r_flags & R_SOFTTERM)) + continue; + nfs_softterm(req); + } + splx(s); +} + +/* * Flag a request as being about to terminate (due to NFSMNT_INT/NFSMNT_SOFT). * The nm_send count is decremented now to avoid deadlocks when the process in * soreceive() hasn't yet managed to send its own request. 
@@ -1576,7 +1612,7 @@ } else p = (struct proc *)0; while (*statep & NFSSTA_SNDLOCK) { - if (nfs_sigintr(rep->r_nmp, rep, p)) + if (rep != NULL && (rep->r_flags & R_SOFTTERM)) return (EINTR); *statep |= NFSSTA_WANTSND; (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), @@ -1620,7 +1656,7 @@ else slpflag = 0; while (*statep & NFSSTA_RCVLOCK) { - if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp)) + if (rep != NULL && (rep->r_flags & R_SOFTTERM)) return (EINTR); *statep |= NFSSTA_WANTRCV; (void) tsleep((caddr_t)statep, slpflag | (PZERO - 1), "nfsrcvlk", @@ -1638,6 +1674,9 @@ slptimeo = 2 * hz; } } + /* Always fail if our request has been cancelled. */ + if (rep != NULL && (rep->r_flags & R_SOFTTERM)) + return (EINTR); *statep |= NFSSTA_RCVLOCK; return (0); } Index: nfs_subs.c =================================================================== RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_subs.c,v retrieving revision 1.103 diff -u -r1.103 nfs_subs.c --- nfs_subs.c 2001/07/04 16:20:16 1.103 +++ nfs_subs.c 2001/07/10 21:46:16 @@ -1120,7 +1120,7 @@ nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); - nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; + nfs_ticks = (hz * NFS_TICKINTVL + 999) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ Index: nfs_vfsops.c =================================================================== RCS file: /dump/FreeBSD-CVS/src/sys/nfs/nfs_vfsops.c,v retrieving revision 1.100 diff -u -r1.100 nfs_vfsops.c --- nfs_vfsops.c 2001/06/28 04:10:07 1.100 +++ nfs_vfsops.c 2001/07/20 13:47:21 @@ -624,7 +624,7 @@ splx(s); if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { - nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; + nmp->nm_timeo = (argp->timeo * NFS_HZ + 9) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) @@ -970,6 +970,10 @@ nmp->nm_state |= NFSSTA_DISMINPROG; while (nmp->nm_inprog != NULLVP) (void) 
tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0); + + /* In the forced case, cancel any outstanding requests. */ + if (flags & FORCECLOSE) + nfs_nmcancelreqs(nmp); /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ error = vflush(mp, 1, flags); To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-current" in the body of the message