Date: Thu, 7 May 2009 15:42:10 -0400 From: Doug Rabson <dfr@rabson.org> To: Tom McLaughlin <tmclaugh@sdf.lonestar.org> Cc: current@freebsd.org Subject: Re: NFS lockd/statd lock up network connection Message-ID: <05FB85E2-1BB9-4EA7-9ED2-C92CB1014783@rabson.org> In-Reply-To: <49FCB96E.1010604@sdf.lonestar.org> References: <49D851FC.4090103@sdf.lonestar.org> <A42E28AD-54AF-407D-9E5A-38A95A56DFBC@rabson.org> <49FCB96E.1010604@sdf.lonestar.org>
next in thread | previous in thread | raw e-mail | index | archive | help
--Apple-Mail-9--722357766 Content-Type: text/plain; charset=US-ASCII; format=flowed; delsp=yes Content-Transfer-Encoding: 7bit On 2 May 2009, at 17:21, Tom McLaughlin wrote: > Doug Rabson wrote, On 04/08/2009 03:20 PM: >> On 5 Apr 2009, at 07:38, Tom McLaughlin wrote: >>> Hey, I have a recent -CURRENT box which has a mount exported from an >>> OpenBSD NFS server. Recently I enabled lockd and statd on the >>> machine >>> but this has started to cause the network connection on the >>> machine to lockup. I find the following in dmesg: >>> >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> nfs server exports:/mnt/raid0/net/home: lockd not responding >>> NLM: failed to contact remote rpcbind, stat = 5, port = 28416 >>> >>> Additionally I see this when trying to restart netif: >>> >>> em0: Could not setup receive structures >>> >>> I've tried building with NFS_LEGACYRPC but that has not changed >>> anything. Additionally I've tested this on 7-STABLE and while >>> lockd still does not work (so, looks like I'll still have to work >>> around my need for NFS locking) the network connection at least >>> does not lock up. Is what I'm seeing evidence of some further >>> problem? >> It looks as if lockd is not running on the server. The NFS locking >> protocol needs it enabled at both ends. Also, NFS_LEGACYRPC won't >> affect this - the record locking code always uses the new RPC code. > > Hi Doug, lockd is runing on both ends. The problem appears to be > with the system running out of mbuf clusters when using lockd. [1] > For now I'm mounting the particular mount with nolockd as an option > to get around this. I've gotten errors with my -STABLE box using > this mount with lockd enabled but at least the system didn't run out > of mbuf clusters and lose all network connectivity. Could you please try the attached (unfortunately untested - I'm at BSDCan) patch and see if it affects your problem. This patch attempts to fix PR 130628 which appears similar to your issue. --Apple-Mail-9--722357766 Content-Disposition: attachment; filename=rpc-deadlock.diff Content-Type: application/octet-stream; x-unix-mode=0664; name="rpc-deadlock.diff" Content-Transfer-Encoding: 7bit Index: rpc/svc_dg.c =================================================================== --- rpc/svc_dg.c (revision 191886) +++ rpc/svc_dg.c (working copy) @@ -53,6 +53,7 @@ #include <sys/queue.h> #include <sys/socket.h> #include <sys/socketvar.h> +#include <sys/sx.h> #include <sys/systm.h> #include <sys/uio.h> @@ -118,7 +119,7 @@ xprt = mem_alloc(sizeof (SVCXPRT)); memset(xprt, 0, sizeof (SVCXPRT)); - mtx_init(&xprt->xp_lock, "xprt->xp_lock", NULL, MTX_DEF); + sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = NULL; @@ -161,6 +162,9 @@ svc_dg_stat(SVCXPRT *xprt) { + if (soreadable(xprt->xp_socket)) + return (XPRT_MOREREQS); + return (XPRT_IDLE); } @@ -173,22 +177,17 @@ int error, rcvflag; /* + * Serialise access to the socket. + */ + sx_xlock(&xprt->xp_lock); + + /* * The socket upcall calls xprt_active() which will eventually * cause the server to call us here. We attempt to read a * packet from the socket and process it. If the read fails, * we have drained all pending requests so we call * xprt_inactive(). - * - * The lock protects us in the case where a new packet arrives - * on the socket after our call to soreceive fails with - * EWOULDBLOCK - the call to xprt_active() in the upcall will - * happen only after our call to xprt_inactive() which ensures - * that we will remain active. It might be possible to use - * SOCKBUF_LOCK for this - its not clear to me what locks are - * held during the upcall. */ - mtx_lock(&xprt->xp_lock); - uio.uio_resid = 1000000000; uio.uio_td = curthread; mreq = NULL; @@ -196,8 +195,19 @@ error = soreceive(xprt->xp_socket, &raddr, &uio, &mreq, NULL, &rcvflag); if (error == EWOULDBLOCK) { - xprt_inactive(xprt); - mtx_unlock(&xprt->xp_lock); + /* + * We must re-test for readability after taking the + * lock to protect us in the case where a new packet + * arrives on the socket after our call to soreceive + * fails with EWOULDBLOCK. The pool lock protects us + * from racing the upcall after our soreadable() call + * returns false. + */ + mtx_lock(&xprt->xp_pool->sp_lock); + if (!soreadable(xprt->xp_socket)) + xprt_inactive_locked(xprt); + mtx_unlock(&xprt->xp_pool->sp_lock); + sx_xunlock(&xprt->xp_lock); return (FALSE); } @@ -208,11 +218,11 @@ xprt->xp_socket->so_rcv.sb_flags &= ~SB_UPCALL; SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); xprt_inactive(xprt); - mtx_unlock(&xprt->xp_lock); + sx_xunlock(&xprt->xp_lock); return (FALSE); } - mtx_unlock(&xprt->xp_lock); + sx_xunlock(&xprt->xp_lock); KASSERT(raddr->sa_len < xprt->xp_rtaddr.maxlen, ("Unexpected remote address length")); @@ -301,7 +311,7 @@ xprt_unregister(xprt); - mtx_destroy(&xprt->xp_lock); + sx_destroy(&xprt->xp_lock); if (xprt->xp_socket) (void)soclose(xprt->xp_socket); @@ -328,7 +338,5 @@ { SVCXPRT *xprt = (SVCXPRT *) arg; - mtx_lock(&xprt->xp_lock); xprt_active(xprt); - mtx_unlock(&xprt->xp_lock); } Index: rpc/svc_vc.c =================================================================== --- rpc/svc_vc.c (revision 191886) +++ rpc/svc_vc.c (working copy) @@ -54,6 +54,7 @@ #include <sys/queue.h> #include <sys/socket.h> #include <sys/socketvar.h> +#include <sys/sx.h> #include <sys/systm.h> #include <sys/uio.h> #include <netinet/tcp.h> @@ -142,7 +143,7 @@ } xprt = mem_alloc(sizeof(SVCXPRT)); - mtx_init(&xprt->xp_lock, "xprt->xp_lock", NULL, MTX_DEF); + sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = NULL; @@ -219,7 +220,7 @@ cd->strm_stat = XPRT_IDLE; xprt = mem_alloc(sizeof(SVCXPRT)); - mtx_init(&xprt->xp_lock, "xprt->xp_lock", NULL, MTX_DEF); + sx_init(&xprt->xp_lock, "xprt->xp_lock"); xprt->xp_pool = pool; xprt->xp_socket = so; xprt->xp_p1 = cd; @@ -255,9 +256,9 @@ * Throw the transport into the active list in case it already * has some data buffered. */ - mtx_lock(&xprt->xp_lock); + sx_xlock(&xprt->xp_lock); xprt_active(xprt); - mtx_unlock(&xprt->xp_lock); + sx_xunlock(&xprt->xp_lock); return (xprt); cleanup_svc_vc_create: @@ -347,22 +348,27 @@ * connection from the socket and turn it into a new * transport. If the accept fails, we have drained all pending * connections so we call xprt_inactive(). - * - * The lock protects us in the case where a new connection arrives - * on the socket after our call to accept fails with - * EWOULDBLOCK - the call to xprt_active() in the upcall will - * happen only after our call to xprt_inactive() which ensures - * that we will remain active. It might be possible to use - * SOCKBUF_LOCK for this - its not clear to me what locks are - * held during the upcall. */ - mtx_lock(&xprt->xp_lock); + sx_xlock(&xprt->xp_lock); error = svc_vc_accept(xprt->xp_socket, &so); if (error == EWOULDBLOCK) { - xprt_inactive(xprt); - mtx_unlock(&xprt->xp_lock); + /* + * We must re-test for new connections after taking + * the lock to protect us in the case where a new + * connection arrives after our call to accept fails + * with EWOULDBLOCK. The pool lock protects us from + * racing the upcall after our TAILQ_EMPTY() call + * returns false. + */ + ACCEPT_LOCK(); + mtx_lock(&xprt->xp_pool->sp_lock); + if (TAILQ_EMPTY(&xprt->xp_socket->so_comp)) + xprt_inactive_locked(xprt); + mtx_unlock(&xprt->xp_pool->sp_lock); + ACCEPT_UNLOCK(); + sx_xunlock(&xprt->xp_lock); return (FALSE); } @@ -373,11 +379,11 @@ xprt->xp_socket->so_rcv.sb_flags &= ~SB_UPCALL; SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); xprt_inactive(xprt); - mtx_unlock(&xprt->xp_lock); + sx_xunlock(&xprt->xp_lock); return (FALSE); } - mtx_unlock(&xprt->xp_lock); + sx_xunlock(&xprt->xp_lock); sa = 0; error = soaccept(so, &sa); @@ -422,7 +428,7 @@ xprt_unregister(xprt); - mtx_destroy(&xprt->xp_lock); + sx_destroy(&xprt->xp_lock); if (xprt->xp_socket) (void)soclose(xprt->xp_socket); @@ -483,21 +489,29 @@ /* * Return XPRT_MOREREQS if we have buffered data and we are - * mid-record or if we have enough data for a record marker. + * mid-record or if we have enough data for a record + * marker. Since this is only a hint, we read mpending and + * resid outside the lock. We do need to take the lock if we + * have to traverse the mbuf chain. */ if (cd->mpending) { if (cd->resid) return (XPRT_MOREREQS); n = 0; + sx_xlock(&xprt->xp_lock); m = cd->mpending; while (m && n < sizeof(uint32_t)) { n += m->m_len; m = m->m_next; } + sx_xunlock(&xprt->xp_lock); if (n >= sizeof(uint32_t)) return (XPRT_MOREREQS); } + if (soreadable(xprt->xp_socket)) + return (XPRT_MOREREQS); + return (XPRT_IDLE); } @@ -509,6 +523,12 @@ struct mbuf *m; int error, rcvflag; + /* + * Serialise access to the socket and our own record parsing + * state. + */ + sx_xlock(&xprt->xp_lock); + for (;;) { /* * If we have an mbuf chain in cd->mpending, try to parse a @@ -584,6 +604,7 @@ */ xdrmbuf_create(&xprt->xp_xdrreq, cd->mreq, XDR_DECODE); cd->mreq = NULL; + sx_xunlock(&xprt->xp_lock); if (! xdr_callmsg(&xprt->xp_xdrreq, msg)) { XDR_DESTROY(&xprt->xp_xdrreq); return (FALSE); @@ -602,17 +623,7 @@ * the result in cd->mpending. If the read fails, * we have drained both cd->mpending and the socket so * we can call xprt_inactive(). - * - * The lock protects us in the case where a new packet arrives - * on the socket after our call to soreceive fails with - * EWOULDBLOCK - the call to xprt_active() in the upcall will - * happen only after our call to xprt_inactive() which ensures - * that we will remain active. It might be possible to use - * SOCKBUF_LOCK for this - its not clear to me what locks are - * held during the upcall. */ - mtx_lock(&xprt->xp_lock); - uio.uio_resid = 1000000000; uio.uio_td = curthread; m = NULL; @@ -621,8 +632,20 @@ &rcvflag); if (error == EWOULDBLOCK) { - xprt_inactive(xprt); - mtx_unlock(&xprt->xp_lock); + /* + * We must re-test for readability after + * taking the lock to protect us in the case + * where a new packet arrives on the socket + * after our call to soreceive fails with + * EWOULDBLOCK. The pool lock protects us from + * racing the upcall after our soreadable() + * call returns false. + */ + mtx_lock(&xprt->xp_pool->sp_lock); + if (!soreadable(xprt->xp_socket)) + xprt_inactive_locked(xprt); + mtx_unlock(&xprt->xp_pool->sp_lock); + sx_xunlock(&xprt->xp_lock); return (FALSE); } @@ -634,7 +657,7 @@ SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv); xprt_inactive(xprt); cd->strm_stat = XPRT_DIED; - mtx_unlock(&xprt->xp_lock); + sx_xunlock(&xprt->xp_lock); return (FALSE); } @@ -642,8 +665,9 @@ /* * EOF - the other end has closed the socket. */ + xprt_inactive(xprt); cd->strm_stat = XPRT_DIED; - mtx_unlock(&xprt->xp_lock); + sx_xunlock(&xprt->xp_lock); return (FALSE); } @@ -651,8 +675,6 @@ m_last(cd->mpending)->m_next = m; else cd->mpending = m; - - mtx_unlock(&xprt->xp_lock); } } @@ -739,9 +761,7 @@ { SVCXPRT *xprt = (SVCXPRT *) arg; - mtx_lock(&xprt->xp_lock); xprt_active(xprt); - mtx_unlock(&xprt->xp_lock); } #if 0 Index: rpc/svc.c =================================================================== --- rpc/svc.c (revision 191886) +++ rpc/svc.c (working copy) @@ -178,18 +178,23 @@ } void -xprt_inactive(SVCXPRT *xprt) +xprt_inactive_locked(SVCXPRT *xprt) { SVCPOOL *pool = xprt->xp_pool; - mtx_lock(&pool->sp_lock); - if (xprt->xp_active) { TAILQ_REMOVE(&pool->sp_active, xprt, xp_alink); xprt->xp_active = FALSE; } - wakeup(&pool->sp_active); +} +void +xprt_inactive(SVCXPRT *xprt) +{ + SVCPOOL *pool = xprt->xp_pool; + + mtx_lock(&pool->sp_lock); + xprt_inactive_locked(xprt); mtx_unlock(&pool->sp_lock); } Index: rpc/svc.h =================================================================== --- rpc/svc.h (revision 191886) +++ rpc/svc.h (working copy) @@ -47,6 +47,7 @@ #include <sys/queue.h> #include <sys/_lock.h> #include <sys/_mutex.h> +#include <sys/_sx.h> #endif /* @@ -128,7 +129,7 @@ */ typedef struct __rpc_svcxprt { #ifdef _KERNEL - struct mtx xp_lock; + struct sx xp_lock; struct __rpc_svcpool *xp_pool; /* owning pool (see below) */ TAILQ_ENTRY(__rpc_svcxprt) xp_link; TAILQ_ENTRY(__rpc_svcxprt) xp_alink; @@ -332,6 +333,7 @@ __BEGIN_DECLS extern void xprt_active(SVCXPRT *); extern void xprt_inactive(SVCXPRT *); +extern void xprt_inactive_locked(SVCXPRT *); __END_DECLS #endif --Apple-Mail-9--722357766 Content-Type: text/plain; charset=US-ASCII; format=flowed Content-Transfer-Encoding: 7bit --Apple-Mail-9--722357766--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?05FB85E2-1BB9-4EA7-9ED2-C92CB1014783>