Date:      Thu, 7 May 2009 15:42:10 -0400
From:      Doug Rabson <dfr@rabson.org>
To:        Tom McLaughlin <tmclaugh@sdf.lonestar.org>
Cc:        current@freebsd.org
Subject:   Re: NFS lockd/statd lock up network connection
Message-ID:  <05FB85E2-1BB9-4EA7-9ED2-C92CB1014783@rabson.org>
In-Reply-To: <49FCB96E.1010604@sdf.lonestar.org>
References:  <49D851FC.4090103@sdf.lonestar.org> <A42E28AD-54AF-407D-9E5A-38A95A56DFBC@rabson.org> <49FCB96E.1010604@sdf.lonestar.org>


On 2 May 2009, at 17:21, Tom McLaughlin wrote:

> Doug Rabson wrote, On 04/08/2009 03:20 PM:
>> On 5 Apr 2009, at 07:38, Tom McLaughlin wrote:
>>> Hey, I have a recent -CURRENT box which has a mount exported from an
>>> OpenBSD NFS server.  Recently I enabled lockd and statd on the machine
>>> but this has started to cause the network connection on the machine
>>> to lock up.  I find the following in dmesg:
>>>
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> nfs server exports:/mnt/raid0/net/home: lockd not responding
>>> NLM: failed to contact remote rpcbind, stat = 5, port = 28416
>>>
>>> Additionally I see this when trying to restart netif:
>>>
>>> em0: Could not setup receive structures
>>>
>>> I've tried building with NFS_LEGACYRPC but that has not changed
>>> anything.  Additionally I've tested this on 7-STABLE and while lockd
>>> still does not work (so it looks like I'll still have to work around
>>> my need for NFS locking), the network connection at least does not
>>> lock up.  Is what I'm seeing evidence of some further problem?
>> It looks as if lockd is not running on the server. The NFS locking
>> protocol needs it enabled at both ends. Also, NFS_LEGACYRPC won't
>> affect this - the record locking code always uses the new RPC code.
>
> Hi Doug, lockd is running on both ends.  The problem appears to be
> with the system running out of mbuf clusters when using lockd. [1]
> For now I'm mounting the particular filesystem with the nolockd option
> to get around this.  I've gotten errors with my -STABLE box using
> this mount with lockd enabled, but at least the system didn't run out
> of mbuf clusters and lose all network connectivity.

Could you please try the attached patch (unfortunately untested - I'm at
BSDCan) and see whether it helps with your problem? The patch attempts
to fix PR 130628, which appears similar to your issue.
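
For context, the patch makes three related changes: xp_lock becomes an
sx lock so it can be held across soreceive()/accept(), the socket
upcalls call xprt_active() without taking xp_lock, and the EWOULDBLOCK
paths re-test the socket under the pool lock before marking the
transport inactive, so a wakeup from a concurrent upcall cannot be
lost. The core idiom, condensed from the svc_dg_recv() hunk below:

	if (error == EWOULDBLOCK) {
		/*
		 * Re-test readability under the pool lock: a packet that
		 * arrives after soreceive() fails with EWOULDBLOCK will
		 * trigger an upcall, and holding sp_lock here means that
		 * upcall cannot race us into going idle with data pending.
		 */
		mtx_lock(&xprt->xp_pool->sp_lock);
		if (!soreadable(xprt->xp_socket))
			xprt_inactive_locked(xprt);
		mtx_unlock(&xprt->xp_pool->sp_lock);
		sx_xunlock(&xprt->xp_lock);
		return (FALSE);
	}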


[Attachment: rpc-deadlock.diff]

Index: rpc/svc_dg.c
===================================================================
--- rpc/svc_dg.c	(revision 191886)
+++ rpc/svc_dg.c	(working copy)
@@ -53,6 +53,7 @@
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
+#include <sys/sx.h>
 #include <sys/systm.h>
 #include <sys/uio.h>
 
@@ -118,7 +119,7 @@
 
 	xprt = mem_alloc(sizeof (SVCXPRT));
 	memset(xprt, 0, sizeof (SVCXPRT));
-	mtx_init(&xprt->xp_lock, "xprt->xp_lock", NULL, MTX_DEF);
+	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_pool = pool;
 	xprt->xp_socket = so;
 	xprt->xp_p1 = NULL;
@@ -161,6 +162,9 @@
 svc_dg_stat(SVCXPRT *xprt)
 {
 
+	if (soreadable(xprt->xp_socket))
+		return (XPRT_MOREREQS);
+
 	return (XPRT_IDLE);
 }
 
@@ -173,22 +177,17 @@
 	int error, rcvflag;
 
 	/*
+	 * Serialise access to the socket.
+	 */
+	sx_xlock(&xprt->xp_lock);
+
+	/*
 	 * The socket upcall calls xprt_active() which will eventually
 	 * cause the server to call us here. We attempt to read a
 	 * packet from the socket and process it. If the read fails,
 	 * we have drained all pending requests so we call
 	 * xprt_inactive().
-	 *
-	 * The lock protects us in the case where a new packet arrives
-	 * on the socket after our call to soreceive fails with
-	 * EWOULDBLOCK - the call to xprt_active() in the upcall will
-	 * happen only after our call to xprt_inactive() which ensures
-	 * that we will remain active. It might be possible to use
-	 * SOCKBUF_LOCK for this - its not clear to me what locks are
-	 * held during the upcall.
 	 */
-	mtx_lock(&xprt->xp_lock);
-
 	uio.uio_resid = 1000000000;
 	uio.uio_td = curthread;
 	mreq = NULL;
@@ -196,8 +195,19 @@
 	error = soreceive(xprt->xp_socket, &raddr, &uio, &mreq, NULL, &rcvflag);
 
 	if (error == EWOULDBLOCK) {
-		xprt_inactive(xprt);
-		mtx_unlock(&xprt->xp_lock);
+		/*
+		 * We must re-test for readability after taking the
+		 * lock to protect us in the case where a new packet
+		 * arrives on the socket after our call to soreceive
+		 * fails with EWOULDBLOCK. The pool lock protects us
+		 * from racing the upcall after our soreadable() call
+		 * returns false.
+		 */
+		mtx_lock(&xprt->xp_pool->sp_lock);
+		if (!soreadable(xprt->xp_socket))
+			xprt_inactive_locked(xprt);
+		mtx_unlock(&xprt->xp_pool->sp_lock);
+		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
@@ -208,11 +218,11 @@
 		xprt->xp_socket->so_rcv.sb_flags &= ~SB_UPCALL;
 		SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
 		xprt_inactive(xprt);
-		mtx_unlock(&xprt->xp_lock);
+		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
-	mtx_unlock(&xprt->xp_lock);
+	sx_xunlock(&xprt->xp_lock);
 
 	KASSERT(raddr->sa_len < xprt->xp_rtaddr.maxlen,
 	    ("Unexpected remote address length"));
@@ -301,7 +311,7 @@
 
 	xprt_unregister(xprt);
 
-	mtx_destroy(&xprt->xp_lock);
+	sx_destroy(&xprt->xp_lock);
 	if (xprt->xp_socket)
 		(void)soclose(xprt->xp_socket);
 
@@ -328,7 +338,5 @@
 {
 	SVCXPRT *xprt = (SVCXPRT *) arg;
 
-	mtx_lock(&xprt->xp_lock);
 	xprt_active(xprt);
-	mtx_unlock(&xprt->xp_lock);
 }
Index: rpc/svc_vc.c
===================================================================
--- rpc/svc_vc.c	(revision 191886)
+++ rpc/svc_vc.c	(working copy)
@@ -54,6 +54,7 @@
 #include <sys/queue.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
+#include <sys/sx.h>
 #include <sys/systm.h>
 #include <sys/uio.h>
 #include <netinet/tcp.h>
@@ -142,7 +143,7 @@
 	}
 
 	xprt = mem_alloc(sizeof(SVCXPRT));
-	mtx_init(&xprt->xp_lock, "xprt->xp_lock", NULL, MTX_DEF);
+	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_pool = pool;
 	xprt->xp_socket = so;
 	xprt->xp_p1 = NULL;
@@ -219,7 +220,7 @@
 	cd->strm_stat = XPRT_IDLE;
 
 	xprt = mem_alloc(sizeof(SVCXPRT));
-	mtx_init(&xprt->xp_lock, "xprt->xp_lock", NULL, MTX_DEF);
+	sx_init(&xprt->xp_lock, "xprt->xp_lock");
 	xprt->xp_pool = pool;
 	xprt->xp_socket = so;
 	xprt->xp_p1 = cd;
@@ -255,9 +256,9 @@
 	 * Throw the transport into the active list in case it already
 	 * has some data buffered.
 	 */
-	mtx_lock(&xprt->xp_lock);
+	sx_xlock(&xprt->xp_lock);
 	xprt_active(xprt);
-	mtx_unlock(&xprt->xp_lock);
+	sx_xunlock(&xprt->xp_lock);
 
 	return (xprt);
 cleanup_svc_vc_create:
@@ -347,22 +348,27 @@
 	 * connection from the socket and turn it into a new
 	 * transport. If the accept fails, we have drained all pending
 	 * connections so we call xprt_inactive().
-	 *
-	 * The lock protects us in the case where a new connection arrives
-	 * on the socket after our call to accept fails with
-	 * EWOULDBLOCK - the call to xprt_active() in the upcall will
-	 * happen only after our call to xprt_inactive() which ensures
-	 * that we will remain active. It might be possible to use
-	 * SOCKBUF_LOCK for this - its not clear to me what locks are
-	 * held during the upcall.
 	 */
-	mtx_lock(&xprt->xp_lock);
+	sx_xlock(&xprt->xp_lock);
 
 	error = svc_vc_accept(xprt->xp_socket, &so);
 
 	if (error == EWOULDBLOCK) {
-		xprt_inactive(xprt);
-		mtx_unlock(&xprt->xp_lock);
+		/*
+		 * We must re-test for new connections after taking
+		 * the lock to protect us in the case where a new
+		 * connection arrives after our call to accept fails
+		 * with EWOULDBLOCK. The pool lock protects us from
+		 * racing the upcall after our TAILQ_EMPTY() call
+		 * returns false.
+		 */
+		ACCEPT_LOCK();
+		mtx_lock(&xprt->xp_pool->sp_lock);
+		if (TAILQ_EMPTY(&xprt->xp_socket->so_comp))
+			xprt_inactive_locked(xprt);
+		mtx_unlock(&xprt->xp_pool->sp_lock);
+		ACCEPT_UNLOCK();
+		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
@@ -373,11 +379,11 @@
 		xprt->xp_socket->so_rcv.sb_flags &= ~SB_UPCALL;
 		SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
 		xprt_inactive(xprt);
-		mtx_unlock(&xprt->xp_lock);
+		sx_xunlock(&xprt->xp_lock);
 		return (FALSE);
 	}
 
-	mtx_unlock(&xprt->xp_lock);
+	sx_xunlock(&xprt->xp_lock);
 
 	sa = 0;
 	error = soaccept(so, &sa);
@@ -422,7 +428,7 @@
 
 	xprt_unregister(xprt);
 
-	mtx_destroy(&xprt->xp_lock);
+	sx_destroy(&xprt->xp_lock);
 	if (xprt->xp_socket)
 		(void)soclose(xprt->xp_socket);
 
@@ -483,21 +489,29 @@
 
 	/*
 	 * Return XPRT_MOREREQS if we have buffered data and we are
-	 * mid-record or if we have enough data for a record marker.
+	 * mid-record or if we have enough data for a record
+	 * marker. Since this is only a hint, we read mpending and
+	 * resid outside the lock. We do need to take the lock if we
+	 * have to traverse the mbuf chain.
 	 */
 	if (cd->mpending) {
 		if (cd->resid)
 			return (XPRT_MOREREQS);
 		n = 0;
+		sx_xlock(&xprt->xp_lock);
 		m = cd->mpending;
 		while (m && n < sizeof(uint32_t)) {
 			n += m->m_len;
 			m = m->m_next;
 		}
+		sx_xunlock(&xprt->xp_lock);
 		if (n >= sizeof(uint32_t))
 			return (XPRT_MOREREQS);
 	}
 
+	if (soreadable(xprt->xp_socket))
+		return (XPRT_MOREREQS);
+
 	return (XPRT_IDLE);
 }
 
@@ -509,6 +523,12 @@
 	struct mbuf *m;
 	int error, rcvflag;
 
+	/*
+	 * Serialise access to the socket and our own record parsing
+	 * state.
+	 */
+	sx_xlock(&xprt->xp_lock);
+
 	for (;;) {
 		/*
 		 * If we have an mbuf chain in cd->mpending, try to parse a
@@ -584,6 +604,7 @@
 				 */
 				xdrmbuf_create(&xprt->xp_xdrreq, cd->mreq, XDR_DECODE);
 				cd->mreq = NULL;
+				sx_xunlock(&xprt->xp_lock);
 				if (! xdr_callmsg(&xprt->xp_xdrreq, msg)) {
 					XDR_DESTROY(&xprt->xp_xdrreq);
 					return (FALSE);
@@ -602,17 +623,7 @@
 		 * the result in cd->mpending. If the read fails,
 		 * we have drained both cd->mpending and the socket so
 		 * we can call xprt_inactive().
-		 *
-		 * The lock protects us in the case where a new packet arrives
-		 * on the socket after our call to soreceive fails with
-		 * EWOULDBLOCK - the call to xprt_active() in the upcall will
-		 * happen only after our call to xprt_inactive() which ensures
-		 * that we will remain active. It might be possible to use
-		 * SOCKBUF_LOCK for this - its not clear to me what locks are
-		 * held during the upcall.
 		 */
-		mtx_lock(&xprt->xp_lock);
-
 		uio.uio_resid = 1000000000;
 		uio.uio_td = curthread;
 		m = NULL;
@@ -621,8 +632,20 @@
 		    &rcvflag);
 
 		if (error == EWOULDBLOCK) {
-			xprt_inactive(xprt);
-			mtx_unlock(&xprt->xp_lock);
+			/*
+			 * We must re-test for readability after
+			 * taking the lock to protect us in the case
+			 * where a new packet arrives on the socket
+			 * after our call to soreceive fails with
+			 * EWOULDBLOCK. The pool lock protects us from
+			 * racing the upcall after our soreadable()
+			 * call returns false.
+			 */
+			mtx_lock(&xprt->xp_pool->sp_lock);
+			if (!soreadable(xprt->xp_socket))
+				xprt_inactive_locked(xprt);
+			mtx_unlock(&xprt->xp_pool->sp_lock);
+			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
@@ -634,7 +657,7 @@
 			SOCKBUF_UNLOCK(&xprt->xp_socket->so_rcv);
 			xprt_inactive(xprt);
 			cd->strm_stat = XPRT_DIED;
-			mtx_unlock(&xprt->xp_lock);
+			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
@@ -642,8 +665,9 @@
 			/*
 			 * EOF - the other end has closed the socket.
 			 */
+			xprt_inactive(xprt);
 			cd->strm_stat = XPRT_DIED;
-			mtx_unlock(&xprt->xp_lock);
+			sx_xunlock(&xprt->xp_lock);
 			return (FALSE);
 		}
 
@@ -651,8 +675,6 @@
 			m_last(cd->mpending)->m_next = m;
 		else
 			cd->mpending = m;
-
-		mtx_unlock(&xprt->xp_lock);
 	}
 }
 
@@ -739,9 +761,7 @@
 {
 	SVCXPRT *xprt = (SVCXPRT *) arg;
 
-	mtx_lock(&xprt->xp_lock);
 	xprt_active(xprt);
-	mtx_unlock(&xprt->xp_lock);
 }
 
 #if 0
Index: rpc/svc.c
===================================================================
--- rpc/svc.c	(revision 191886)
+++ rpc/svc.c	(working copy)
@@ -178,18 +178,23 @@
 }
 
 void
-xprt_inactive(SVCXPRT *xprt)
+xprt_inactive_locked(SVCXPRT *xprt)
 {
 	SVCPOOL *pool = xprt->xp_pool;
 
-	mtx_lock(&pool->sp_lock);
-
 	if (xprt->xp_active) {
 		TAILQ_REMOVE(&pool->sp_active, xprt, xp_alink);
 		xprt->xp_active = FALSE;
 	}
-	wakeup(&pool->sp_active);
+}
 
+void
+xprt_inactive(SVCXPRT *xprt)
+{
+	SVCPOOL *pool = xprt->xp_pool;
+
+	mtx_lock(&pool->sp_lock);
+	xprt_inactive_locked(xprt);
 	mtx_unlock(&pool->sp_lock);
 }
 
Index: rpc/svc.h
===================================================================
--- rpc/svc.h	(revision 191886)
+++ rpc/svc.h	(working copy)
@@ -47,6 +47,7 @@
 #include <sys/queue.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
+#include <sys/_sx.h>
 #endif
 
 /*
@@ -128,7 +129,7 @@
  */
 typedef struct __rpc_svcxprt {
 #ifdef _KERNEL
-	struct mtx	xp_lock;
+	struct sx	xp_lock;
 	struct __rpc_svcpool *xp_pool;  /* owning pool (see below) */
 	TAILQ_ENTRY(__rpc_svcxprt) xp_link;
 	TAILQ_ENTRY(__rpc_svcxprt) xp_alink;
@@ -332,6 +333,7 @@
 __BEGIN_DECLS
 extern void	xprt_active(SVCXPRT *);
 extern void	xprt_inactive(SVCXPRT *);
+extern void	xprt_inactive_locked(SVCXPRT *);
 __END_DECLS
 
 #endif
