Skip site navigation (1) | Skip section navigation (2)
Date:      Tue, 10 Jan 2012 02:16:38 +0000 (UTC)
From:      Rick Macklem <rmacklem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org
Subject:   svn commit: r229902 - in stable/9/sys: fs/nfs nfsclient
Message-ID:  <201201100216.q0A2GcAJ077011@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rmacklem
Date: Tue Jan 10 02:16:38 2012
New Revision: 229902
URL: http://svn.freebsd.org/changeset/base/229902

Log:
  MFC: r228757
  jwd@ reported a problem via email where the old NFS client would
  get a reply of EEXIST from an NFS server when a Mkdir RPC was retried,
  for an NFS over UDP mount.
  Upon investigation, it was found that the client was retransmitting
  the Mkdir RPC request over UDP, but with a different xid. As such,
  the retransmitted message would miss the Duplicate Request Cache
  in the server, causing it to reply EEXIST. The kernel client side
  UDP RPC code has two timers. The first one causes a retransmit using
  the same xid and socket and was set to a fixed value of 3 seconds.
  (The default can be overridden via CLSET_RETRY_TIMEOUT.)
  The second one creates a new socket and xid and should be larger
  than the first. However, both NFS clients were setting the second
  timer to nm_timeo ("timeout=<value>" mount argument), which defaulted to
  1 second, so the first timer would never time out.
  This patch fixes both NFS clients so that they set the first timer
  using nm_timeo and makes the second timer larger than the first one.

Modified:
  stable/9/sys/fs/nfs/nfs_commonkrpc.c
  stable/9/sys/nfsclient/nfs_krpc.c
Directory Properties:
  stable/9/sys/   (props changed)
  stable/9/sys/amd64/include/xen/   (props changed)
  stable/9/sys/boot/   (props changed)
  stable/9/sys/boot/i386/efi/   (props changed)
  stable/9/sys/boot/ia64/efi/   (props changed)
  stable/9/sys/boot/ia64/ski/   (props changed)
  stable/9/sys/boot/powerpc/boot1.chrp/   (props changed)
  stable/9/sys/boot/powerpc/ofw/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/contrib/dev/acpica/   (props changed)
  stable/9/sys/contrib/octeon-sdk/   (props changed)
  stable/9/sys/contrib/pf/   (props changed)
  stable/9/sys/contrib/x86emu/   (props changed)

Modified: stable/9/sys/fs/nfs/nfs_commonkrpc.c
==============================================================================
--- stable/9/sys/fs/nfs/nfs_commonkrpc.c	Tue Jan 10 01:02:40 2012	(r229901)
+++ stable/9/sys/fs/nfs/nfs_commonkrpc.c	Tue Jan 10 02:16:38 2012	(r229902)
@@ -168,6 +168,7 @@ newnfs_connect(struct nfsmount *nmp, str
 	struct socket *so;
 	int one = 1, retries, error = 0;
 	struct thread *td = curthread;
+	struct timeval timo;
 
 	/*
 	 * We need to establish the socket using the credentials of
@@ -264,9 +265,18 @@ newnfs_connect(struct nfsmount *nmp, str
 			CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
 		if ((nmp->nm_flag & NFSMNT_RESVPORT))
 			CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
-		if (NFSHASSOFT(nmp))
-			retries = nmp->nm_retry;
-		else
+		if (NFSHASSOFT(nmp)) {
+			if (nmp->nm_sotype == SOCK_DGRAM)
+				/*
+				 * For UDP, the large timeout for a reconnect
+				 * will be set to "nm_retry * nm_timeo / 2", so
+				 * we only want to do 2 reconnect timeout
+				 * retries.
+				 */
+				retries = 2;
+			else
+				retries = nmp->nm_retry;
+		} else
 			retries = INT_MAX;
 	} else {
 		/*
@@ -284,6 +294,27 @@ newnfs_connect(struct nfsmount *nmp, str
 	}
 	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
 
+	if (nmp != NULL) {
+		/*
+		 * For UDP, there are 2 timeouts:
+		 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
+		 *   that does a retransmit of an RPC request using the same 
+		 *   socket and xid. This is what you normally want to do,
+		 *   since NFS servers depend on "same xid" for their
+		 *   Duplicate Request Cache.
+		 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
+		 *   retransmits on the same socket should fail and a fresh
+		 *   socket created. Each of these timeouts counts as one
+		 *   CLSET_RETRIES as set above.
+		 * Set the initial retransmit timeout for UDP. This timeout
+		 * doesn't exist for TCP and the following call just fails,
+		 * which is ok.
+		 */
+		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
+		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
+		CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
+	}
+
 	mtx_lock(&nrp->nr_mtx);
 	if (nrp->nr_client != NULL) {
 		/*
@@ -442,7 +473,7 @@ newnfs_request(struct nfsrv_descript *nd
 {
 	u_int32_t *tl;
 	time_t waituntil;
-	int i, j, set_uid = 0, set_sigset = 0;
+	int i, j, set_uid = 0, set_sigset = 0, timeo;
 	int trycnt, error = 0, usegssname = 0, secflavour = AUTH_SYS;
 	u_int16_t procnum;
 	u_int trylater_delay = 1;
@@ -628,6 +659,12 @@ newnfs_request(struct nfsrv_descript *nd
 	}
 	trycnt = 0;
 tryagain:
+	/*
+	 * This timeout specifies when a new socket should be created,
+	 * along with new xid values. For UDP, this should be done
+	 * infrequently, since retransmits of RPC requests should normally
+	 * use the same xid.
+	 */
 	if (nmp == NULL) {
 		timo.tv_usec = 0;
 		if (clp == NULL)
@@ -642,8 +679,22 @@ tryagain:
 			else
 				timo.tv_sec = NFS_TCPTIMEO;
 		} else {
-			timo.tv_sec = nmp->nm_timeo / NFS_HZ;
-			timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ;
+			if (NFSHASSOFT(nmp)) {
+				/*
+				 * CLSET_RETRIES is set to 2, so this should be
+				 * half of the total timeout required.
+				 */
+				timeo = nmp->nm_retry * nmp->nm_timeo / 2;
+				if (timeo < 1)
+					timeo = 1;
+				timo.tv_sec = timeo / NFS_HZ;
+				timo.tv_usec = (timeo % NFS_HZ) * 1000000 /
+				    NFS_HZ;
+			} else {
+				/* For UDP hard mounts, use a large value. */
+				timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
+				timo.tv_usec = 0;
+			}
 		}
 
 		if (rep != NULL) {

Modified: stable/9/sys/nfsclient/nfs_krpc.c
==============================================================================
--- stable/9/sys/nfsclient/nfs_krpc.c	Tue Jan 10 01:02:40 2012	(r229901)
+++ stable/9/sys/nfsclient/nfs_krpc.c	Tue Jan 10 02:16:38 2012	(r229902)
@@ -191,6 +191,7 @@ nfs_connect(struct nfsmount *nmp)
 	struct netconfig *nconf;
 	rpcvers_t vers;
 	int one = 1, retries;
+	struct timeval timo;
 
 	/*
 	 * We need to establish the socket using the credentials of
@@ -258,12 +259,37 @@ nfs_connect(struct nfsmount *nmp)
 		CLNT_CONTROL(client, CLSET_INTERRUPTIBLE, &one);
 	if (nmp->nm_flag & NFSMNT_RESVPORT)
 		CLNT_CONTROL(client, CLSET_PRIVPORT, &one);
-	if (nmp->nm_flag & NFSMNT_SOFT)
-		retries = nmp->nm_retry;
-	else
+	if ((nmp->nm_flag & NFSMNT_SOFT) != 0) {
+		if (nmp->nm_sotype == SOCK_DGRAM)
+			/*
+			 * For UDP, the large timeout for a reconnect will
+			 * be set to "nm_retry * nm_timeo / 2", so we only
+			 * want to do 2 reconnect timeout retries.
+			 */
+			retries = 2;
+		else
+			retries = nmp->nm_retry;
+	} else
 		retries = INT_MAX;
 	CLNT_CONTROL(client, CLSET_RETRIES, &retries);
 
+	/*
+	 * For UDP, there are 2 timeouts:
+	 * - CLSET_RETRY_TIMEOUT sets the initial timeout for the timer
+	 *   that does a retransmit of an RPC request using the same socket
+	 *   and xid. This is what you normally want to do, since NFS
+	 *   servers depend on "same xid" for their Duplicate Request Cache.
+	 * - timeout specified in CLNT_CALL_MBUF(), which specifies when
+	 *   retransmits on the same socket should fail and a fresh socket
+	 *   created. Each of these timeouts counts as one CLSET_RETRIES,
+	 *   as set above.
+	 * Set the initial retransmit timeout for UDP. This timeout doesn't
+	 * exist for TCP and the following call just fails, which is ok.
+	 */
+	timo.tv_sec = nmp->nm_timeo / NFS_HZ;
+	timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
+	CLNT_CONTROL(client, CLSET_RETRY_TIMEOUT, &timo);
+
 	mtx_lock(&nmp->nm_mtx);
 	if (nmp->nm_client) {
 		/*
@@ -411,7 +437,7 @@ nfs_request(struct vnode *vp, struct mbu
 	struct mbuf *md;
 	time_t waituntil;
 	caddr_t dpos;
-	int error = 0;
+	int error = 0, timeo;
 	struct timeval now;
 	AUTH *auth = NULL;
 	enum nfs_rto_timer_t timer;
@@ -486,8 +512,32 @@ nfs_request(struct vnode *vp, struct mbu
 
 	nfsstats.rpcrequests++;
 tryagain:
-	timo.tv_sec = nmp->nm_timeo / NFS_HZ;
-	timo.tv_usec = (nmp->nm_timeo * 1000000) / NFS_HZ;
+	/*
+	 * This timeout specifies when a new socket should be created,
+	 * along with new xid values. For UDP, this should be done
+	 * infrequently, since retransmits of RPC requests should normally
+	 * use the same xid.
+	 */
+	if (nmp->nm_sotype == SOCK_DGRAM) {
+		if ((nmp->nm_flag & NFSMNT_SOFT) != 0) {
+			/*
+			 * CLSET_RETRIES is set to 2, so this should be half
+			 * of the total timeout required.
+			 */
+			timeo = nmp->nm_retry * nmp->nm_timeo / 2;
+			if (timeo < 1)
+				timeo = 1;
+			timo.tv_sec = timeo / NFS_HZ;
+			timo.tv_usec = (timeo % NFS_HZ) * 1000000 / NFS_HZ;
+		} else {
+			/* For UDP hard mounts, use a large value. */
+			timo.tv_sec = NFS_MAXTIMEO / NFS_HZ;
+			timo.tv_usec = 0;
+		}
+	} else {
+		timo.tv_sec = nmp->nm_timeo / NFS_HZ;
+		timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * 1000000 / NFS_HZ;
+	}
 	mrep = NULL;
 	stat = CLNT_CALL_MBUF(nmp->nm_client, &ext,
 	    (nmp->nm_flag & NFSMNT_NFSV3) ? procnum : nfsv2_procid[procnum],



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201201100216.q0A2GcAJ077011>