Skip site navigation (1)Skip section navigation (2)
Date:      Wed, 14 Aug 2013 21:11:26 +0000 (UTC)
From:      Rick Macklem <rmacklem@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r254337 - in head/sys/fs: nfs nfsserver
Message-ID:  <201308142111.r7ELBQfC044549@svn.freebsd.org>

next in thread | raw e-mail | index | archive | help
Author: rmacklem
Date: Wed Aug 14 21:11:26 2013
New Revision: 254337
URL: http://svnweb.freebsd.org/changeset/base/254337

Log:
  Fix several performance-related issues in the new NFS server's
  DRC for NFS over TCP.
  - Increase the size of the hash tables.
  - Create a separate mutex for each hash list of the TCP hash table.
  - Single-thread the code that deletes stale cache entries.
  - Add a tunable called vfs.nfsd.tcphighwater, which can be increased
    to allow the cache to grow larger, avoiding the overhead of frequent
    scans to delete stale cache entries.
    (The default value will result in frequent scans to delete stale cache
     entries, analogous to what the pre-patched code does.)
  - Add a tunable called vfs.nfsd.cachetcp that can be used to disable
    DRC caching for NFS over TCP, since the old NFS server didn't DRC cache TCP.
  It also adjusts the size of nfsrc_floodlevel dynamically, so that it is
  always greater than vfs.nfsd.tcphighwater.
  
  For UDP the algorithm remains the same as the pre-patched code, but the
  tunable vfs.nfsd.udphighwater can be used to allow the cache to grow
  larger and reduce the overhead caused by frequent scans for stale entries.
  UDP also uses a larger hash table size than the pre-patched code.
  
  Reported by:	wollman
  Tested by:	wollman (earlier version of patch)
  Submitted by:	ivoras (earlier patch)
  Reviewed by:	jhb (earlier version of patch)
  MFC after:	1 month

Modified:
  head/sys/fs/nfs/nfsport.h
  head/sys/fs/nfs/nfsrvcache.h
  head/sys/fs/nfsserver/nfs_nfsdcache.c
  head/sys/fs/nfsserver/nfs_nfsdport.c

Modified: head/sys/fs/nfs/nfsport.h
==============================================================================
--- head/sys/fs/nfs/nfsport.h	Wed Aug 14 20:20:42 2013	(r254336)
+++ head/sys/fs/nfs/nfsport.h	Wed Aug 14 21:11:26 2013	(r254337)
@@ -603,11 +603,6 @@ void nfsrvd_rcv(struct socket *, void *,
 #define	NFSREQSPINLOCK		extern struct mtx nfs_req_mutex
 #define	NFSLOCKREQ()		mtx_lock(&nfs_req_mutex)
 #define	NFSUNLOCKREQ()		mtx_unlock(&nfs_req_mutex)
-#define	NFSCACHEMUTEX		extern struct mtx nfs_cache_mutex
-#define	NFSCACHEMUTEXPTR	(&nfs_cache_mutex)
-#define	NFSLOCKCACHE()		mtx_lock(&nfs_cache_mutex)
-#define	NFSUNLOCKCACHE()	mtx_unlock(&nfs_cache_mutex)
-#define	NFSCACHELOCKREQUIRED()	mtx_assert(&nfs_cache_mutex, MA_OWNED)
 #define	NFSSOCKMUTEX		extern struct mtx nfs_slock_mutex
 #define	NFSSOCKMUTEXPTR		(&nfs_slock_mutex)
 #define	NFSLOCKSOCK()		mtx_lock(&nfs_slock_mutex)

Modified: head/sys/fs/nfs/nfsrvcache.h
==============================================================================
--- head/sys/fs/nfs/nfsrvcache.h	Wed Aug 14 20:20:42 2013	(r254336)
+++ head/sys/fs/nfs/nfsrvcache.h	Wed Aug 14 21:11:26 2013	(r254337)
@@ -41,8 +41,9 @@
 #define	NFSRVCACHE_MAX_SIZE	2048
 #define	NFSRVCACHE_MIN_SIZE	  64
 
-#define	NFSRVCACHE_HASHSIZE	20
+#define	NFSRVCACHE_HASHSIZE	500
 
+/* Cache table entry. */
 struct nfsrvcache {
 	LIST_ENTRY(nfsrvcache) rc_hash;		/* Hash chain */
 	TAILQ_ENTRY(nfsrvcache)	rc_lru;		/* UDP lru chain */
@@ -104,4 +105,11 @@ struct nfsrvcache {
 
 LIST_HEAD(nfsrvhashhead, nfsrvcache);
 
+/* The fine-grained locked cache hash table for TCP. */
+struct nfsrchash_bucket {
+	struct mtx		mtx;
+	char			lock_name[16];
+	struct nfsrvhashhead	tbl;
+};
+
 #endif	/* _NFS_NFSRVCACHE_H_ */

Modified: head/sys/fs/nfsserver/nfs_nfsdcache.c
==============================================================================
--- head/sys/fs/nfsserver/nfs_nfsdcache.c	Wed Aug 14 20:20:42 2013	(r254336)
+++ head/sys/fs/nfsserver/nfs_nfsdcache.c	Wed Aug 14 21:11:26 2013	(r254337)
@@ -160,15 +160,51 @@ __FBSDID("$FreeBSD$");
 #include <fs/nfs/nfsport.h>
 
 extern struct nfsstats newnfsstats;
-NFSCACHEMUTEX;
+extern struct mtx nfsrc_udpmtx;
+extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
 int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0;
 #endif	/* !APPLEKEXT */
 
-static int nfsrc_tcpnonidempotent = 1;
-static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0;
+SYSCTL_DECL(_vfs_nfsd);
+
+static u_int	nfsrc_tcphighwater = 0;
+static int
+sysctl_tcphighwater(SYSCTL_HANDLER_ARGS)
+{
+	int error, newhighwater;
+
+	newhighwater = nfsrc_tcphighwater;
+	error = sysctl_handle_int(oidp, &newhighwater, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (newhighwater < 0)
+		return (EINVAL);
+	if (newhighwater >= nfsrc_floodlevel)
+		nfsrc_floodlevel = newhighwater + newhighwater / 5;
+	nfsrc_tcphighwater = newhighwater;
+	return (0);
+}
+SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0,
+    sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU",
+    "High water mark for TCP cache entries");
+
+static u_int	nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW,
+    &nfsrc_udphighwater, 0,
+    "High water mark for UDP cache entries");
+static u_int	nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW,
+    &nfsrc_tcptimeout, 0,
+    "Timeout for TCP entries in the DRC");
+static u_int nfsrc_tcpnonidempotent = 1;
+SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW,
+    &nfsrc_tcpnonidempotent, 0,
+    "Enable the DRC for NFS over TCP");
+
+static int nfsrc_udpcachesize = 0;
 static TAILQ_HEAD(, nfsrvcache) nfsrvudplru;
-static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE],
-    nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
+static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE];
+
 /*
  * and the reverse mapping from generic to Version 2 procedure numbers
  */
@@ -197,10 +233,11 @@ static int newnfsv2_procid[NFS_V3NPROCS]
 	NFSV2PROC_NOOP,
 };
 
+#define	nfsrc_hash(xid)	(((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE)
 #define	NFSRCUDPHASH(xid) \
-	(&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
+	(&nfsrvudphashtbl[nfsrc_hash(xid)])
 #define	NFSRCHASH(xid) \
-	(&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE])
+	(&nfsrchash_table[nfsrc_hash(xid)].tbl)
 #define	TRUE	1
 #define	FALSE	0
 #define	NFSRVCACHE_CHECKLEN	100
@@ -251,6 +288,18 @@ static int nfsrc_getlenandcksum(mbuf_t m
 static void nfsrc_marksametcpconn(u_int64_t);
 
 /*
+ * Return the correct mutex for this cache entry.
+ */
+static __inline struct mtx *
+nfsrc_cachemutex(struct nfsrvcache *rp)
+{
+
+	if ((rp->rc_flag & RC_UDP) != 0)
+		return (&nfsrc_udpmtx);
+	return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx);
+}
+
+/*
  * Initialize the server request cache list
  */
 APPLESTATIC void
@@ -264,7 +313,7 @@ nfsrvd_initcache(void)
 	inited = 1;
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 		LIST_INIT(&nfsrvudphashtbl[i]);
-		LIST_INIT(&nfsrvhashtbl[i]);
+		LIST_INIT(&nfsrchash_table[i].tbl);
 	}
 	TAILQ_INIT(&nfsrvudplru);
 	nfsrc_tcpsavedreplies = 0;
@@ -325,10 +374,12 @@ nfsrc_getudp(struct nfsrv_descript *nd, 
 	struct sockaddr_in6 *saddr6;
 	struct nfsrvhashhead *hp;
 	int ret = 0;
+	struct mtx *mutex;
 
+	mutex = nfsrc_cachemutex(newrp);
 	hp = NFSRCUDPHASH(newrp->rc_xid);
 loop:
-	NFSLOCKCACHE();
+	mtx_lock(mutex);
 	LIST_FOREACH(rp, hp, rc_hash) {
 	    if (newrp->rc_xid == rp->rc_xid &&
 		newrp->rc_proc == rp->rc_proc &&
@@ -336,8 +387,8 @@ loop:
 		nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) {
 			if ((rp->rc_flag & RC_LOCKED) != 0) {
 				rp->rc_flag |= RC_WANTED;
-				(void)mtx_sleep(rp, NFSCACHEMUTEXPTR,
-				    (PZERO - 1) | PDROP, "nfsrc", 10 * hz);
+				(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+				    "nfsrc", 10 * hz);
 				goto loop;
 			}
 			if (rp->rc_flag == 0)
@@ -347,14 +398,14 @@ loop:
 			TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru);
 			if (rp->rc_flag & RC_INPROG) {
 				newnfsstats.srvcache_inproghits++;
-				NFSUNLOCKCACHE();
+				mtx_unlock(mutex);
 				ret = RC_DROPIT;
 			} else if (rp->rc_flag & RC_REPSTATUS) {
 				/*
 				 * V2 only.
 				 */
 				newnfsstats.srvcache_nonidemdonehits++;
-				NFSUNLOCKCACHE();
+				mtx_unlock(mutex);
 				nfsrvd_rephead(nd);
 				*(nd->nd_errp) = rp->rc_status;
 				ret = RC_REPLY;
@@ -362,7 +413,7 @@ loop:
 					NFSRVCACHE_UDPTIMEOUT;
 			} else if (rp->rc_flag & RC_REPMBUF) {
 				newnfsstats.srvcache_nonidemdonehits++;
-				NFSUNLOCKCACHE();
+				mtx_unlock(mutex);
 				nd->nd_mreq = m_copym(rp->rc_reply, 0,
 					M_COPYALL, M_WAITOK);
 				ret = RC_REPLY;
@@ -377,7 +428,7 @@ loop:
 		}
 	}
 	newnfsstats.srvcache_misses++;
-	newnfsstats.srvcache_size++;
+	atomic_add_int(&newnfsstats.srvcache_size, 1);
 	nfsrc_udpcachesize++;
 
 	newrp->rc_flag |= RC_INPROG;
@@ -392,7 +443,7 @@ loop:
 	}
 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
 	TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 	nd->nd_rp = newrp;
 	ret = RC_DOIT;
 
@@ -410,12 +461,14 @@ nfsrvd_updatecache(struct nfsrv_descript
 	struct nfsrvcache *rp;
 	struct nfsrvcache *retrp = NULL;
 	mbuf_t m;
+	struct mtx *mutex;
 
 	rp = nd->nd_rp;
 	if (!rp)
 		panic("nfsrvd_updatecache null rp");
 	nd->nd_rp = NULL;
-	NFSLOCKCACHE();
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	nfsrc_lock(rp);
 	if (!(rp->rc_flag & RC_INPROG))
 		panic("nfsrvd_updatecache not inprog");
@@ -430,7 +483,7 @@ nfsrvd_updatecache(struct nfsrv_descript
 	 */
 	if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) {
 		newnfsstats.srvcache_nonidemdonehits++;
-		NFSUNLOCKCACHE();
+		mtx_unlock(mutex);
 		nd->nd_repstat = 0;
 		if (nd->nd_mreq)
 			mbuf_freem(nd->nd_mreq);
@@ -438,7 +491,7 @@ nfsrvd_updatecache(struct nfsrv_descript
 			panic("reply from cache");
 		nd->nd_mreq = m_copym(rp->rc_reply, 0,
 		    M_COPYALL, M_WAITOK);
-		rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT;
+		rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 		nfsrc_unlock(rp);
 		goto out;
 	}
@@ -463,29 +516,28 @@ nfsrvd_updatecache(struct nfsrv_descript
 		    nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) {
 			rp->rc_status = nd->nd_repstat;
 			rp->rc_flag |= RC_REPSTATUS;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 		} else {
 			if (!(rp->rc_flag & RC_UDP)) {
-			    nfsrc_tcpsavedreplies++;
+			    atomic_add_int(&nfsrc_tcpsavedreplies, 1);
 			    if (nfsrc_tcpsavedreplies >
 				newnfsstats.srvcache_tcppeak)
 				newnfsstats.srvcache_tcppeak =
 				    nfsrc_tcpsavedreplies;
 			}
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 			m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
-			NFSLOCKCACHE();
+			mtx_lock(mutex);
 			rp->rc_reply = m;
 			rp->rc_flag |= RC_REPMBUF;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 		}
 		if (rp->rc_flag & RC_UDP) {
 			rp->rc_timestamp = NFSD_MONOSEC +
 			    NFSRVCACHE_UDPTIMEOUT;
 			nfsrc_unlock(rp);
 		} else {
-			rp->rc_timestamp = NFSD_MONOSEC +
-			    NFSRVCACHE_TCPTIMEOUT;
+			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 			if (rp->rc_refcnt > 0)
 				nfsrc_unlock(rp);
 			else
@@ -493,7 +545,7 @@ nfsrvd_updatecache(struct nfsrv_descript
 		}
 	} else {
 		nfsrc_freecache(rp);
-		NFSUNLOCKCACHE();
+		mtx_unlock(mutex);
 	}
 
 out:
@@ -509,14 +561,16 @@ out:
 APPLESTATIC void
 nfsrvd_delcache(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
+	mutex = nfsrc_cachemutex(rp);
 	if (!(rp->rc_flag & RC_INPROG))
 		panic("nfsrvd_delcache not in prog");
-	NFSLOCKCACHE();
+	mtx_lock(mutex);
 	rp->rc_flag &= ~RC_INPROG;
 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED))
 		nfsrc_freecache(rp);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*
@@ -528,7 +582,9 @@ APPLESTATIC void
 nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err)
 {
 	tcp_seq tmp_seq;
+	struct mtx *mutex;
 
+	mutex = nfsrc_cachemutex(rp);
 	if (!(rp->rc_flag & RC_LOCKED))
 		panic("nfsrvd_sentcache not locked");
 	if (!err) {
@@ -537,10 +593,10 @@ nfsrvd_sentcache(struct nfsrvcache *rp, 
 		     so->so_proto->pr_protocol != IPPROTO_TCP)
 			panic("nfs sent cache");
 		if (nfsrv_getsockseqnum(so, &tmp_seq)) {
-			NFSLOCKCACHE();
+			mtx_lock(mutex);
 			rp->rc_tcpseq = tmp_seq;
 			rp->rc_flag |= RC_TCPSEQ;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 		}
 	}
 	nfsrc_unlock(rp);
@@ -559,11 +615,13 @@ nfsrc_gettcp(struct nfsrv_descript *nd, 
 	struct nfsrvcache *hitrp;
 	struct nfsrvhashhead *hp, nfsrc_templist;
 	int hit, ret = 0;
+	struct mtx *mutex;
 
+	mutex = nfsrc_cachemutex(newrp);
 	hp = NFSRCHASH(newrp->rc_xid);
 	newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum);
 tryagain:
-	NFSLOCKCACHE();
+	mtx_lock(mutex);
 	hit = 1;
 	LIST_INIT(&nfsrc_templist);
 	/*
@@ -621,8 +679,8 @@ tryagain:
 		rp = hitrp;
 		if ((rp->rc_flag & RC_LOCKED) != 0) {
 			rp->rc_flag |= RC_WANTED;
-			(void)mtx_sleep(rp, NFSCACHEMUTEXPTR,
-			    (PZERO - 1) | PDROP, "nfsrc", 10 * hz);
+			(void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP,
+			    "nfsrc", 10 * hz);
 			goto tryagain;
 		}
 		if (rp->rc_flag == 0)
@@ -630,7 +688,7 @@ tryagain:
 		rp->rc_flag |= RC_LOCKED;
 		if (rp->rc_flag & RC_INPROG) {
 			newnfsstats.srvcache_inproghits++;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_DROPIT;
@@ -639,24 +697,22 @@ tryagain:
 			 * V2 only.
 			 */
 			newnfsstats.srvcache_nonidemdonehits++;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_REPLY;
 			nfsrvd_rephead(nd);
 			*(nd->nd_errp) = rp->rc_status;
-			rp->rc_timestamp = NFSD_MONOSEC +
-				NFSRVCACHE_TCPTIMEOUT;
+			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 		} else if (rp->rc_flag & RC_REPMBUF) {
 			newnfsstats.srvcache_nonidemdonehits++;
-			NFSUNLOCKCACHE();
+			mtx_unlock(mutex);
 			if (newrp->rc_sockref == rp->rc_sockref)
 				nfsrc_marksametcpconn(rp->rc_sockref);
 			ret = RC_REPLY;
 			nd->nd_mreq = m_copym(rp->rc_reply, 0,
 				M_COPYALL, M_WAITOK);
-			rp->rc_timestamp = NFSD_MONOSEC +
-				NFSRVCACHE_TCPTIMEOUT;
+			rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout;
 		} else {
 			panic("nfs tcp cache1");
 		}
@@ -665,7 +721,7 @@ tryagain:
 		goto out;
 	}
 	newnfsstats.srvcache_misses++;
-	newnfsstats.srvcache_size++;
+	atomic_add_int(&newnfsstats.srvcache_size, 1);
 
 	/*
 	 * For TCP, multiple entries for a key are allowed, so don't
@@ -674,7 +730,7 @@ tryagain:
 	newrp->rc_cachetime = NFSD_MONOSEC;
 	newrp->rc_flag |= RC_INPROG;
 	LIST_INSERT_HEAD(hp, newrp, rc_hash);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 	nd->nd_rp = newrp;
 	ret = RC_DOIT;
 
@@ -685,16 +741,17 @@ out:
 
 /*
  * Lock a cache entry.
- * Also puts a mutex lock on the cache list.
  */
 static void
 nfsrc_lock(struct nfsrvcache *rp)
 {
-	NFSCACHELOCKREQUIRED();
+	struct mtx *mutex;
+
+	mutex = nfsrc_cachemutex(rp);
+	mtx_assert(mutex, MA_OWNED);
 	while ((rp->rc_flag & RC_LOCKED) != 0) {
 		rp->rc_flag |= RC_WANTED;
-		(void)mtx_sleep(rp, NFSCACHEMUTEXPTR, PZERO - 1,
-		    "nfsrc", 0);
+		(void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0);
 	}
 	rp->rc_flag |= RC_LOCKED;
 }
@@ -705,11 +762,13 @@ nfsrc_lock(struct nfsrvcache *rp)
 static void
 nfsrc_unlock(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
-	NFSLOCKCACHE();
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	rp->rc_flag &= ~RC_LOCKED;
 	nfsrc_wanted(rp);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*
@@ -732,7 +791,6 @@ static void
 nfsrc_freecache(struct nfsrvcache *rp)
 {
 
-	NFSCACHELOCKREQUIRED();
 	LIST_REMOVE(rp, rc_hash);
 	if (rp->rc_flag & RC_UDP) {
 		TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru);
@@ -742,10 +800,10 @@ nfsrc_freecache(struct nfsrvcache *rp)
 	if (rp->rc_flag & RC_REPMBUF) {
 		mbuf_freem(rp->rc_reply);
 		if (!(rp->rc_flag & RC_UDP))
-			nfsrc_tcpsavedreplies--;
+			atomic_add_int(&nfsrc_tcpsavedreplies, -1);
 	}
 	FREE((caddr_t)rp, M_NFSRVCACHE);
-	newnfsstats.srvcache_size--;
+	atomic_add_int(&newnfsstats.srvcache_size, -1);
 }
 
 /*
@@ -757,20 +815,21 @@ nfsrvd_cleancache(void)
 	struct nfsrvcache *rp, *nextrp;
 	int i;
 
-	NFSLOCKCACHE();
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
+		mtx_lock(&nfsrchash_table[i].mtx);
+		LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp)
 			nfsrc_freecache(rp);
-		}
+		mtx_unlock(&nfsrchash_table[i].mtx);
 	}
+	mtx_lock(&nfsrc_udpmtx);
 	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
 		LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) {
 			nfsrc_freecache(rp);
 		}
 	}
 	newnfsstats.srvcache_size = 0;
+	mtx_unlock(&nfsrc_udpmtx);
 	nfsrc_tcpsavedreplies = 0;
-	NFSUNLOCKCACHE();
 }
 
 /*
@@ -780,28 +839,97 @@ static void
 nfsrc_trimcache(u_int64_t sockref, struct socket *so)
 {
 	struct nfsrvcache *rp, *nextrp;
-	int i;
+	int i, j, k, time_histo[10];
+	time_t thisstamp;
+	static time_t udp_lasttrim = 0, tcp_lasttrim = 0;
+	static int onethread = 0;
 
-	NFSLOCKCACHE();
-	TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
-		if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
-		     && rp->rc_refcnt == 0
-		     && ((rp->rc_flag & RC_REFCNT) ||
-			 NFSD_MONOSEC > rp->rc_timestamp ||
-			 nfsrc_udpcachesize > nfsrc_udphighwater))
-			nfsrc_freecache(rp);
-	}
-	for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
-		LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) {
+	if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0)
+		return;
+	if (NFSD_MONOSEC != udp_lasttrim ||
+	    nfsrc_udpcachesize >= (nfsrc_udphighwater +
+	    nfsrc_udphighwater / 2)) {
+		mtx_lock(&nfsrc_udpmtx);
+		udp_lasttrim = NFSD_MONOSEC;
+		TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) {
 			if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED))
 			     && rp->rc_refcnt == 0
 			     && ((rp->rc_flag & RC_REFCNT) ||
-				 NFSD_MONOSEC > rp->rc_timestamp ||
-				 nfsrc_activesocket(rp, sockref, so)))
+				 udp_lasttrim > rp->rc_timestamp ||
+				 nfsrc_udpcachesize > nfsrc_udphighwater))
 				nfsrc_freecache(rp);
 		}
+		mtx_unlock(&nfsrc_udpmtx);
+	}
+	if (NFSD_MONOSEC != tcp_lasttrim ||
+	    nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) {
+		for (i = 0; i < 10; i++)
+			time_histo[i] = 0;
+		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+			mtx_lock(&nfsrchash_table[i].mtx);
+			if (i == 0)
+				tcp_lasttrim = NFSD_MONOSEC;
+			LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash,
+			    nextrp) {
+				if (!(rp->rc_flag &
+				     (RC_INPROG|RC_LOCKED|RC_WANTED))
+				     && rp->rc_refcnt == 0) {
+					/*
+					 * The timestamps range from roughly the
+					 * present (tcp_lasttrim) to the present
+					 * + nfsrc_tcptimeout. Generate a simple
+					 * histogram of where the timeouts fall.
+					 */
+					j = rp->rc_timestamp - tcp_lasttrim;
+					if (j >= nfsrc_tcptimeout)
+						j = nfsrc_tcptimeout - 1;
+					if (j < 0)
+						j = 0;
+					j = (j * 10 / nfsrc_tcptimeout) % 10;
+					time_histo[j]++;
+					if ((rp->rc_flag & RC_REFCNT) ||
+					    tcp_lasttrim > rp->rc_timestamp ||
+					    nfsrc_activesocket(rp, sockref, so))
+						nfsrc_freecache(rp);
+				}
+			}
+			mtx_unlock(&nfsrchash_table[i].mtx);
+		}
+		j = nfsrc_tcphighwater / 5;	/* 20% of it */
+		if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) {
+			/*
+			 * Trim some more with a smaller timeout of as little
+			 * as 20% of nfsrc_tcptimeout to try and get below
+			 * 80% of the nfsrc_tcphighwater.
+			 */
+			k = 0;
+			for (i = 0; i < 8; i++) {
+				k += time_histo[i];
+				if (k > j)
+					break;
+			}
+			k = nfsrc_tcptimeout * (i + 1) / 10;
+			if (k < 1)
+				k = 1;
+			thisstamp = tcp_lasttrim + k;
+			for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+				mtx_lock(&nfsrchash_table[i].mtx);
+				LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl,
+				    rc_hash, nextrp) {
+					if (!(rp->rc_flag &
+					     (RC_INPROG|RC_LOCKED|RC_WANTED))
+					     && rp->rc_refcnt == 0
+					     && ((rp->rc_flag & RC_REFCNT) ||
+						 thisstamp > rp->rc_timestamp ||
+						 nfsrc_activesocket(rp, sockref,
+						    so)))
+						nfsrc_freecache(rp);
+				}
+				mtx_unlock(&nfsrchash_table[i].mtx);
+			}
+		}
 	}
-	NFSUNLOCKCACHE();
+	atomic_store_rel_int(&onethread, 0);
 }
 
 /*
@@ -810,12 +938,14 @@ nfsrc_trimcache(u_int64_t sockref, struc
 APPLESTATIC void
 nfsrvd_refcache(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
-	NFSLOCKCACHE();
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	if (rp->rc_refcnt < 0)
 		panic("nfs cache refcnt");
 	rp->rc_refcnt++;
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*
@@ -824,14 +954,16 @@ nfsrvd_refcache(struct nfsrvcache *rp)
 APPLESTATIC void
 nfsrvd_derefcache(struct nfsrvcache *rp)
 {
+	struct mtx *mutex;
 
-	NFSLOCKCACHE();
+	mutex = nfsrc_cachemutex(rp);
+	mtx_lock(mutex);
 	if (rp->rc_refcnt <= 0)
 		panic("nfs cache derefcnt");
 	rp->rc_refcnt--;
 	if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG)))
 		nfsrc_freecache(rp);
-	NFSUNLOCKCACHE();
+	mtx_unlock(mutex);
 }
 
 /*

Modified: head/sys/fs/nfsserver/nfs_nfsdport.c
==============================================================================
--- head/sys/fs/nfsserver/nfs_nfsdport.c	Wed Aug 14 20:20:42 2013	(r254336)
+++ head/sys/fs/nfsserver/nfs_nfsdport.c	Wed Aug 14 21:11:26 2013	(r254337)
@@ -60,7 +60,8 @@ extern SVCPOOL	*nfsrvd_pool;
 extern struct nfsv4lock nfsd_suspend_lock;
 struct vfsoptlist nfsv4root_opt, nfsv4root_newopt;
 NFSDLOCKMUTEX;
-struct mtx nfs_cache_mutex;
+struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE];
+struct mtx nfsrc_udpmtx;
 struct mtx nfs_v4root_mutex;
 struct nfsrvfh nfs_rootfh, nfs_pubfh;
 int nfs_pubfhset = 0, nfs_rootfhset = 0;
@@ -3278,7 +3279,7 @@ extern int (*nfsd_call_nfsd)(struct thre
 static int
 nfsd_modevent(module_t mod, int type, void *data)
 {
-	int error = 0;
+	int error = 0, i;
 	static int loaded = 0;
 
 	switch (type) {
@@ -3286,7 +3287,14 @@ nfsd_modevent(module_t mod, int type, vo
 		if (loaded)
 			goto out;
 		newnfs_portinit();
-		mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF);
+		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) {
+			snprintf(nfsrchash_table[i].lock_name,
+			    sizeof(nfsrchash_table[i].lock_name), "nfsrc_tcp%d",
+			    i);
+			mtx_init(&nfsrchash_table[i].mtx,
+			    nfsrchash_table[i].lock_name, NULL, MTX_DEF);
+		}
+		mtx_init(&nfsrc_udpmtx, "nfs_udpcache_mutex", NULL, MTX_DEF);
 		mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF);
 		mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL,
 		    MTX_DEF);
@@ -3330,7 +3338,9 @@ nfsd_modevent(module_t mod, int type, vo
 			svcpool_destroy(nfsrvd_pool);
 
 		/* and get rid of the locks */
-		mtx_destroy(&nfs_cache_mutex);
+		for (i = 0; i < NFSRVCACHE_HASHSIZE; i++)
+			mtx_destroy(&nfsrchash_table[i].mtx);
+		mtx_destroy(&nfsrc_udpmtx);
 		mtx_destroy(&nfs_v4root_mutex);
 		mtx_destroy(&nfsv4root_mnt.mnt_mtx);
 		lockdestroy(&nfsv4root_mnt.mnt_explock);



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201308142111.r7ELBQfC044549>