From owner-svn-src-stable@FreeBSD.ORG Fri Sep 13 23:10:54 2013 Return-Path: Delivered-To: svn-src-stable@freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) (using TLSv1 with cipher ADH-AES256-SHA (256/256 bits)) (No client certificate requested) by hub.freebsd.org (Postfix) with ESMTP id C96C7671; Fri, 13 Sep 2013 23:10:54 +0000 (UTC) (envelope-from rmacklem@FreeBSD.org) Received: from svn.freebsd.org (svn.freebsd.org [IPv6:2001:1900:2254:2068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.freebsd.org (Postfix) with ESMTPS id A71F52FED; Fri, 13 Sep 2013 23:10:54 +0000 (UTC) Received: from svn.freebsd.org ([127.0.1.70]) by svn.freebsd.org (8.14.7/8.14.7) with ESMTP id r8DNAs8u044549; Fri, 13 Sep 2013 23:10:54 GMT (envelope-from rmacklem@svn.freebsd.org) Received: (from rmacklem@localhost) by svn.freebsd.org (8.14.7/8.14.5/Submit) id r8DNArPb044542; Fri, 13 Sep 2013 23:10:53 GMT (envelope-from rmacklem@svn.freebsd.org) Message-Id: <201309132310.r8DNArPb044542@svn.freebsd.org> From: Rick Macklem Date: Fri, 13 Sep 2013 23:10:53 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-9@freebsd.org Subject: svn commit: r255532 - in stable/9/sys/fs: nfs nfsserver X-SVN-Group: stable-9 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Fri, 13 Sep 2013 23:10:55 -0000 Author: rmacklem Date: Fri Sep 13 23:10:53 2013 New Revision: 255532 URL: http://svnweb.freebsd.org/changeset/base/255532 Log: MFC: r254337 Fix several performance related issues in the new NFS server's DRC for NFS over TCP. - Increase the size of the hash tables. - Create a separate mutex for each hash list of the TCP hash table. - Single thread the code that deletes stale cache entries. - Add a tunable called vfs.nfsd.tcphighwater, which can be increased to allow the cache to grow larger, avoiding the overhead of frequent scans to delete stale cache entries. (The default value will result in frequent scans to delete stale cache entries, analagous to what the pre-patched code does.) - Add a tunable called vfs.nfsd.cachetcp that can be used to disable DRC caching for NFS over TCP, since the old NFS server didn't DRC cache TCP. It also adjusts the size of nfsrc_floodlevel dynamically, so that it is always greater than vfs.nfsd.tcphighwater. For UDP the algorithm remains the same as the pre-patched code, but the tunable vfs.nfsd.udphighwater can be used to allow the cache to grow larger and reduce the overhead caused by frequent scans for stale entries. UDP also uses a larger hash table size than the pre-patched code. Modified: stable/9/sys/fs/nfs/nfsport.h stable/9/sys/fs/nfs/nfsrvcache.h stable/9/sys/fs/nfsserver/nfs_nfsdcache.c stable/9/sys/fs/nfsserver/nfs_nfsdport.c Directory Properties: stable/9/sys/ (props changed) stable/9/sys/fs/ (props changed) Modified: stable/9/sys/fs/nfs/nfsport.h ============================================================================== --- stable/9/sys/fs/nfs/nfsport.h Fri Sep 13 21:23:04 2013 (r255531) +++ stable/9/sys/fs/nfs/nfsport.h Fri Sep 13 23:10:53 2013 (r255532) @@ -541,11 +541,6 @@ void nfsrvd_rcv(struct socket *, void *, #define NFSREQSPINLOCK extern struct mtx nfs_req_mutex #define NFSLOCKREQ() mtx_lock(&nfs_req_mutex) #define NFSUNLOCKREQ() mtx_unlock(&nfs_req_mutex) -#define NFSCACHEMUTEX extern struct mtx nfs_cache_mutex -#define NFSCACHEMUTEXPTR (&nfs_cache_mutex) -#define NFSLOCKCACHE() mtx_lock(&nfs_cache_mutex) -#define NFSUNLOCKCACHE() mtx_unlock(&nfs_cache_mutex) -#define NFSCACHELOCKREQUIRED() mtx_assert(&nfs_cache_mutex, MA_OWNED) #define NFSSOCKMUTEX extern struct mtx nfs_slock_mutex #define NFSSOCKMUTEXPTR (&nfs_slock_mutex) #define NFSLOCKSOCK() mtx_lock(&nfs_slock_mutex) Modified: stable/9/sys/fs/nfs/nfsrvcache.h ============================================================================== --- stable/9/sys/fs/nfs/nfsrvcache.h Fri Sep 13 21:23:04 2013 (r255531) +++ stable/9/sys/fs/nfs/nfsrvcache.h Fri Sep 13 23:10:53 2013 (r255532) @@ -41,8 +41,9 @@ #define NFSRVCACHE_MAX_SIZE 2048 #define NFSRVCACHE_MIN_SIZE 64 -#define NFSRVCACHE_HASHSIZE 20 +#define NFSRVCACHE_HASHSIZE 500 +/* Cache table entry. */ struct nfsrvcache { LIST_ENTRY(nfsrvcache) rc_hash; /* Hash chain */ TAILQ_ENTRY(nfsrvcache) rc_lru; /* UDP lru chain */ @@ -104,4 +105,11 @@ struct nfsrvcache { LIST_HEAD(nfsrvhashhead, nfsrvcache); +/* The fine-grained locked cache hash table for TCP. */ +struct nfsrchash_bucket { + struct mtx mtx; + char lock_name[16]; + struct nfsrvhashhead tbl; +}; + #endif /* _NFS_NFSRVCACHE_H_ */ Modified: stable/9/sys/fs/nfsserver/nfs_nfsdcache.c ============================================================================== --- stable/9/sys/fs/nfsserver/nfs_nfsdcache.c Fri Sep 13 21:23:04 2013 (r255531) +++ stable/9/sys/fs/nfsserver/nfs_nfsdcache.c Fri Sep 13 23:10:53 2013 (r255532) @@ -160,15 +160,51 @@ __FBSDID("$FreeBSD$"); #include extern struct nfsstats newnfsstats; -NFSCACHEMUTEX; +extern struct mtx nfsrc_udpmtx; +extern struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; int nfsrc_floodlevel = NFSRVCACHE_FLOODLEVEL, nfsrc_tcpsavedreplies = 0; #endif /* !APPLEKEXT */ -static int nfsrc_tcpnonidempotent = 1; -static int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER, nfsrc_udpcachesize = 0; +SYSCTL_DECL(_vfs_nfsd); + +static u_int nfsrc_tcphighwater = 0; +static int +sysctl_tcphighwater(SYSCTL_HANDLER_ARGS) +{ + int error, newhighwater; + + newhighwater = nfsrc_tcphighwater; + error = sysctl_handle_int(oidp, &newhighwater, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + if (newhighwater < 0) + return (EINVAL); + if (newhighwater >= nfsrc_floodlevel) + nfsrc_floodlevel = newhighwater + newhighwater / 5; + nfsrc_tcphighwater = newhighwater; + return (0); +} +SYSCTL_PROC(_vfs_nfsd, OID_AUTO, tcphighwater, CTLTYPE_UINT | CTLFLAG_RW, 0, + sizeof(nfsrc_tcphighwater), sysctl_tcphighwater, "IU", + "High water mark for TCP cache entries"); + +static u_int nfsrc_udphighwater = NFSRVCACHE_UDPHIGHWATER; +SYSCTL_UINT(_vfs_nfsd, OID_AUTO, udphighwater, CTLFLAG_RW, + &nfsrc_udphighwater, 0, + "High water mark for UDP cache entries"); +static u_int nfsrc_tcptimeout = NFSRVCACHE_TCPTIMEOUT; +SYSCTL_UINT(_vfs_nfsd, OID_AUTO, tcpcachetimeo, CTLFLAG_RW, + &nfsrc_tcptimeout, 0, + "Timeout for TCP entries in the DRC"); +static u_int nfsrc_tcpnonidempotent = 1; +SYSCTL_UINT(_vfs_nfsd, OID_AUTO, cachetcp, CTLFLAG_RW, + &nfsrc_tcpnonidempotent, 0, + "Enable the DRC for NFS over TCP"); + +static int nfsrc_udpcachesize = 0; static TAILQ_HEAD(, nfsrvcache) nfsrvudplru; -static struct nfsrvhashhead nfsrvhashtbl[NFSRVCACHE_HASHSIZE], - nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]; +static struct nfsrvhashhead nfsrvudphashtbl[NFSRVCACHE_HASHSIZE]; + /* * and the reverse mapping from generic to Version 2 procedure numbers */ @@ -197,10 +233,11 @@ static int newnfsv2_procid[NFS_V3NPROCS] NFSV2PROC_NOOP, }; +#define nfsrc_hash(xid) (((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE) #define NFSRCUDPHASH(xid) \ - (&nfsrvudphashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE]) + (&nfsrvudphashtbl[nfsrc_hash(xid)]) #define NFSRCHASH(xid) \ - (&nfsrvhashtbl[((xid) + ((xid) >> 24)) % NFSRVCACHE_HASHSIZE]) + (&nfsrchash_table[nfsrc_hash(xid)].tbl) #define TRUE 1 #define FALSE 0 #define NFSRVCACHE_CHECKLEN 100 @@ -251,6 +288,18 @@ static int nfsrc_getlenandcksum(mbuf_t m static void nfsrc_marksametcpconn(u_int64_t); /* + * Return the correct mutex for this cache entry. + */ +static __inline struct mtx * +nfsrc_cachemutex(struct nfsrvcache *rp) +{ + + if ((rp->rc_flag & RC_UDP) != 0) + return (&nfsrc_udpmtx); + return (&nfsrchash_table[nfsrc_hash(rp->rc_xid)].mtx); +} + +/* * Initialize the server request cache list */ APPLESTATIC void @@ -264,7 +313,7 @@ nfsrvd_initcache(void) inited = 1; for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { LIST_INIT(&nfsrvudphashtbl[i]); - LIST_INIT(&nfsrvhashtbl[i]); + LIST_INIT(&nfsrchash_table[i].tbl); } TAILQ_INIT(&nfsrvudplru); nfsrc_tcpsavedreplies = 0; @@ -325,10 +374,12 @@ nfsrc_getudp(struct nfsrv_descript *nd, struct sockaddr_in6 *saddr6; struct nfsrvhashhead *hp; int ret = 0; + struct mtx *mutex; + mutex = nfsrc_cachemutex(newrp); hp = NFSRCUDPHASH(newrp->rc_xid); loop: - NFSLOCKCACHE(); + mtx_lock(mutex); LIST_FOREACH(rp, hp, rc_hash) { if (newrp->rc_xid == rp->rc_xid && newrp->rc_proc == rp->rc_proc && @@ -336,8 +387,8 @@ loop: nfsaddr_match(NETFAMILY(rp), &rp->rc_haddr, nd->nd_nam)) { if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, - (PZERO - 1) | PDROP, "nfsrc", 10 * hz); + (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, + "nfsrc", 10 * hz); goto loop; } if (rp->rc_flag == 0) @@ -347,14 +398,14 @@ loop: TAILQ_INSERT_TAIL(&nfsrvudplru, rp, rc_lru); if (rp->rc_flag & RC_INPROG) { newnfsstats.srvcache_inproghits++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); ret = RC_DROPIT; } else if (rp->rc_flag & RC_REPSTATUS) { /* * V2 only. */ newnfsstats.srvcache_nonidemdonehits++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; ret = RC_REPLY; @@ -362,7 +413,7 @@ loop: NFSRVCACHE_UDPTIMEOUT; } else if (rp->rc_flag & RC_REPMBUF) { newnfsstats.srvcache_nonidemdonehits++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAIT); ret = RC_REPLY; @@ -377,7 +428,7 @@ loop: } } newnfsstats.srvcache_misses++; - newnfsstats.srvcache_size++; + atomic_add_int(&newnfsstats.srvcache_size, 1); nfsrc_udpcachesize++; newrp->rc_flag |= RC_INPROG; @@ -392,7 +443,7 @@ loop: } LIST_INSERT_HEAD(hp, newrp, rc_hash); TAILQ_INSERT_TAIL(&nfsrvudplru, newrp, rc_lru); - NFSUNLOCKCACHE(); + mtx_unlock(mutex); nd->nd_rp = newrp; ret = RC_DOIT; @@ -410,12 +461,14 @@ nfsrvd_updatecache(struct nfsrv_descript struct nfsrvcache *rp; struct nfsrvcache *retrp = NULL; mbuf_t m; + struct mtx *mutex; rp = nd->nd_rp; if (!rp) panic("nfsrvd_updatecache null rp"); nd->nd_rp = NULL; - NFSLOCKCACHE(); + mutex = nfsrc_cachemutex(rp); + mtx_lock(mutex); nfsrc_lock(rp); if (!(rp->rc_flag & RC_INPROG)) panic("nfsrvd_updatecache not inprog"); @@ -430,7 +483,7 @@ nfsrvd_updatecache(struct nfsrv_descript */ if (nd->nd_repstat == NFSERR_REPLYFROMCACHE) { newnfsstats.srvcache_nonidemdonehits++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); nd->nd_repstat = 0; if (nd->nd_mreq) mbuf_freem(nd->nd_mreq); @@ -438,7 +491,7 @@ nfsrvd_updatecache(struct nfsrv_descript panic("reply from cache"); nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAIT); - rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_TCPTIMEOUT; + rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; nfsrc_unlock(rp); goto out; } @@ -463,29 +516,28 @@ nfsrvd_updatecache(struct nfsrv_descript nfsv2_repstat[newnfsv2_procid[nd->nd_procnum]]) { rp->rc_status = nd->nd_repstat; rp->rc_flag |= RC_REPSTATUS; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } else { if (!(rp->rc_flag & RC_UDP)) { - nfsrc_tcpsavedreplies++; + atomic_add_int(&nfsrc_tcpsavedreplies, 1); if (nfsrc_tcpsavedreplies > newnfsstats.srvcache_tcppeak) newnfsstats.srvcache_tcppeak = nfsrc_tcpsavedreplies; } - NFSUNLOCKCACHE(); + mtx_unlock(mutex); m = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAIT); - NFSLOCKCACHE(); + mtx_lock(mutex); rp->rc_reply = m; rp->rc_flag |= RC_REPMBUF; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } if (rp->rc_flag & RC_UDP) { rp->rc_timestamp = NFSD_MONOSEC + NFSRVCACHE_UDPTIMEOUT; nfsrc_unlock(rp); } else { - rp->rc_timestamp = NFSD_MONOSEC + - NFSRVCACHE_TCPTIMEOUT; + rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; if (rp->rc_refcnt > 0) nfsrc_unlock(rp); else @@ -493,7 +545,7 @@ nfsrvd_updatecache(struct nfsrv_descript } } else { nfsrc_freecache(rp); - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } out: @@ -509,14 +561,16 @@ out: APPLESTATIC void nfsrvd_delcache(struct nfsrvcache *rp) { + struct mtx *mutex; + mutex = nfsrc_cachemutex(rp); if (!(rp->rc_flag & RC_INPROG)) panic("nfsrvd_delcache not in prog"); - NFSLOCKCACHE(); + mtx_lock(mutex); rp->rc_flag &= ~RC_INPROG; if (rp->rc_refcnt == 0 && !(rp->rc_flag & RC_LOCKED)) nfsrc_freecache(rp); - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } /* @@ -528,7 +582,9 @@ APPLESTATIC void nfsrvd_sentcache(struct nfsrvcache *rp, struct socket *so, int err) { tcp_seq tmp_seq; + struct mtx *mutex; + mutex = nfsrc_cachemutex(rp); if (!(rp->rc_flag & RC_LOCKED)) panic("nfsrvd_sentcache not locked"); if (!err) { @@ -537,10 +593,10 @@ nfsrvd_sentcache(struct nfsrvcache *rp, so->so_proto->pr_protocol != IPPROTO_TCP) panic("nfs sent cache"); if (nfsrv_getsockseqnum(so, &tmp_seq)) { - NFSLOCKCACHE(); + mtx_lock(mutex); rp->rc_tcpseq = tmp_seq; rp->rc_flag |= RC_TCPSEQ; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } } nfsrc_unlock(rp); @@ -559,11 +615,13 @@ nfsrc_gettcp(struct nfsrv_descript *nd, struct nfsrvcache *hitrp; struct nfsrvhashhead *hp, nfsrc_templist; int hit, ret = 0; + struct mtx *mutex; + mutex = nfsrc_cachemutex(newrp); hp = NFSRCHASH(newrp->rc_xid); newrp->rc_reqlen = nfsrc_getlenandcksum(nd->nd_mrep, &newrp->rc_cksum); tryagain: - NFSLOCKCACHE(); + mtx_lock(mutex); hit = 1; LIST_INIT(&nfsrc_templist); /* @@ -621,8 +679,8 @@ tryagain: rp = hitrp; if ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, - (PZERO - 1) | PDROP, "nfsrc", 10 * hz); + (void)mtx_sleep(rp, mutex, (PZERO - 1) | PDROP, + "nfsrc", 10 * hz); goto tryagain; } if (rp->rc_flag == 0) @@ -630,7 +688,7 @@ tryagain: rp->rc_flag |= RC_LOCKED; if (rp->rc_flag & RC_INPROG) { newnfsstats.srvcache_inproghits++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); ret = RC_DROPIT; @@ -639,24 +697,22 @@ tryagain: * V2 only. */ newnfsstats.srvcache_nonidemdonehits++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); ret = RC_REPLY; nfsrvd_rephead(nd); *(nd->nd_errp) = rp->rc_status; - rp->rc_timestamp = NFSD_MONOSEC + - NFSRVCACHE_TCPTIMEOUT; + rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else if (rp->rc_flag & RC_REPMBUF) { newnfsstats.srvcache_nonidemdonehits++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); if (newrp->rc_sockref == rp->rc_sockref) nfsrc_marksametcpconn(rp->rc_sockref); ret = RC_REPLY; nd->nd_mreq = m_copym(rp->rc_reply, 0, M_COPYALL, M_WAIT); - rp->rc_timestamp = NFSD_MONOSEC + - NFSRVCACHE_TCPTIMEOUT; + rp->rc_timestamp = NFSD_MONOSEC + nfsrc_tcptimeout; } else { panic("nfs tcp cache1"); } @@ -665,7 +721,7 @@ tryagain: goto out; } newnfsstats.srvcache_misses++; - newnfsstats.srvcache_size++; + atomic_add_int(&newnfsstats.srvcache_size, 1); /* * For TCP, multiple entries for a key are allowed, so don't @@ -674,7 +730,7 @@ tryagain: newrp->rc_cachetime = NFSD_MONOSEC; newrp->rc_flag |= RC_INPROG; LIST_INSERT_HEAD(hp, newrp, rc_hash); - NFSUNLOCKCACHE(); + mtx_unlock(mutex); nd->nd_rp = newrp; ret = RC_DOIT; @@ -685,16 +741,17 @@ out: /* * Lock a cache entry. - * Also puts a mutex lock on the cache list. */ static void nfsrc_lock(struct nfsrvcache *rp) { - NFSCACHELOCKREQUIRED(); + struct mtx *mutex; + + mutex = nfsrc_cachemutex(rp); + mtx_assert(mutex, MA_OWNED); while ((rp->rc_flag & RC_LOCKED) != 0) { rp->rc_flag |= RC_WANTED; - (void)mtx_sleep(rp, NFSCACHEMUTEXPTR, PZERO - 1, - "nfsrc", 0); + (void)mtx_sleep(rp, mutex, PZERO - 1, "nfsrc", 0); } rp->rc_flag |= RC_LOCKED; } @@ -705,11 +762,13 @@ nfsrc_lock(struct nfsrvcache *rp) static void nfsrc_unlock(struct nfsrvcache *rp) { + struct mtx *mutex; - NFSLOCKCACHE(); + mutex = nfsrc_cachemutex(rp); + mtx_lock(mutex); rp->rc_flag &= ~RC_LOCKED; nfsrc_wanted(rp); - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } /* @@ -732,7 +791,6 @@ static void nfsrc_freecache(struct nfsrvcache *rp) { - NFSCACHELOCKREQUIRED(); LIST_REMOVE(rp, rc_hash); if (rp->rc_flag & RC_UDP) { TAILQ_REMOVE(&nfsrvudplru, rp, rc_lru); @@ -742,10 +800,10 @@ nfsrc_freecache(struct nfsrvcache *rp) if (rp->rc_flag & RC_REPMBUF) { mbuf_freem(rp->rc_reply); if (!(rp->rc_flag & RC_UDP)) - nfsrc_tcpsavedreplies--; + atomic_add_int(&nfsrc_tcpsavedreplies, -1); } FREE((caddr_t)rp, M_NFSRVCACHE); - newnfsstats.srvcache_size--; + atomic_add_int(&newnfsstats.srvcache_size, -1); } /* @@ -757,20 +815,21 @@ nfsrvd_cleancache(void) struct nfsrvcache *rp, *nextrp; int i; - NFSLOCKCACHE(); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) { + mtx_lock(&nfsrchash_table[i].mtx); + LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, nextrp) nfsrc_freecache(rp); - } + mtx_unlock(&nfsrchash_table[i].mtx); } + mtx_lock(&nfsrc_udpmtx); for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { LIST_FOREACH_SAFE(rp, &nfsrvudphashtbl[i], rc_hash, nextrp) { nfsrc_freecache(rp); } } newnfsstats.srvcache_size = 0; + mtx_unlock(&nfsrc_udpmtx); nfsrc_tcpsavedreplies = 0; - NFSUNLOCKCACHE(); } /* @@ -780,28 +839,97 @@ static void nfsrc_trimcache(u_int64_t sockref, struct socket *so) { struct nfsrvcache *rp, *nextrp; - int i; + int i, j, k, time_histo[10]; + time_t thisstamp; + static time_t udp_lasttrim = 0, tcp_lasttrim = 0; + static int onethread = 0; - NFSLOCKCACHE(); - TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { - if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) - && rp->rc_refcnt == 0 - && ((rp->rc_flag & RC_REFCNT) || - NFSD_MONOSEC > rp->rc_timestamp || - nfsrc_udpcachesize > nfsrc_udphighwater)) - nfsrc_freecache(rp); - } - for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { - LIST_FOREACH_SAFE(rp, &nfsrvhashtbl[i], rc_hash, nextrp) { + if (atomic_cmpset_acq_int(&onethread, 0, 1) == 0) + return; + if (NFSD_MONOSEC != udp_lasttrim || + nfsrc_udpcachesize >= (nfsrc_udphighwater + + nfsrc_udphighwater / 2)) { + mtx_lock(&nfsrc_udpmtx); + udp_lasttrim = NFSD_MONOSEC; + TAILQ_FOREACH_SAFE(rp, &nfsrvudplru, rc_lru, nextrp) { if (!(rp->rc_flag & (RC_INPROG|RC_LOCKED|RC_WANTED)) && rp->rc_refcnt == 0 && ((rp->rc_flag & RC_REFCNT) || - NFSD_MONOSEC > rp->rc_timestamp || - nfsrc_activesocket(rp, sockref, so))) + udp_lasttrim > rp->rc_timestamp || + nfsrc_udpcachesize > nfsrc_udphighwater)) nfsrc_freecache(rp); } + mtx_unlock(&nfsrc_udpmtx); + } + if (NFSD_MONOSEC != tcp_lasttrim || + nfsrc_tcpsavedreplies >= nfsrc_tcphighwater) { + for (i = 0; i < 10; i++) + time_histo[i] = 0; + for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { + mtx_lock(&nfsrchash_table[i].mtx); + if (i == 0) + tcp_lasttrim = NFSD_MONOSEC; + LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, rc_hash, + nextrp) { + if (!(rp->rc_flag & + (RC_INPROG|RC_LOCKED|RC_WANTED)) + && rp->rc_refcnt == 0) { + /* + * The timestamps range from roughly the + * present (tcp_lasttrim) to the present + * + nfsrc_tcptimeout. Generate a simple + * histogram of where the timeouts fall. + */ + j = rp->rc_timestamp - tcp_lasttrim; + if (j >= nfsrc_tcptimeout) + j = nfsrc_tcptimeout - 1; + if (j < 0) + j = 0; + j = (j * 10 / nfsrc_tcptimeout) % 10; + time_histo[j]++; + if ((rp->rc_flag & RC_REFCNT) || + tcp_lasttrim > rp->rc_timestamp || + nfsrc_activesocket(rp, sockref, so)) + nfsrc_freecache(rp); + } + } + mtx_unlock(&nfsrchash_table[i].mtx); + } + j = nfsrc_tcphighwater / 5; /* 20% of it */ + if (j > 0 && (nfsrc_tcpsavedreplies + j) > nfsrc_tcphighwater) { + /* + * Trim some more with a smaller timeout of as little + * as 20% of nfsrc_tcptimeout to try and get below + * 80% of the nfsrc_tcphighwater. + */ + k = 0; + for (i = 0; i < 8; i++) { + k += time_histo[i]; + if (k > j) + break; + } + k = nfsrc_tcptimeout * (i + 1) / 10; + if (k < 1) + k = 1; + thisstamp = tcp_lasttrim + k; + for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { + mtx_lock(&nfsrchash_table[i].mtx); + LIST_FOREACH_SAFE(rp, &nfsrchash_table[i].tbl, + rc_hash, nextrp) { + if (!(rp->rc_flag & + (RC_INPROG|RC_LOCKED|RC_WANTED)) + && rp->rc_refcnt == 0 + && ((rp->rc_flag & RC_REFCNT) || + thisstamp > rp->rc_timestamp || + nfsrc_activesocket(rp, sockref, + so))) + nfsrc_freecache(rp); + } + mtx_unlock(&nfsrchash_table[i].mtx); + } + } } - NFSUNLOCKCACHE(); + atomic_store_rel_int(&onethread, 0); } /* @@ -810,12 +938,14 @@ nfsrc_trimcache(u_int64_t sockref, struc APPLESTATIC void nfsrvd_refcache(struct nfsrvcache *rp) { + struct mtx *mutex; - NFSLOCKCACHE(); + mutex = nfsrc_cachemutex(rp); + mtx_lock(mutex); if (rp->rc_refcnt < 0) panic("nfs cache refcnt"); rp->rc_refcnt++; - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } /* @@ -824,14 +954,16 @@ nfsrvd_refcache(struct nfsrvcache *rp) APPLESTATIC void nfsrvd_derefcache(struct nfsrvcache *rp) { + struct mtx *mutex; - NFSLOCKCACHE(); + mutex = nfsrc_cachemutex(rp); + mtx_lock(mutex); if (rp->rc_refcnt <= 0) panic("nfs cache derefcnt"); rp->rc_refcnt--; if (rp->rc_refcnt == 0 && !(rp->rc_flag & (RC_LOCKED | RC_INPROG))) nfsrc_freecache(rp); - NFSUNLOCKCACHE(); + mtx_unlock(mutex); } /* Modified: stable/9/sys/fs/nfsserver/nfs_nfsdport.c ============================================================================== --- stable/9/sys/fs/nfsserver/nfs_nfsdport.c Fri Sep 13 21:23:04 2013 (r255531) +++ stable/9/sys/fs/nfsserver/nfs_nfsdport.c Fri Sep 13 23:10:53 2013 (r255532) @@ -60,7 +60,8 @@ extern SVCPOOL *nfsrvd_pool; extern struct nfsv4lock nfsd_suspend_lock; struct vfsoptlist nfsv4root_opt, nfsv4root_newopt; NFSDLOCKMUTEX; -struct mtx nfs_cache_mutex; +struct nfsrchash_bucket nfsrchash_table[NFSRVCACHE_HASHSIZE]; +struct mtx nfsrc_udpmtx; struct mtx nfs_v4root_mutex; struct nfsrvfh nfs_rootfh, nfs_pubfh; int nfs_pubfhset = 0, nfs_rootfhset = 0; @@ -3291,7 +3292,7 @@ extern int (*nfsd_call_nfsd)(struct thre static int nfsd_modevent(module_t mod, int type, void *data) { - int error = 0; + int error = 0, i; static int loaded = 0; switch (type) { @@ -3299,7 +3300,14 @@ nfsd_modevent(module_t mod, int type, vo if (loaded) goto out; newnfs_portinit(); - mtx_init(&nfs_cache_mutex, "nfs_cache_mutex", NULL, MTX_DEF); + for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) { + snprintf(nfsrchash_table[i].lock_name, + sizeof(nfsrchash_table[i].lock_name), "nfsrc_tcp%d", + i); + mtx_init(&nfsrchash_table[i].mtx, + nfsrchash_table[i].lock_name, NULL, MTX_DEF); + } + mtx_init(&nfsrc_udpmtx, "nfs_udpcache_mutex", NULL, MTX_DEF); mtx_init(&nfs_v4root_mutex, "nfs_v4root_mutex", NULL, MTX_DEF); mtx_init(&nfsv4root_mnt.mnt_mtx, "struct mount mtx", NULL, MTX_DEF); @@ -3343,7 +3351,9 @@ nfsd_modevent(module_t mod, int type, vo svcpool_destroy(nfsrvd_pool); /* and get rid of the locks */ - mtx_destroy(&nfs_cache_mutex); + for (i = 0; i < NFSRVCACHE_HASHSIZE; i++) + mtx_destroy(&nfsrchash_table[i].mtx); + mtx_destroy(&nfsrc_udpmtx); mtx_destroy(&nfs_v4root_mutex); mtx_destroy(&nfsv4root_mnt.mnt_mtx); lockdestroy(&nfsv4root_mnt.mnt_explock);