Date:      Sat, 25 Jul 2020 00:01:57 +0000 (UTC)
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-12@freebsd.org
Subject:   svn commit: r363503 - stable/12/sys/kern
Message-ID:  <202007250001.06P01vOI041025@repo.freebsd.org>

Author: mjg
Date: Sat Jul 25 00:01:57 2020
New Revision: 363503
URL: https://svnweb.freebsd.org/changeset/base/363503

Log:
  MFC r347503,r347505,r347510,r352177,r352178,r352183,r352612,r352617,
      r352631,r352810,r352811,r352812,r352813,r354893,r355124,r355125,
      r356880,r356883,r356915
  
      cache: push sdt probes in cache_zap_locked to code doing the work
      cache: bump numcache on entry, while here fix lnumcache type
      cache: fix a brainfart in r347505
      cache: assorted cleanups
      cache: change the formula for calculating lock array sizes
      cache: avoid excessive relocking on entry removal during lookup
      cache: jump in negative success instead of positive
      cache: count evictions of negative entries
      cache: tidy up handling of negative entries
      cache: stop recalculating upper limit each time a new entry is added
      cache: make negative list shrinking a little bit concurrent
      cache: stop requeuing negative entries on the hot list
      cache: decrease ncnegfactor to 5
      cache: minor stat cleanup
      cache: fix numcache accounting on entry
      cache: stop reusing .. entries on enter
      cache: convert numcachehv to counter(9) on 64-bit platforms
      cache: counter_u64_add_protected -> counter_u64_add
      cache: make numcachehv use counter(9) on all archs

Modified:
  stable/12/sys/kern/vfs_cache.c
Directory Properties:
  stable/12/   (props changed)

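Two of the merged changes convert the numcachehv statistic from a single u_long updated with atomic(9) into a per-CPU counter(9), removing a shared cache line from the vnode hold/drop path. Below is a minimal sketch of that conversion pattern using the counter(9) calls the patch relies on; the init/adjust helpers and the SYSCTL_DECL are illustrative only, not the actual vfs_cache.c code.

#include <sys/param.h>
#include <sys/counter.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

/* was: static u_long numcachehv; updated with atomic_add_rel_long() */
static counter_u64_t numcachehv;

/* Export it read-only, as STATNODE_COUNTER() does; vfs.cache is defined in vfs_cache.c. */
SYSCTL_DECL(_vfs_cache);
SYSCTL_COUNTER_U64(_vfs_cache, OID_AUTO, numcachehv, CTLFLAG_RD,
    &numcachehv, "Number of namecache entries with vnodes held");

static void
numcachehv_init(void)
{

	/* Per-CPU backing storage is allocated once, at cache init time. */
	numcachehv = counter_u64_alloc(M_WAITOK);
}

static void
numcachehv_adjust(int delta)
{

	/* Cheap per-CPU update; counter_u64_fetch() sums the slots when read. */
	counter_u64_add(numcachehv, delta);
}
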
Modified: stable/12/sys/kern/vfs_cache.c
==============================================================================
--- stable/12/sys/kern/vfs_cache.c	Fri Jul 24 23:51:08 2020	(r363502)
+++ stable/12/sys/kern/vfs_cache.c	Sat Jul 25 00:01:57 2020	(r363503)
@@ -91,10 +91,10 @@ SDT_PROBE_DEFINE1(vfs, namecache, purge_negative, done
 SDT_PROBE_DEFINE1(vfs, namecache, purgevfs, done, "struct mount *");
 SDT_PROBE_DEFINE3(vfs, namecache, zap, done, "struct vnode *", "char *",
     "struct vnode *");
-SDT_PROBE_DEFINE3(vfs, namecache, zap_negative, done, "struct vnode *",
-    "char *", "int");
-SDT_PROBE_DEFINE3(vfs, namecache, shrink_negative, done, "struct vnode *",
-    "char *", "int");
+SDT_PROBE_DEFINE2(vfs, namecache, zap_negative, done, "struct vnode *",
+    "char *");
+SDT_PROBE_DEFINE2(vfs, namecache, shrink_negative, done, "struct vnode *",
+    "char *");
 
 /*
  * This structure describes the elements in the cache of recent
@@ -108,7 +108,6 @@ struct	namecache {
 	struct	vnode *nc_dvp;		/* vnode of parent of name */
 	union {
 		struct	vnode *nu_vp;	/* vnode the name refers to */
-		u_int	nu_neghits;	/* negative entry hits */
 	} n_un;
 	u_char	nc_flag;		/* flag bits */
 	u_char	nc_nlen;		/* length of name */
@@ -131,7 +130,6 @@ struct	namecache_ts {
 };
 
 #define	nc_vp		n_un.nu_vp
-#define	nc_neghits	n_un.nu_neghits
 
 /*
  * Flags in namecache.nc_flag
@@ -201,31 +199,22 @@ static __read_mostly LIST_HEAD(nchashhead, namecache) 
 static u_long __read_mostly	nchash;			/* size of hash table */
 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0,
     "Size of namecache hash table");
-static u_long __read_mostly	ncnegfactor = 12; /* ratio of negative entries */
+static u_long __read_mostly	ncnegfactor = 5; /* ratio of negative entries */
 SYSCTL_ULONG(_vfs, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0,
     "Ratio of negative namecache entries");
 static u_long __exclusive_cache_line	numneg;	/* number of negative entries allocated */
-SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0,
-    "Number of negative entries in namecache");
 static u_long __exclusive_cache_line	numcache;/* number of cache entries allocated */
-SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0,
-    "Number of namecache entries");
-static u_long __exclusive_cache_line	numcachehv;/* number of cache entries with vnodes held */
-SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0,
-    "Number of namecache entries with vnodes held");
-u_int __read_mostly	ncsizefactor = 2;
+u_int ncsizefactor = 2;
 SYSCTL_UINT(_vfs, OID_AUTO, ncsizefactor, CTLFLAG_RW, &ncsizefactor, 0,
     "Size factor for namecache");
 static u_int __read_mostly	ncpurgeminvnodes;
 SYSCTL_UINT(_vfs, OID_AUTO, ncpurgeminvnodes, CTLFLAG_RW, &ncpurgeminvnodes, 0,
     "Number of vnodes below which purgevfs ignores the request");
-static u_int __read_mostly	ncneghitsrequeue = 8;
-SYSCTL_UINT(_vfs, OID_AUTO, ncneghitsrequeue, CTLFLAG_RW, &ncneghitsrequeue, 0,
-    "Number of hits to requeue a negative entry in the LRU list");
+static u_int __read_mostly	ncsize; /* the size as computed on creation or resizing */
 
 struct nchstats	nchstats;		/* cache effectiveness statistics */
 
-static struct mtx       ncneg_shrink_lock;
+static struct mtx __exclusive_cache_line	ncneg_shrink_lock;
 static int	shrink_list_turn;
 
 struct neglist {
@@ -235,6 +224,7 @@ struct neglist {
 
 static struct neglist __read_mostly	*neglists;
 static struct neglist ncneg_hot;
+static u_long numhotneg;
 
 #define	numneglists (ncneghash + 1)
 static u_int __read_mostly	ncneghash;
@@ -358,6 +348,7 @@ static SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 
 	SYSCTL_COUNTER_U64(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, descr);
 STATNODE_ULONG(numneg, "Number of negative cache entries");
 STATNODE_ULONG(numcache, "Number of cache entries");
+STATNODE_COUNTER(numcachehv, "Number of namecache entries with vnodes held");
 STATNODE_COUNTER(numcalls, "Number of cache lookups");
 STATNODE_COUNTER(dothits, "Number of '.' hits");
 STATNODE_COUNTER(dotdothits, "Number of '..' hits");
@@ -377,11 +368,20 @@ STATNODE_COUNTER(numfullpathfail2,
     "Number of fullpath search errors (VOP_VPTOCNP failures)");
 STATNODE_COUNTER(numfullpathfail4, "Number of fullpath search errors (ENOMEM)");
 STATNODE_COUNTER(numfullpathfound, "Number of successful fullpath calls");
+STATNODE_COUNTER(zap_and_exit_bucket_relock_success,
+    "Number of successful removals after relocking");
 static long zap_and_exit_bucket_fail; STATNODE_ULONG(zap_and_exit_bucket_fail,
     "Number of times zap_and_exit failed to lock");
+static long zap_and_exit_bucket_fail2; STATNODE_ULONG(zap_and_exit_bucket_fail2,
+    "Number of times zap_and_exit failed to lock");
 static long cache_lock_vnodes_cel_3_failures;
 STATNODE_ULONG(cache_lock_vnodes_cel_3_failures,
     "Number of times 3-way vnode locking failed");
+STATNODE_ULONG(numhotneg, "Number of hot negative entries");
+STATNODE_COUNTER(numneg_evicted,
+    "Number of negative entries evicted when adding a new entry");
+STATNODE_COUNTER(shrinking_skipped,
+    "Number of times shrinking was already in progress");
 
 static void cache_zap_locked(struct namecache *ncp, bool neg_locked);
 static int vn_fullpath1(struct thread *td, struct vnode *vp, struct vnode *rdir,
@@ -393,7 +393,7 @@ static int cache_yield;
 SYSCTL_INT(_vfs_cache, OID_AUTO, yield, CTLFLAG_RD, &cache_yield, 0,
     "Number of times cache called yield");
 
-static void
+static void __noinline
 cache_maybe_yield(void)
 {
 
@@ -452,12 +452,14 @@ cache_assert_bucket_locked(struct namecache *ncp, int 
 #define cache_assert_bucket_locked(x, y) do { } while (0)
 #endif
 
-#define cache_sort(x, y)	_cache_sort((void **)(x), (void **)(y))
+#define cache_sort_vnodes(x, y)	_cache_sort_vnodes((void **)(x), (void **)(y))
 static void
-_cache_sort(void **p1, void **p2)
+_cache_sort_vnodes(void **p1, void **p2)
 {
 	void *tmp;
 
+	MPASS(*p1 != NULL || *p2 != NULL);
+
 	if (*p1 > *p2) {
 		tmp = *p2;
 		*p2 = *p1;
@@ -505,8 +507,7 @@ static int
 cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
-	cache_sort(&vlp1, &vlp2);
-	MPASS(vlp2 != NULL);
+	cache_sort_vnodes(&vlp1, &vlp2);
 
 	if (vlp1 != NULL) {
 		if (!mtx_trylock(vlp1))
@@ -522,6 +523,19 @@ cache_trylock_vnodes(struct mtx *vlp1, struct mtx *vlp
 }
 
 static void
+cache_lock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
+{
+
+	MPASS(vlp1 != NULL || vlp2 != NULL);
+	MPASS(vlp1 <= vlp2);
+
+	if (vlp1 != NULL)
+		mtx_lock(vlp1);
+	if (vlp2 != NULL)
+		mtx_lock(vlp2);
+}
+
+static void
 cache_unlock_vnodes(struct mtx *vlp1, struct mtx *vlp2)
 {
 
@@ -650,8 +664,6 @@ SYSCTL_PROC(_debug_hashstat, OID_AUTO, nchash, CTLTYPE
  *
  * A variation of LRU scheme is used. New entries are hashed into one of
  * numneglists cold lists. Entries get promoted to the hot list on first hit.
- * Partial LRU for the hot list is maintained by requeueing them every
- * ncneghitsrequeue hits.
  *
  * The shrinker will demote hot list head and evict from the cold list in a
  * round-robin manner.
@@ -660,30 +672,15 @@ static void
 cache_negative_hit(struct namecache *ncp)
 {
 	struct neglist *neglist;
-	u_int hits;
 
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
-	hits = atomic_fetchadd_int(&ncp->nc_neghits, 1);
-	if (ncp->nc_flag & NCF_HOTNEGATIVE) {
-		if ((hits % ncneghitsrequeue) != 0)
-			return;
-		mtx_lock(&ncneg_hot.nl_lock);
-		if (ncp->nc_flag & NCF_HOTNEGATIVE) {
-			TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
-			TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
-			mtx_unlock(&ncneg_hot.nl_lock);
-			return;
-		}
-		/*
-		 * The shrinker cleared the flag and removed the entry from
-		 * the hot list. Put it back.
-		 */
-	} else {
-		mtx_lock(&ncneg_hot.nl_lock);
-	}
+	if (ncp->nc_flag & NCF_HOTNEGATIVE)
+		return;
 	neglist = NCP2NEGLIST(ncp);
+	mtx_lock(&ncneg_hot.nl_lock);
 	mtx_lock(&neglist->nl_lock);
 	if (!(ncp->nc_flag & NCF_HOTNEGATIVE)) {
+		numhotneg++;
 		TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
 		TAILQ_INSERT_TAIL(&ncneg_hot.nl_list, ncp, nc_dst);
 		ncp->nc_flag |= NCF_HOTNEGATIVE;
@@ -737,6 +734,7 @@ cache_negative_remove(struct namecache *ncp, bool neg_
 	if (ncp->nc_flag & NCF_HOTNEGATIVE) {
 		mtx_assert(&ncneg_hot.nl_lock, MA_OWNED);
 		TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
+		numhotneg--;
 	} else {
 		mtx_assert(&neglist->nl_lock, MA_OWNED);
 		TAILQ_REMOVE(&neglist->nl_list, ncp, nc_dst);
@@ -782,8 +780,11 @@ cache_negative_zap_one(void)
 	struct mtx *dvlp;
 	struct rwlock *blp;
 
-	if (!mtx_trylock(&ncneg_shrink_lock))
+	if (mtx_owner(&ncneg_shrink_lock) != NULL ||
+	    !mtx_trylock(&ncneg_shrink_lock)) {
+		counter_u64_add(shrinking_skipped, 1);
 		return;
+	}
 
 	mtx_lock(&ncneg_hot.nl_lock);
 	ncp = TAILQ_FIRST(&ncneg_hot.nl_list);
@@ -793,8 +794,10 @@ cache_negative_zap_one(void)
 		TAILQ_REMOVE(&ncneg_hot.nl_list, ncp, nc_dst);
 		TAILQ_INSERT_TAIL(&neglist->nl_list, ncp, nc_dst);
 		ncp->nc_flag &= ~NCF_HOTNEGATIVE;
+		numhotneg--;
 		mtx_unlock(&neglist->nl_lock);
 	}
+	mtx_unlock(&ncneg_hot.nl_lock);
 
 	cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
 	shrink_list_turn++;
@@ -802,16 +805,14 @@ cache_negative_zap_one(void)
 		shrink_list_turn = 0;
 	if (ncp == NULL && shrink_list_turn == 0)
 		cache_negative_shrink_select(shrink_list_turn, &ncp, &neglist);
-	if (ncp == NULL) {
-		mtx_unlock(&ncneg_hot.nl_lock);
-		goto out;
-	}
+	mtx_unlock(&ncneg_shrink_lock);
+	if (ncp == NULL)
+		return;
 
 	MPASS(ncp->nc_flag & NCF_NEGATIVE);
 	dvlp = VP2VNODELOCK(ncp->nc_dvp);
 	blp = NCP2BUCKETLOCK(ncp);
 	mtx_unlock(&neglist->nl_lock);
-	mtx_unlock(&ncneg_hot.nl_lock);
 	mtx_lock(dvlp);
 	rw_wlock(blp);
 	mtx_lock(&neglist->nl_lock);
@@ -819,18 +820,16 @@ cache_negative_zap_one(void)
 	if (ncp != ncp2 || dvlp != VP2VNODELOCK(ncp2->nc_dvp) ||
 	    blp != NCP2BUCKETLOCK(ncp2) || !(ncp2->nc_flag & NCF_NEGATIVE)) {
 		ncp = NULL;
-		goto out_unlock_all;
-	}
-	SDT_PROBE3(vfs, namecache, shrink_negative, done, ncp->nc_dvp,
-	    ncp->nc_name, ncp->nc_neghits);
+	} else {
+		SDT_PROBE2(vfs, namecache, shrink_negative, done, ncp->nc_dvp,
+		    ncp->nc_name);
 
-	cache_zap_locked(ncp, true);
-out_unlock_all:
+		cache_zap_locked(ncp, true);
+		counter_u64_add(numneg_evicted, 1);
+	}
 	mtx_unlock(&neglist->nl_lock);
 	rw_wunlock(blp);
 	mtx_unlock(dvlp);
-out:
-	mtx_unlock(&ncneg_shrink_lock);
 	cache_free(ncp);
 }
 
@@ -851,19 +850,16 @@ cache_zap_locked(struct namecache *ncp, bool neg_locke
 
 	CTR2(KTR_VFS, "cache_zap(%p) vp %p", ncp,
 	    (ncp->nc_flag & NCF_NEGATIVE) ? NULL : ncp->nc_vp);
+	LIST_REMOVE(ncp, nc_hash);
 	if (!(ncp->nc_flag & NCF_NEGATIVE)) {
 		SDT_PROBE3(vfs, namecache, zap, done, ncp->nc_dvp,
 		    ncp->nc_name, ncp->nc_vp);
-	} else {
-		SDT_PROBE3(vfs, namecache, zap_negative, done, ncp->nc_dvp,
-		    ncp->nc_name, ncp->nc_neghits);
-	}
-	LIST_REMOVE(ncp, nc_hash);
-	if (!(ncp->nc_flag & NCF_NEGATIVE)) {
 		TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst, ncp, nc_dst);
 		if (ncp == ncp->nc_vp->v_cache_dd)
 			ncp->nc_vp->v_cache_dd = NULL;
 	} else {
+		SDT_PROBE2(vfs, namecache, zap_negative, done, ncp->nc_dvp,
+		    ncp->nc_name);
 		cache_negative_remove(ncp, neg_locked);
 	}
 	if (ncp->nc_flag & NCF_ISDOTDOT) {
@@ -873,7 +869,7 @@ cache_zap_locked(struct namecache *ncp, bool neg_locke
 		LIST_REMOVE(ncp, nc_src);
 		if (LIST_EMPTY(&ncp->nc_dvp->v_cache_src)) {
 			ncp->nc_flag |= NCF_DVDROP;
-			atomic_subtract_rel_long(&numcachehv, 1);
+			counter_u64_add(numcachehv, -1);
 		}
 	}
 	atomic_subtract_rel_long(&numcache, 1);
@@ -926,7 +922,7 @@ cache_zap_locked_vnode_kl2(struct namecache *ncp, stru
 			mtx_unlock(*vlpp);
 			*vlpp = NULL;
 		}
-		cache_sort(&vlp1, &vlp2);
+		cache_sort_vnodes(&vlp1, &vlp2);
 		if (vlp1 == pvlp) {
 			mtx_lock(vlp2);
 			to_unlock = vlp2;
@@ -952,7 +948,7 @@ out_relock:
 	return (false);
 }
 
-static int
+static int __noinline
 cache_zap_locked_vnode(struct namecache *ncp, struct vnode *vp)
 {
 	struct mtx *pvlp, *vlp1, *vlp2, *to_unlock;
@@ -971,7 +967,7 @@ cache_zap_locked_vnode(struct namecache *ncp, struct v
 	blp = NCP2BUCKETLOCK(ncp);
 	vlp1 = VP2VNODELOCK(ncp->nc_dvp);
 	vlp2 = VP2VNODELOCK(ncp->nc_vp);
-	cache_sort(&vlp1, &vlp2);
+	cache_sort_vnodes(&vlp1, &vlp2);
 	if (vlp1 == pvlp) {
 		mtx_lock(vlp2);
 		to_unlock = vlp2;
@@ -991,10 +987,47 @@ out:
 	return (error);
 }
 
+/*
+ * If trylocking failed we can get here. We know enough to take all needed locks
+ * in the right order and re-lookup the entry.
+ */
 static int
-cache_zap_wlocked_bucket(struct namecache *ncp, struct rwlock *blp)
+cache_zap_unlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+    struct vnode *dvp, struct mtx *dvlp, struct mtx *vlp, uint32_t hash,
+    struct rwlock *blp)
 {
+	struct namecache *rncp;
+
+	cache_assert_bucket_locked(ncp, RA_UNLOCKED);
+
+	cache_sort_vnodes(&dvlp, &vlp);
+	cache_lock_vnodes(dvlp, vlp);
+	rw_wlock(blp);
+	LIST_FOREACH(rncp, (NCHHASH(hash)), nc_hash) {
+		if (rncp == ncp && rncp->nc_dvp == dvp &&
+		    rncp->nc_nlen == cnp->cn_namelen &&
+		    !bcmp(rncp->nc_name, cnp->cn_nameptr, rncp->nc_nlen))
+			break;
+	}
+	if (rncp != NULL) {
+		cache_zap_locked(rncp, false);
+		rw_wunlock(blp);
+		cache_unlock_vnodes(dvlp, vlp);
+		counter_u64_add(zap_and_exit_bucket_relock_success, 1);
+		return (0);
+	}
+
+	rw_wunlock(blp);
+	cache_unlock_vnodes(dvlp, vlp);
+	return (EAGAIN);
+}
+
+static int __noinline
+cache_zap_wlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+    uint32_t hash, struct rwlock *blp)
+{
 	struct mtx *dvlp, *vlp;
+	struct vnode *dvp;
 
 	cache_assert_bucket_locked(ncp, RA_WLOCKED);
 
@@ -1009,14 +1042,17 @@ cache_zap_wlocked_bucket(struct namecache *ncp, struct
 		return (0);
 	}
 
+	dvp = ncp->nc_dvp;
 	rw_wunlock(blp);
-	return (EAGAIN);
+	return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp));
 }
 
-static int
-cache_zap_rlocked_bucket(struct namecache *ncp, struct rwlock *blp)
+static int __noinline
+cache_zap_rlocked_bucket(struct namecache *ncp, struct componentname *cnp,
+    uint32_t hash, struct rwlock *blp)
 {
 	struct mtx *dvlp, *vlp;
+	struct vnode *dvp;
 
 	cache_assert_bucket_locked(ncp, RA_RLOCKED);
 
@@ -1033,8 +1069,9 @@ cache_zap_rlocked_bucket(struct namecache *ncp, struct
 		return (0);
 	}
 
+	dvp = ncp->nc_dvp;
 	rw_runlock(blp);
-	return (EAGAIN);
+	return (cache_zap_unlocked_bucket(ncp, cnp, dvp, dvlp, vlp, hash, blp));
 }
 
 static int
@@ -1049,7 +1086,7 @@ cache_zap_wlocked_bucket_kl(struct namecache *ncp, str
 	vlp = NULL;
 	if (!(ncp->nc_flag & NCF_NEGATIVE))
 		vlp = VP2VNODELOCK(ncp->nc_vp);
-	cache_sort(&dvlp, &vlp);
+	cache_sort_vnodes(&dvlp, &vlp);
 
 	if (*vlpp1 == dvlp && *vlpp2 == vlp) {
 		cache_zap_locked(ncp, false);
@@ -1196,14 +1233,13 @@ retry:
 		goto out_no_entry;
 	}
 
-	counter_u64_add(numposzaps, 1);
-
-	error = cache_zap_wlocked_bucket(ncp, blp);
-	if (error != 0) {
+	error = cache_zap_wlocked_bucket(ncp, cnp, hash, blp);
+	if (__predict_false(error != 0)) {
 		zap_and_exit_bucket_fail++;
 		cache_maybe_yield();
 		goto retry;
 	}
+	counter_u64_add(numposzaps, 1);
 	cache_free(ncp);
 	return (0);
 out_no_entry:
@@ -1323,7 +1359,7 @@ retry:
 	}
 
 	/* We failed to find an entry */
-	if (ncp == NULL) {
+	if (__predict_false(ncp == NULL)) {
 		rw_runlock(blp);
 		SDT_PROBE3(vfs, namecache, lookup, miss, dvp, cnp->cn_nameptr,
 		    NULL);
@@ -1331,35 +1367,17 @@ retry:
 		return (0);
 	}
 
-	/* We found a "positive" match, return the vnode */
-	if (!(ncp->nc_flag & NCF_NEGATIVE)) {
-		counter_u64_add(numposhits, 1);
-		*vpp = ncp->nc_vp;
-		CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
-		    dvp, cnp->cn_nameptr, *vpp, ncp);
-		SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
-		    *vpp);
-		cache_out_ts(ncp, tsp, ticksp);
-		goto success;
-	}
+	if (ncp->nc_flag & NCF_NEGATIVE)
+		goto negative_success;
 
-negative_success:
-	/* We found a negative match, and want to create it, so purge */
-	if (cnp->cn_nameiop == CREATE) {
-		counter_u64_add(numnegzaps, 1);
-		goto zap_and_exit;
-	}
-
-	counter_u64_add(numneghits, 1);
-	cache_negative_hit(ncp);
-	if (ncp->nc_flag & NCF_WHITE)
-		cnp->cn_flags |= ISWHITEOUT;
-	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
-	    ncp->nc_name);
+	/* We found a "positive" match, return the vnode */
+	counter_u64_add(numposhits, 1);
+	*vpp = ncp->nc_vp;
+	CTR4(KTR_VFS, "cache_lookup(%p, %s) found %p via ncp %p",
+	    dvp, cnp->cn_nameptr, *vpp, ncp);
+	SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name,
+	    *vpp);
 	cache_out_ts(ncp, tsp, ticksp);
-	cache_lookup_unlock(blp, dvlp);
-	return (ENOENT);
-
 success:
 	/*
 	 * On success we return a locked and ref'd vnode as per the lookup
@@ -1393,13 +1411,30 @@ success:
 	}
 	return (-1);
 
+negative_success:
+	/* We found a negative match, and want to create it, so purge */
+	if (cnp->cn_nameiop == CREATE) {
+		counter_u64_add(numnegzaps, 1);
+		goto zap_and_exit;
+	}
+
+	counter_u64_add(numneghits, 1);
+	cache_negative_hit(ncp);
+	if (ncp->nc_flag & NCF_WHITE)
+		cnp->cn_flags |= ISWHITEOUT;
+	SDT_PROBE2(vfs, namecache, lookup, hit__negative, dvp,
+	    ncp->nc_name);
+	cache_out_ts(ncp, tsp, ticksp);
+	cache_lookup_unlock(blp, dvlp);
+	return (ENOENT);
+
 zap_and_exit:
 	if (blp != NULL)
-		error = cache_zap_rlocked_bucket(ncp, blp);
+		error = cache_zap_rlocked_bucket(ncp, cnp, hash, blp);
 	else
 		error = cache_zap_locked_vnode(ncp, dvp);
-	if (error != 0) {
-		zap_and_exit_bucket_fail++;
+	if (__predict_false(error != 0)) {
+		zap_and_exit_bucket_fail2++;
 		cache_maybe_yield();
 		goto retry;
 	}
@@ -1435,7 +1470,7 @@ cache_lock_vnodes_cel(struct celockstate *cel, struct 
 
 	vlp1 = VP2VNODELOCK(vp);
 	vlp2 = VP2VNODELOCK(dvp);
-	cache_sort(&vlp1, &vlp2);
+	cache_sort_vnodes(&vlp1, &vlp2);
 
 	if (vlp1 != NULL) {
 		mtx_lock(vlp1);
@@ -1505,7 +1540,7 @@ cache_lock_buckets_cel(struct celockstate *cel, struct
 	MPASS(cel->blp[0] == NULL);
 	MPASS(cel->blp[1] == NULL);
 
-	cache_sort(&blp1, &blp2);
+	cache_sort_vnodes(&blp1, &blp2);
 
 	if (blp1 != NULL) {
 		rw_wlock(blp1);
@@ -1619,6 +1654,33 @@ cache_enter_unlock(struct celockstate *cel)
 	cache_unlock_vnodes_cel(cel);
 }
 
+static void __noinline
+cache_enter_dotdot_prep(struct vnode *dvp, struct vnode *vp,
+    struct componentname *cnp)
+{
+	struct celockstate cel;
+	struct namecache *ncp;
+	uint32_t hash;
+	int len;
+
+	if (dvp->v_cache_dd == NULL)
+		return;
+	len = cnp->cn_namelen;
+	cache_celockstate_init(&cel);
+	hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
+	cache_enter_lock_dd(&cel, dvp, vp, hash);
+	ncp = dvp->v_cache_dd;
+	if (ncp != NULL && (ncp->nc_flag & NCF_ISDOTDOT)) {
+		KASSERT(ncp->nc_dvp == dvp, ("wrong isdotdot parent"));
+		cache_zap_locked(ncp, false);
+	} else {
+		ncp = NULL;
+	}
+	dvp->v_cache_dd = NULL;
+	cache_enter_unlock(&cel);
+	cache_free(ncp);
+}
+
 /*
  * Add an entry to the cache.
  */
@@ -1630,12 +1692,10 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
 	struct namecache *ncp, *n2, *ndd;
 	struct namecache_ts *ncp_ts, *n2_ts;
 	struct nchashhead *ncpp;
-	struct neglist *neglist;
 	uint32_t hash;
 	int flag;
 	int len;
-	bool neg_locked;
-	int lnumcache;
+	u_long lnumcache;
 
 	CTR3(KTR_VFS, "cache_enter(%p, %p, %s)", dvp, vp, cnp->cn_nameptr);
 	VNASSERT(vp == NULL || (vp->v_iflag & VI_DOOMED) == 0, vp,
@@ -1646,69 +1706,28 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
 	if (__predict_false(!doingcache))
 		return;
 
+	flag = 0;
+	if (__predict_false(cnp->cn_nameptr[0] == '.')) {
+		if (cnp->cn_namelen == 1)
+			return;
+		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
+			cache_enter_dotdot_prep(dvp, vp, cnp);
+			flag = NCF_ISDOTDOT;
+		}
+	}
+
 	/*
 	 * Avoid blowout in namecache entries.
 	 */
-	if (__predict_false(numcache >= desiredvnodes * ncsizefactor))
+	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
+	if (__predict_false(lnumcache >= ncsize)) {
+		atomic_add_long(&numcache, -1);
 		return;
+	}
 
 	cache_celockstate_init(&cel);
 	ndd = NULL;
 	ncp_ts = NULL;
-	flag = 0;
-	if (cnp->cn_nameptr[0] == '.') {
-		if (cnp->cn_namelen == 1)
-			return;
-		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
-			len = cnp->cn_namelen;
-			hash = cache_get_hash(cnp->cn_nameptr, len, dvp);
-			cache_enter_lock_dd(&cel, dvp, vp, hash);
-			/*
-			 * If dotdot entry already exists, just retarget it
-			 * to new parent vnode, otherwise continue with new
-			 * namecache entry allocation.
-			 */
-			if ((ncp = dvp->v_cache_dd) != NULL &&
-			    ncp->nc_flag & NCF_ISDOTDOT) {
-				KASSERT(ncp->nc_dvp == dvp,
-				    ("wrong isdotdot parent"));
-				neg_locked = false;
-				if (ncp->nc_flag & NCF_NEGATIVE || vp == NULL) {
-					neglist = NCP2NEGLIST(ncp);
-					mtx_lock(&ncneg_hot.nl_lock);
-					mtx_lock(&neglist->nl_lock);
-					neg_locked = true;
-				}
-				if (!(ncp->nc_flag & NCF_NEGATIVE)) {
-					TAILQ_REMOVE(&ncp->nc_vp->v_cache_dst,
-					    ncp, nc_dst);
-				} else {
-					cache_negative_remove(ncp, true);
-				}
-				if (vp != NULL) {
-					TAILQ_INSERT_HEAD(&vp->v_cache_dst,
-					    ncp, nc_dst);
-					ncp->nc_flag &= ~(NCF_NEGATIVE|NCF_HOTNEGATIVE);
-				} else {
-					ncp->nc_flag &= ~(NCF_HOTNEGATIVE);
-					ncp->nc_flag |= NCF_NEGATIVE;
-					cache_negative_insert(ncp, true);
-				}
-				if (neg_locked) {
-					mtx_unlock(&neglist->nl_lock);
-					mtx_unlock(&ncneg_hot.nl_lock);
-				}
-				ncp->nc_vp = vp;
-				cache_enter_unlock(&cel);
-				return;
-			}
-			dvp->v_cache_dd = NULL;
-			cache_enter_unlock(&cel);
-			cache_celockstate_init(&cel);
-			SDT_PROBE3(vfs, namecache, enter, done, dvp, "..", vp);
-			flag = NCF_ISDOTDOT;
-		}
-	}
 
 	/*
 	 * Calculate the hash key and setup as much of the new
@@ -1800,7 +1819,7 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
 	if (flag != NCF_ISDOTDOT) {
 		if (LIST_EMPTY(&dvp->v_cache_src)) {
 			vhold(dvp);
-			atomic_add_rel_long(&numcachehv, 1);
+			counter_u64_add(numcachehv, 1);
 		}
 		LIST_INSERT_HEAD(&dvp->v_cache_src, ncp, nc_src);
 	}
@@ -1828,7 +1847,6 @@ cache_enter_time(struct vnode *dvp, struct vnode *vp, 
 		    ncp->nc_name);
 	}
 	cache_enter_unlock(&cel);
-	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
 	if (numneg * ncnegfactor > lnumcache)
 		cache_negative_zap_one();
 	cache_free(ndd);
@@ -1875,20 +1893,23 @@ nchinit(void *dummy __unused)
 	    NULL, NULL, NULL, NULL, UMA_ALIGNOF(struct namecache_ts),
 	    UMA_ZONE_ZINIT);
 
+	ncsize = desiredvnodes * ncsizefactor;
 	nchashtbl = hashinit(desiredvnodes * 2, M_VFSCACHE, &nchash);
-	ncbuckethash = cache_roundup_2(mp_ncpus * 64) - 1;
+	ncbuckethash = cache_roundup_2(mp_ncpus * mp_ncpus) - 1;
+	if (ncbuckethash < 7) /* arbitrarily chosen to avoid having one lock */
+		ncbuckethash = 7;
 	if (ncbuckethash > nchash)
 		ncbuckethash = nchash;
 	bucketlocks = malloc(sizeof(*bucketlocks) * numbucketlocks, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numbucketlocks; i++)
 		rw_init_flags(&bucketlocks[i], "ncbuc", RW_DUPOK | RW_RECURSE);
-	ncvnodehash = cache_roundup_2(mp_ncpus * 64) - 1;
+	ncvnodehash = ncbuckethash;
 	vnodelocks = malloc(sizeof(*vnodelocks) * numvnodelocks, M_VFSCACHE,
 	    M_WAITOK | M_ZERO);
 	for (i = 0; i < numvnodelocks; i++)
 		mtx_init(&vnodelocks[i], "ncvn", NULL, MTX_DUPOK | MTX_RECURSE);
-	ncpurgeminvnodes = numbucketlocks;
+	ncpurgeminvnodes = numbucketlocks * 2;
 
 	ncneghash = 3;
 	neglists = malloc(sizeof(*neglists) * numneglists, M_VFSCACHE,
@@ -1902,6 +1923,7 @@ nchinit(void *dummy __unused)
 
 	mtx_init(&ncneg_shrink_lock, "ncnegs", NULL, MTX_DEF);
 
+	numcachehv = counter_u64_alloc(M_WAITOK);
 	numcalls = counter_u64_alloc(M_WAITOK);
 	dothits = counter_u64_alloc(M_WAITOK);
 	dotdothits = counter_u64_alloc(M_WAITOK);
@@ -1917,6 +1939,9 @@ nchinit(void *dummy __unused)
 	numfullpathfail2 = counter_u64_alloc(M_WAITOK);
 	numfullpathfail4 = counter_u64_alloc(M_WAITOK);
 	numfullpathfound = counter_u64_alloc(M_WAITOK);
+	zap_and_exit_bucket_relock_success = counter_u64_alloc(M_WAITOK);
+	numneg_evicted = counter_u64_alloc(M_WAITOK);
+	shrinking_skipped = counter_u64_alloc(M_WAITOK);
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nchinit, NULL);
 
@@ -1927,8 +1952,10 @@ cache_changesize(int newmaxvnodes)
 	u_long new_nchash, old_nchash;
 	struct namecache *ncp;
 	uint32_t hash;
+	int newncsize;
 	int i;
 
+	newncsize = newmaxvnodes * ncsizefactor;
 	newmaxvnodes = cache_roundup_2(newmaxvnodes * 2);
 	if (newmaxvnodes < numbucketlocks)
 		newmaxvnodes = numbucketlocks;
@@ -1958,6 +1985,7 @@ cache_changesize(int newmaxvnodes)
 			LIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
 		}
 	}
+	ncsize = newncsize;
 	cache_unlock_all_buckets();
 	cache_unlock_all_vnodes();
 	free(old_nchashtbl, M_VFSCACHE);
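
Most of the negative-entry churn in the patch revolves around the cold/hot list scheme described in the comment above cache_negative_hit(): new negative entries land on one of several cold lists and are promoted to a single hot list on their first hit, while the shrinker demotes the hot-list head and evicts from the cold lists round-robin. The following is a rough userland sketch of the promotion path only, with pthread/TAILQ stand-ins for the kernel locks and lists and purely illustrative names.

#include <sys/queue.h>
#include <pthread.h>

#define	NCF_HOTNEGATIVE	0x01
#define	NNEGLISTS	4			/* stands in for numneglists */

struct nentry {
	TAILQ_ENTRY(nentry) link;
	unsigned hash;
	unsigned char flag;
};

struct neglist {
	pthread_mutex_t lock;
	TAILQ_HEAD(, nentry) list;
};

static struct neglist coldlists[NNEGLISTS];	/* new negative entries land here */
static struct neglist hotlist;			/* promoted here on first hit */
static unsigned long numhotneg;			/* mirrors the new statistic */

static void
neglists_init(void)
{
	int i;

	for (i = 0; i < NNEGLISTS; i++) {
		pthread_mutex_init(&coldlists[i].lock, NULL);
		TAILQ_INIT(&coldlists[i].list);
	}
	pthread_mutex_init(&hotlist.lock, NULL);
	TAILQ_INIT(&hotlist.list);
}

/* Promotion on a hit; an already-hot entry is left where it is. */
static void
negative_hit(struct nentry *ne)
{
	struct neglist *nl;

	if (ne->flag & NCF_HOTNEGATIVE)
		return;
	nl = &coldlists[ne->hash % NNEGLISTS];
	pthread_mutex_lock(&hotlist.lock);
	pthread_mutex_lock(&nl->lock);
	/* Re-check under both locks; another thread may have won the race. */
	if (!(ne->flag & NCF_HOTNEGATIVE)) {
		numhotneg++;
		TAILQ_REMOVE(&nl->list, ne, link);
		TAILQ_INSERT_TAIL(&hotlist.list, ne, link);
		ne->flag |= NCF_HOTNEGATIVE;
	}
	pthread_mutex_unlock(&nl->lock);
	pthread_mutex_unlock(&hotlist.lock);
}

The cache_negative_hit() hunk above shows the same shape: the old requeue-every-ncneghitsrequeue-hits logic is gone, and the new numhotneg counter tracks how many entries currently sit on the hot list. The shrinker side (cache_negative_zap_one()) does the reverse, demoting the hot-list head back to its cold list before evicting from the cold lists round-robin.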

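cache_enter_time() also stops recomputing the size limit on every insertion: ncsize is precomputed as desiredvnodes * ncsizefactor in nchinit() and refreshed in cache_changesize(), and the entry counter is bumped optimistically up front and rolled back when the limit is hit. In rough outline, using the kernel atomics that appear in the patch (the helper name is hypothetical):

#include <sys/param.h>
#include <machine/atomic.h>

static u_long numcache;		/* entries currently in the cache */
static u_int ncsize;		/* limit, recomputed only at init/resize */

/* Returns 1 if a slot was reserved for a new entry, 0 if the cache is full. */
static int
cache_reserve_entry(void)
{
	u_long lnumcache;

	/* Reserve a slot first; no per-call recomputation of the limit. */
	lnumcache = atomic_fetchadd_long(&numcache, 1) + 1;
	if (__predict_false(lnumcache >= ncsize)) {
		/* Over the precomputed limit: back the increment out. */
		atomic_add_long(&numcache, -1);
		return (0);
	}
	return (1);
}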

