Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 10 Apr 2021 06:02:10 GMT
From:      Mateusz Guzik <mjg@FreeBSD.org>
To:        src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject:   git: 5dd2f40ab47c - stable/13 - cache: fix resizing in face of lockless lookup
Message-ID:  <202104100602.13A62AB5015289@gitrepo.freebsd.org>

next in thread | raw e-mail | index | archive | help
The branch stable/13 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=5dd2f40ab47c02906a0fd86af45794b0e3869483

commit 5dd2f40ab47c02906a0fd86af45794b0e3869483
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2021-03-29 19:17:57 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2021-04-10 05:57:56 +0000

    cache: fix resizing in face of lockless lookup
    
    Reported by:    pho
    Tested by:      pho
    
    (cherry picked from commit dc532884d582db6da833d598e4bb37ad1880947c)
---
 sys/kern/vfs_cache.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 106 insertions(+), 5 deletions(-)

diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index fef1e31d197b..ad91fb1686a4 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -455,6 +455,9 @@ static long cache_lock_vnodes_cel_3_failures;
 DEBUGNODE_ULONG(vnodes_cel_3_failures, cache_lock_vnodes_cel_3_failures,
     "Number of times 3-way vnode locking failed");
 
+static void cache_fplookup_lockout(void);
+static void cache_fplookup_restore(void);
+
 static void cache_zap_locked(struct namecache *ncp);
 static int vn_fullpath_hardlink(struct nameidata *ndp, char **retbuf,
     char **freebuf, size_t *buflen);
@@ -2544,11 +2547,76 @@ cache_vnode_init(struct vnode *vp)
 	cache_prehash(vp);
 }
 
+/*
+ * Induce transient cache misses for lockless operation in cache_lookup() by
+ * using a temporary hash table.
+ *
+ * This will force a fs lookup.
+ *
+ * Synchronisation is done in 2 steps, calling vfs_smr_quiesce each time
+ * to observe all CPUs not performing the lookup.
+ */
+static void
+cache_changesize_set_temp(struct nchashhead *temptbl, u_long temphash)
+{
+
+	MPASS(temphash < nchash);
+	/*
+	 * Change the size. The new size is smaller and can safely be used
+	 * against the existing table. All lookups which now hash wrong will
+	 * result in a cache miss, which all callers are supposed to know how
+	 * to handle.
+	 */
+	atomic_store_long(&nchash, temphash);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated hash value, but they still
+	 * see the old table.
+	 */
+	atomic_store_ptr(&nchashtbl, temptbl);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated table pointer and size pair.
+	 */
+}
+
+/*
+ * Set the new hash table.
+ *
+ * Similarly to cache_changesize_set_temp(), this has to synchronize against
+ * lockless operation in cache_lookup().
+ */
+static void
+cache_changesize_set_new(struct nchashhead *new_tbl, u_long new_hash)
+{
+
+	MPASS(nchash < new_hash);
+	/*
+	 * Change the pointer first. This wont result in out of bounds access
+	 * since the temporary table is guaranteed to be smaller.
+	 */
+	atomic_store_ptr(&nchashtbl, new_tbl);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated pointer value, but they
+	 * still see the old size.
+	 */
+	atomic_store_long(&nchash, new_hash);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated table pointer and size pair.
+	 */
+}
+
 void
 cache_changesize(u_long newmaxvnodes)
 {
-	struct nchashhead *new_nchashtbl, *old_nchashtbl;
-	u_long new_nchash, old_nchash;
+	struct nchashhead *new_nchashtbl, *old_nchashtbl, *temptbl;
+	u_long new_nchash, old_nchash, temphash;
 	struct namecache *ncp;
 	uint32_t hash;
 	u_long newncsize;
@@ -2565,30 +2633,36 @@ cache_changesize(u_long newmaxvnodes)
 		ncfreetbl(new_nchashtbl);
 		return;
 	}
+
+	temptbl = nchinittbl(1, &temphash);
+
 	/*
 	 * Move everything from the old hash table to the new table.
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
+	cache_fplookup_lockout();
 	cache_lock_all_vnodes();
 	cache_lock_all_buckets();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
-	nchashtbl = new_nchashtbl;
-	nchash = new_nchash;
+	cache_changesize_set_temp(temptbl, temphash);
 	for (i = 0; i <= old_nchash; i++) {
 		while ((ncp = CK_SLIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 			    ncp->nc_dvp);
 			CK_SLIST_REMOVE(&old_nchashtbl[i], ncp, namecache, nc_hash);
-			CK_SLIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
+			CK_SLIST_INSERT_HEAD(&new_nchashtbl[hash & new_nchash], ncp, nc_hash);
 		}
 	}
 	ncsize = newncsize;
 	cache_recalc_neg_min(ncnegminpct);
+	cache_changesize_set_new(new_nchashtbl, new_nchash);
 	cache_unlock_all_buckets();
 	cache_unlock_all_vnodes();
+	cache_fplookup_restore();
 	ncfreetbl(old_nchashtbl);
+	ncfreetbl(temptbl);
 }
 
 /*
@@ -3661,6 +3735,33 @@ syscal_vfs_cache_fast_lookup(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
     &cache_fast_lookup, 0, syscal_vfs_cache_fast_lookup, "IU", "");
 
+/*
+ * Disable lockless lookup and observe all CPUs not executing it.
+ *
+ * Used when resizing the hash table.
+ *
+ * TODO: no provisions are made to handle tweaking of the knob at the same time
+ */
+static void
+cache_fplookup_lockout(void)
+{
+	bool on;
+
+	on = atomic_load_char(&cache_fast_lookup_enabled);
+	if (on) {
+		atomic_store_char(&cache_fast_lookup_enabled, false);
+		atomic_thread_fence_rel();
+		vfs_smr_quiesce();
+	}
+}
+
+static void
+cache_fplookup_restore(void)
+{
+
+	cache_fast_lookup_enabled_recalc();
+}
+
 /*
  * Components of nameidata (or objects it can point to) which may
  * need restoring in case fast path lookup fails.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202104100602.13A62AB5015289>