From owner-dev-commits-src-branches@freebsd.org  Sat Apr 10 06:02:11 2021
Return-Path: <owner-dev-commits-src-branches@freebsd.org>
Delivered-To: dev-commits-src-branches@mailman.nyi.freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2610:1c1:1:606c::19:1])
 by mailman.nyi.freebsd.org (Postfix) with ESMTP id 2C2135CF669;
 Sat, 10 Apr 2021 06:02:11 +0000 (UTC) (envelope-from git@FreeBSD.org)
Received: from mxrelay.nyi.freebsd.org (mxrelay.nyi.freebsd.org
 [IPv6:2610:1c1:1:606c::19:3])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256
 client-signature RSA-PSS (4096 bits) client-digest SHA256)
 (Client CN "mxrelay.nyi.freebsd.org", Issuer "R3" (verified OK))
 by mx1.freebsd.org (Postfix) with ESMTPS id 4FHPXg0g0Sz4RJ3;
 Sat, 10 Apr 2021 06:02:11 +0000 (UTC) (envelope-from git@FreeBSD.org)
Received: from gitrepo.freebsd.org (gitrepo.freebsd.org
 [IPv6:2610:1c1:1:6068::e6a:5])
 (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)
 key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256)
 (Client did not present a certificate)
 by mxrelay.nyi.freebsd.org (Postfix) with ESMTPS id 0368B3F67;
 Sat, 10 Apr 2021 06:02:11 +0000 (UTC) (envelope-from git@FreeBSD.org)
Received: from gitrepo.freebsd.org ([127.0.1.44])
 by gitrepo.freebsd.org (8.16.1/8.16.1) with ESMTP id 13A62AfB015290;
 Sat, 10 Apr 2021 06:02:10 GMT (envelope-from git@gitrepo.freebsd.org)
Received: (from git@localhost)
 by gitrepo.freebsd.org (8.16.1/8.16.1/Submit) id 13A62AB5015289;
 Sat, 10 Apr 2021 06:02:10 GMT (envelope-from git)
Date: Sat, 10 Apr 2021 06:02:10 GMT
Message-Id: <202104100602.13A62AB5015289@gitrepo.freebsd.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org,
 dev-commits-src-branches@FreeBSD.org
From: Mateusz Guzik <mjg@FreeBSD.org>
Subject: git: 5dd2f40ab47c - stable/13 - cache: fix resizing in face of
 lockless lookup
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
X-Git-Committer: mjg
X-Git-Repository: src
X-Git-Refname: refs/heads/stable/13
X-Git-Reftype: branch
X-Git-Commit: 5dd2f40ab47c02906a0fd86af45794b0e3869483
Auto-Submitted: auto-generated
X-BeenThere: dev-commits-src-branches@freebsd.org
X-Mailman-Version: 2.1.34
Precedence: list
List-Id: Commits to the stable branches of the FreeBSD src repository
 <dev-commits-src-branches.freebsd.org>
List-Unsubscribe: <https://lists.freebsd.org/mailman/options/dev-commits-src-branches>, 
 <mailto:dev-commits-src-branches-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/dev-commits-src-branches/>
List-Post: <mailto:dev-commits-src-branches@freebsd.org>
List-Help: <mailto:dev-commits-src-branches-request@freebsd.org?subject=help>
List-Subscribe: <https://lists.freebsd.org/mailman/listinfo/dev-commits-src-branches>, 
 <mailto:dev-commits-src-branches-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Sat, 10 Apr 2021 06:02:11 -0000

The branch stable/13 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=5dd2f40ab47c02906a0fd86af45794b0e3869483

commit 5dd2f40ab47c02906a0fd86af45794b0e3869483
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2021-03-29 19:17:57 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2021-04-10 05:57:56 +0000

    cache: fix resizing in face of lockless lookup
    
    Reported by:    pho
    Tested by:      pho
    
    (cherry picked from commit dc532884d582db6da833d598e4bb37ad1880947c)
---
 sys/kern/vfs_cache.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 106 insertions(+), 5 deletions(-)

diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index fef1e31d197b..ad91fb1686a4 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -455,6 +455,9 @@ static long cache_lock_vnodes_cel_3_failures;
 DEBUGNODE_ULONG(vnodes_cel_3_failures, cache_lock_vnodes_cel_3_failures,
     "Number of times 3-way vnode locking failed");
 
+static void cache_fplookup_lockout(void);
+static void cache_fplookup_restore(void);
+
 static void cache_zap_locked(struct namecache *ncp);
 static int vn_fullpath_hardlink(struct nameidata *ndp, char **retbuf,
     char **freebuf, size_t *buflen);
@@ -2544,11 +2547,76 @@ cache_vnode_init(struct vnode *vp)
 	cache_prehash(vp);
 }
 
+/*
+ * Induce transient cache misses for lockless operation in cache_lookup() by
+ * using a temporary hash table.
+ *
+ * This will force a fs lookup.
+ *
+ * Synchronisation is done in 2 steps, calling vfs_smr_quiesce each time
+ * to observe all CPUs not performing the lookup.
+ */
+static void
+cache_changesize_set_temp(struct nchashhead *temptbl, u_long temphash)
+{
+
+	MPASS(temphash < nchash);
+	/*
+	 * Change the size. The new size is smaller and can safely be used
+	 * against the existing table. All lookups which now hash wrong will
+	 * result in a cache miss, which all callers are supposed to know how
+	 * to handle.
+	 */
+	atomic_store_long(&nchash, temphash);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated hash value, but they still
+	 * see the old table.
+	 */
+	atomic_store_ptr(&nchashtbl, temptbl);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated table pointer and size pair.
+	 */
+}
+
+/*
+ * Set the new hash table.
+ *
+ * Similarly to cache_changesize_set_temp(), this has to synchronize against
+ * lockless operation in cache_lookup().
+ */
+static void
+cache_changesize_set_new(struct nchashhead *new_tbl, u_long new_hash)
+{
+
+	MPASS(nchash < new_hash);
+	/*
+	 * Change the pointer first. This won't result in out of bounds access
+	 * since the temporary table is guaranteed to be smaller.
+	 */
+	atomic_store_ptr(&nchashtbl, new_tbl);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated pointer value, but they
+	 * still see the old size.
+	 */
+	atomic_store_long(&nchash, new_hash);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated table pointer and size pair.
+	 */
+}
+
 void
 cache_changesize(u_long newmaxvnodes)
 {
-	struct nchashhead *new_nchashtbl, *old_nchashtbl;
-	u_long new_nchash, old_nchash;
+	struct nchashhead *new_nchashtbl, *old_nchashtbl, *temptbl;
+	u_long new_nchash, old_nchash, temphash;
 	struct namecache *ncp;
 	uint32_t hash;
 	u_long newncsize;
@@ -2565,30 +2633,36 @@ cache_changesize(u_long newmaxvnodes)
 		ncfreetbl(new_nchashtbl);
 		return;
 	}
+
+	temptbl = nchinittbl(1, &temphash);
+
 	/*
 	 * Move everything from the old hash table to the new table.
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
+	cache_fplookup_lockout();
 	cache_lock_all_vnodes();
 	cache_lock_all_buckets();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
-	nchashtbl = new_nchashtbl;
-	nchash = new_nchash;
+	cache_changesize_set_temp(temptbl, temphash);
 	for (i = 0; i <= old_nchash; i++) {
 		while ((ncp = CK_SLIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 			    ncp->nc_dvp);
 			CK_SLIST_REMOVE(&old_nchashtbl[i], ncp, namecache, nc_hash);
-			CK_SLIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
+			CK_SLIST_INSERT_HEAD(&new_nchashtbl[hash & new_nchash], ncp, nc_hash);
 		}
 	}
 	ncsize = newncsize;
 	cache_recalc_neg_min(ncnegminpct);
+	cache_changesize_set_new(new_nchashtbl, new_nchash);
 	cache_unlock_all_buckets();
 	cache_unlock_all_vnodes();
+	cache_fplookup_restore();
 	ncfreetbl(old_nchashtbl);
+	ncfreetbl(temptbl);
 }
 
 /*
@@ -3661,6 +3735,33 @@ syscal_vfs_cache_fast_lookup(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
     &cache_fast_lookup, 0, syscal_vfs_cache_fast_lookup, "IU", "");
 
+/*
+ * Disable lockless lookup and observe all CPUs not executing it.
+ *
+ * Used when resizing the hash table.
+ *
+ * TODO: no provisions are made to handle tweaking of the knob at the same time
+ */
+static void
+cache_fplookup_lockout(void)
+{
+	bool on;
+
+	on = atomic_load_char(&cache_fast_lookup_enabled);
+	if (on) {
+		atomic_store_char(&cache_fast_lookup_enabled, false);
+		atomic_thread_fence_rel();
+		vfs_smr_quiesce();
+	}
+}
+
+static void
+cache_fplookup_restore(void)
+{
+
+	cache_fast_lookup_enabled_recalc();
+}
+
 /*
  * Components of nameidata (or objects it can point to) which may
  * need restoring in case fast path lookup fails.