From owner-dev-commits-src-main@freebsd.org Fri Apr 2 03:13:36 2021
Date: Fri, 2 Apr 2021 03:13:36 GMT
Message-Id: <202104020313.1323DaEE060053@gitrepo.freebsd.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org,
    dev-commits-src-main@FreeBSD.org
From: Mateusz Guzik
Subject: git: dc532884d582 - main - cache: fix resizing in face of lockless lookup
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
X-Git-Committer: mjg
X-Git-Repository: src
X-Git-Refname: refs/heads/main
X-Git-Reftype: branch
X-Git-Commit: dc532884d582db6da833d598e4bb37ad1880947c
Auto-Submitted: auto-generated
List-Id: Commit messages for the main branch of the src repository

The branch main has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=dc532884d582db6da833d598e4bb37ad1880947c

commit dc532884d582db6da833d598e4bb37ad1880947c
Author:     Mateusz Guzik
AuthorDate: 2021-03-29 19:17:57 +0000
Commit:     Mateusz Guzik
CommitDate: 2021-04-02 03:11:05 +0000

    cache: fix resizing in face of lockless lookup

    Reported by:    pho
    Tested by:      pho
---
 sys/kern/vfs_cache.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 106 insertions(+), 5 deletions(-)

diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 8cae0260cbf0..3f09f37a7415 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -455,6 +455,9 @@ static long cache_lock_vnodes_cel_3_failures;
 DEBUGNODE_ULONG(vnodes_cel_3_failures, cache_lock_vnodes_cel_3_failures,
     "Number of times 3-way vnode locking failed");
 
+static void cache_fplookup_lockout(void);
+static void cache_fplookup_restore(void);
+
 static void cache_zap_locked(struct namecache *ncp);
 static int vn_fullpath_hardlink(struct nameidata *ndp, char **retbuf,
     char **freebuf, size_t *buflen);
@@ -2549,11 +2552,76 @@ cache_vnode_init(struct vnode *vp)
 	cache_prehash(vp);
 }
 
+/*
+ * Induce transient cache misses for lockless operation in cache_lookup() by
+ * using a temporary hash table.
+ *
+ * This will force a fs lookup.
+ *
+ * Synchronisation is done in 2 steps, calling vfs_smr_quiesce each time
+ * to observe all CPUs not performing the lookup.
+ */
+static void
+cache_changesize_set_temp(struct nchashhead *temptbl, u_long temphash)
+{
+
+	MPASS(temphash < nchash);
+	/*
+	 * Change the size. The new size is smaller and can safely be used
+	 * against the existing table. All lookups which now hash wrong will
+	 * result in a cache miss, which all callers are supposed to know how
+	 * to handle.
+	 */
+	atomic_store_long(&nchash, temphash);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated hash value, but they still
+	 * see the old table.
+	 */
+	atomic_store_ptr(&nchashtbl, temptbl);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated table pointer and size pair.
+	 */
+}
+
+/*
+ * Set the new hash table.
+ *
+ * Similarly to cache_changesize_set_temp(), this has to synchronize against
+ * lockless operation in cache_lookup().
+ */
+static void
+cache_changesize_set_new(struct nchashhead *new_tbl, u_long new_hash)
+{
+
+	MPASS(nchash < new_hash);
+	/*
+	 * Change the pointer first. This won't result in out of bounds access
+	 * since the temporary table is guaranteed to be smaller.
+	 */
+	atomic_store_ptr(&nchashtbl, new_tbl);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated pointer value, but they
+	 * still see the old size.
+	 */
+	atomic_store_long(&nchash, new_hash);
+	atomic_thread_fence_rel();
+	vfs_smr_quiesce();
+	/*
+	 * At this point everyone sees the updated table pointer and size pair.
+	 */
+}
+
 void
 cache_changesize(u_long newmaxvnodes)
 {
-	struct nchashhead *new_nchashtbl, *old_nchashtbl;
-	u_long new_nchash, old_nchash;
+	struct nchashhead *new_nchashtbl, *old_nchashtbl, *temptbl;
+	u_long new_nchash, old_nchash, temphash;
 	struct namecache *ncp;
 	uint32_t hash;
 	u_long newncsize;
@@ -2570,30 +2638,36 @@ cache_changesize(u_long newmaxvnodes)
 		ncfreetbl(new_nchashtbl);
 		return;
 	}
+
+	temptbl = nchinittbl(1, &temphash);
+
 	/*
 	 * Move everything from the old hash table to the new table.
 	 * None of the namecache entries in the table can be removed
 	 * because to do so, they have to be removed from the hash table.
 	 */
+	cache_fplookup_lockout();
 	cache_lock_all_vnodes();
 	cache_lock_all_buckets();
 	old_nchashtbl = nchashtbl;
 	old_nchash = nchash;
-	nchashtbl = new_nchashtbl;
-	nchash = new_nchash;
+	cache_changesize_set_temp(temptbl, temphash);
 	for (i = 0; i <= old_nchash; i++) {
 		while ((ncp = CK_SLIST_FIRST(&old_nchashtbl[i])) != NULL) {
 			hash = cache_get_hash(ncp->nc_name, ncp->nc_nlen,
 			    ncp->nc_dvp);
 			CK_SLIST_REMOVE(&old_nchashtbl[i], ncp, namecache, nc_hash);
-			CK_SLIST_INSERT_HEAD(NCHHASH(hash), ncp, nc_hash);
+			CK_SLIST_INSERT_HEAD(&new_nchashtbl[hash & new_nchash], ncp, nc_hash);
 		}
 	}
 	ncsize = newncsize;
 	cache_recalc_neg_min(ncnegminpct);
+	cache_changesize_set_new(new_nchashtbl, new_nchash);
 	cache_unlock_all_buckets();
 	cache_unlock_all_vnodes();
+	cache_fplookup_restore();
 	ncfreetbl(old_nchashtbl);
+	ncfreetbl(temptbl);
 }
 
 /*
@@ -3666,6 +3740,33 @@ syscal_vfs_cache_fast_lookup(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE,
     &cache_fast_lookup, 0, syscal_vfs_cache_fast_lookup, "IU", "");
 
+/*
+ * Disable lockless lookup and observe all CPUs not executing it.
+ *
+ * Used when resizing the hash table.
+ *
+ * TODO: no provisions are made to handle tweaking of the knob at the same time
+ */
+static void
+cache_fplookup_lockout(void)
+{
+	bool on;
+
+	on = atomic_load_char(&cache_fast_lookup_enabled);
+	if (on) {
+		atomic_store_char(&cache_fast_lookup_enabled, false);
+		atomic_thread_fence_rel();
+		vfs_smr_quiesce();
+	}
+}
+
+static void
+cache_fplookup_restore(void)
+{
+
+	cache_fast_lookup_enabled_recalc();
+}
+
 /*
  * Components of nameidata (or objects it can point to) which may
  * need restoring in case fast path lookup fails.
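
For reference, the ordering argument behind cache_changesize_set_temp() and
cache_changesize_set_new() can be illustrated with a minimal user-space
sketch. This is not the kernel code: table, mask, quiesce() and the C11
memory_order annotations below are stand-ins for nchashtbl, nchash,
vfs_smr_quiesce() and the atomic_store_*()/atomic_thread_fence_rel() pairs in
the patch. A lockless reader may combine the table from one step with the
mask from the other, so each step only publishes a mask that is valid for
every table a reader can still observe.

#include <stdatomic.h>

struct bucket { void *head; };

/* Stand-ins for nchashtbl/nchash; readers index table[hash & mask]. */
static _Atomic(struct bucket *) table;
static atomic_ulong mask;		/* number of buckets - 1 */

/*
 * Stand-in for vfs_smr_quiesce(): a real implementation waits until every
 * CPU has been observed outside its lockless read section.
 */
static void
quiesce(void)
{
}

/* Reader side: pick a bucket, as done under the vfs SMR read section. */
struct bucket *
reader_bucket(unsigned long hash)
{
	struct bucket *t;
	unsigned long m;

	t = atomic_load_explicit(&table, memory_order_acquire);
	m = atomic_load_explicit(&mask, memory_order_acquire);
	return (&t[hash & m]);
}

/* Shrink to the temporary table: publish the smaller mask, then the table. */
void
set_temp(struct bucket *temptbl, unsigned long tempmask)
{
	/* tempmask < mask, so the old table tolerates the new mask. */
	atomic_store_explicit(&mask, tempmask, memory_order_release);
	quiesce();	/* readers: small mask, possibly still the old table */
	atomic_store_explicit(&table, temptbl, memory_order_release);
	quiesce();	/* readers: consistent (table, mask) pair */
}

/* Grow to the final table: publish the bigger table, then the bigger mask. */
void
set_new(struct bucket *newtbl, unsigned long newmask)
{
	/* mask < newmask, so the new table tolerates the old mask. */
	atomic_store_explicit(&table, newtbl, memory_order_release);
	quiesce();
	atomic_store_explicit(&mask, newmask, memory_order_release);
	quiesce();
}

Any lookup that hashes into the wrong bucket during the transition is just a
namecache miss, which cache_lookup() callers already handle by falling back
to the filesystem lookup.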
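
The cache_fplookup_lockout()/cache_fplookup_restore() pair applies the
disable-then-quiesce idiom to a boolean gate. Again a sketch rather than the
kernel code (enabled, quiesce() and lockless_lookup() are placeholder names):
clearing the flag and then waiting for a quiescent state is what lets the
resize path conclude that no CPU is still executing the lockless lookup it
just disabled. The restore side in the patch does not unconditionally
re-enable the flag; it recomputes it via cache_fast_lookup_enabled_recalc().

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool enabled = true;	/* stand-in for cache_fast_lookup_enabled */

static void
quiesce(void)
{
	/* Stand-in for vfs_smr_quiesce(): wait for all current readers to finish. */
}

/* Reader: only take the lockless path while the gate is open. */
bool
lockless_lookup(void)
{
	if (!atomic_load_explicit(&enabled, memory_order_acquire))
		return (false);		/* caller falls back to the locked path */
	/* ... lockless work, done inside an SMR read section ... */
	return (true);
}

/* Writer: close the gate and wait until nobody can still be past the check. */
void
lockout(void)
{
	if (atomic_load_explicit(&enabled, memory_order_relaxed)) {
		atomic_store_explicit(&enabled, false, memory_order_release);
		quiesce();	/* any reader that saw the gate open has finished */
	}
}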