Date: Wed, 10 Jul 2024 22:05:42 GMT
From: Mateusz Guzik <mjg@FreeBSD.org>
To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-branches@FreeBSD.org
Subject: git: b7f6841e00d5 - stable/14 - vfs: make skipping LRU requeue optional
Message-ID: <202407102205.46AM5goS079898@gitrepo.freebsd.org>
The branch stable/14 has been updated by mjg:

URL: https://cgit.FreeBSD.org/src/commit/?id=b7f6841e00d53f1aee65a8cce4f98c239ae4cf75

commit b7f6841e00d53f1aee65a8cce4f98c239ae4cf75
Author:     Mateusz Guzik <mjg@FreeBSD.org>
AuthorDate: 2024-07-08 12:24:41 +0000
Commit:     Mateusz Guzik <mjg@FreeBSD.org>
CommitDate: 2024-07-10 22:04:40 +0000

    vfs: make skipping LRU requeue optional

    As explained in the comment in the code it is a bottleneck in certain
    workloads. On the other hand it does not need to be skipped in most
    cases, while transiently running into the lock being contended happens
    a lot.

    (cherry picked from commit 0a9aa6fdf58468945240e86bf16c268acc8c1776)
---
 sys/kern/vfs_subr.c | 54 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index d1c17dca37d4..646339987ba2 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -224,6 +224,10 @@ static counter_u64_t vnode_skipped_requeues;
 SYSCTL_COUNTER_U64(_vfs_vnode_stats, OID_AUTO, skipped_requeues, CTLFLAG_RD,
     &vnode_skipped_requeues,
     "Number of times LRU requeue was skipped due to lock contention");
+static __read_mostly bool vnode_can_skip_requeue;
+SYSCTL_BOOL(_vfs_vnode_param, OID_AUTO, can_skip_requeue, CTLFLAG_RW,
+    &vnode_can_skip_requeue, 0, "Is LRU requeue skippable");
+
 static u_long deferred_inact;
 SYSCTL_ULONG(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD,
     &deferred_inact, 0, "Number of times inactive processing was deferred");
@@ -3785,31 +3789,41 @@ vdbatch_process(struct vdbatch *vd)
 	 * lock contention, where vnode_list_mtx becomes the primary bottleneck
 	 * if multiple CPUs get here (one real-world example is highly parallel
 	 * do-nothing make , which will stat *tons* of vnodes). Since it is
-	 * quasi-LRU (read: not that great even if fully honoured) just dodge
-	 * the problem. Parties which don't like it are welcome to implement
-	 * something better.
+	 * quasi-LRU (read: not that great even if fully honoured) provide an
+	 * option to just dodge the problem. Parties which don't like it are
+	 * welcome to implement something better.
 	 */
-	critical_enter();
-	if (mtx_trylock(&vnode_list_mtx)) {
-		for (i = 0; i < VDBATCH_SIZE; i++) {
-			vp = vd->tab[i];
-			vd->tab[i] = NULL;
-			TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
-			TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
-			MPASS(vp->v_dbatchcpu != NOCPU);
-			vp->v_dbatchcpu = NOCPU;
+	if (vnode_can_skip_requeue) {
+		if (!mtx_trylock(&vnode_list_mtx)) {
+			counter_u64_add(vnode_skipped_requeues, 1);
+			critical_enter();
+			for (i = 0; i < VDBATCH_SIZE; i++) {
+				vp = vd->tab[i];
+				vd->tab[i] = NULL;
+				MPASS(vp->v_dbatchcpu != NOCPU);
+				vp->v_dbatchcpu = NOCPU;
+			}
+			vd->index = 0;
+			critical_exit();
+			return;
+
 		}
-		mtx_unlock(&vnode_list_mtx);
+		/* fallthrough to locked processing */
 	} else {
-		counter_u64_add(vnode_skipped_requeues, 1);
+		mtx_lock(&vnode_list_mtx);
+	}
 
-		for (i = 0; i < VDBATCH_SIZE; i++) {
-			vp = vd->tab[i];
-			vd->tab[i] = NULL;
-			MPASS(vp->v_dbatchcpu != NOCPU);
-			vp->v_dbatchcpu = NOCPU;
-		}
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
+	critical_enter();
+	for (i = 0; i < VDBATCH_SIZE; i++) {
+		vp = vd->tab[i];
+		vd->tab[i] = NULL;
+		TAILQ_REMOVE(&vnode_list, vp, v_vnodelist);
+		TAILQ_INSERT_TAIL(&vnode_list, vp, v_vnodelist);
+		MPASS(vp->v_dbatchcpu != NOCPU);
+		vp->v_dbatchcpu = NOCPU;
 	}
+	mtx_unlock(&vnode_list_mtx);
 	vd->index = 0;
 	critical_exit();
 }
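Going by the SYSCTL_BOOL and SYSCTL_COUNTER_U64 declarations in the diff, the knob should surface as vfs.vnode.param.can_skip_requeue (CTLFLAG_RW) next to the pre-existing vfs.vnode.stats.skipped_requeues counter, so it can be flipped at runtime with sysctl(8), e.g. "sysctl vfs.vnode.param.can_skip_requeue=1". A minimal userland sketch doing the same through sysctlbyname(3), assuming those OID names resolve as derived above (needs root to set the knob):

/*
 * Sketch: enable the skip-requeue knob, then read the counter that is
 * bumped each time vdbatch_process() skips an LRU requeue.  The OID
 * names are derived from the sysctl declarations in the diff.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	bool enable = true;
	uint64_t skipped;
	size_t len;

	/* Turn on the new CTLFLAG_RW bool (default is off). */
	if (sysctlbyname("vfs.vnode.param.can_skip_requeue", NULL, NULL,
	    &enable, sizeof(enable)) == -1) {
		perror("set vfs.vnode.param.can_skip_requeue");
		return (1);
	}

	/* Read the CTLFLAG_RD counter of skipped requeues. */
	len = sizeof(skipped);
	if (sysctlbyname("vfs.vnode.stats.skipped_requeues", &skipped, &len,
	    NULL, 0) == -1) {
		perror("read vfs.vnode.stats.skipped_requeues");
		return (1);
	}
	printf("LRU requeues skipped: %ju\n", (uintmax_t)skipped);
	return (0);
}

Since the static bool is zero-initialized, the default behaviour is unchanged: vdbatch_process() always takes vnode_list_mtx and requeues, and the counter should only move once the knob is enabled and the trylock fails.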