Date: Sat, 23 Jan 2021 15:04:56 GMT From: Mateusz Guzik <mjg@FreeBSD.org> To: src-committers@FreeBSD.org, dev-commits-src-all@FreeBSD.org, dev-commits-src-main@FreeBSD.org Subject: git: 739ecbcf1c4f - main - cache: add symlink support to lockless lookup Message-ID: <202101231504.10NF4uka076387@gitrepo.freebsd.org>
next in thread | raw e-mail | index | archive | help
The branch main has been updated by mjg: URL: https://cgit.FreeBSD.org/src/commit/?id=739ecbcf1c4fd22b5f6ee0bb180a67644046a3e0 commit 739ecbcf1c4fd22b5f6ee0bb180a67644046a3e0 Author: Mateusz Guzik <mjg@FreeBSD.org> AuthorDate: 2021-01-23 13:40:48 +0000 Commit: Mateusz Guzik <mjg@FreeBSD.org> CommitDate: 2021-01-23 15:04:43 +0000 cache: add symlink support to lockless lookup Reviewed by: kib (previous version) Tested by: pho (previous version) Differential Revision: https://reviews.freebsd.org/D27488 --- sys/fs/deadfs/dead_vnops.c | 1 + sys/kern/vfs_cache.c | 214 ++++++++++++++++++++++++++++++++++++++------- sys/kern/vfs_lookup.c | 11 ++- sys/kern/vfs_subr.c | 13 +++ sys/kern/vnode_if.src | 10 +++ sys/sys/namei.h | 4 +- sys/sys/param.h | 2 +- sys/sys/vnode.h | 9 ++ 8 files changed, 227 insertions(+), 37 deletions(-) diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c index 49f852791387..09c3c996ee0e 100644 --- a/sys/fs/deadfs/dead_vnops.c +++ b/sys/fs/deadfs/dead_vnops.c @@ -80,6 +80,7 @@ struct vop_vector dead_vnodeops = { .vop_unset_text = dead_unset_text, .vop_write = dead_write, .vop_fplookup_vexec = VOP_EOPNOTSUPP, + .vop_fplookup_symlink = VOP_EAGAIN, }; VFS_VOP_VECTOR_REGISTER(dead_vnodeops); diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index cfa25dc59ee4..770c8ebf061b 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -562,6 +562,36 @@ static uma_zone_t __read_mostly cache_zone_small_ts; static uma_zone_t __read_mostly cache_zone_large; static uma_zone_t __read_mostly cache_zone_large_ts; +char * +cache_symlink_alloc(size_t size, int flags) +{ + + if (size < CACHE_ZONE_SMALL_SIZE) { + return (uma_zalloc_smr(cache_zone_small, flags)); + } + if (size < CACHE_ZONE_LARGE_SIZE) { + return (uma_zalloc_smr(cache_zone_large, flags)); + } + return (NULL); +} + +void +cache_symlink_free(char *string, size_t size) +{ + + MPASS(string != NULL); + + if (size < CACHE_ZONE_SMALL_SIZE) { + uma_zfree_smr(cache_zone_small, string); + return; + } + if (size < CACHE_ZONE_LARGE_SIZE) { + uma_zfree_smr(cache_zone_large, string); + return; + } + __assert_unreachable(); +} + static struct namecache * cache_alloc_uma(int len, bool ts) { @@ -3584,6 +3614,7 @@ cache_fast_lookup_enabled_recalc(void) #ifdef MAC mac_on = mac_vnode_check_lookup_enabled(); + mac_on |= mac_vnode_check_readlink_enabled(); #else mac_on = 0; #endif @@ -3615,6 +3646,7 @@ SYSCTL_PROC(_vfs, OID_AUTO, cache_fast_lookup, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MP * need restoring in case fast path lookup fails. */ struct nameidata_outer { + size_t ni_pathlen; int cn_flags; }; @@ -3656,8 +3688,10 @@ static bool cache_fplookup_is_mp(struct cache_fpl *fpl); static int cache_fplookup_cross_mount(struct cache_fpl *fpl); static int cache_fplookup_partial_setup(struct cache_fpl *fpl); static int cache_fplookup_skip_slashes(struct cache_fpl *fpl); +static int cache_fplookup_preparse(struct cache_fpl *fpl); static void cache_fpl_pathlen_dec(struct cache_fpl *fpl); static void cache_fpl_pathlen_inc(struct cache_fpl *fpl); +static void cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n); static void cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n); static void @@ -3698,6 +3732,7 @@ static void cache_fpl_checkpoint_outer(struct cache_fpl *fpl) { + fpl->snd_outer.ni_pathlen = fpl->ndp->ni_pathlen; fpl->snd_outer.cn_flags = fpl->ndp->ni_cnd.cn_flags; } @@ -3731,6 +3766,7 @@ cache_fpl_restore_abort(struct cache_fpl *fpl) */ fpl->ndp->ni_resflags = 0; fpl->ndp->ni_cnd.cn_nameptr = fpl->ndp->ni_cnd.cn_pnbuf; + fpl->ndp->ni_pathlen = fpl->snd_outer.ni_pathlen; } #ifdef INVARIANTS @@ -3752,6 +3788,7 @@ cache_fpl_assert_status(struct cache_fpl *fpl) case CACHE_FPL_STATUS_UNSET: __assert_unreachable(); break; + case CACHE_FPL_STATUS_DESTROYED: case CACHE_FPL_STATUS_ABORTED: case CACHE_FPL_STATUS_PARTIAL: case CACHE_FPL_STATUS_HANDLED: @@ -3804,6 +3841,11 @@ cache_fpl_aborted_early_impl(struct cache_fpl *fpl, int line) static int __noinline cache_fpl_aborted_impl(struct cache_fpl *fpl, int line) { + struct nameidata *ndp; + struct componentname *cnp; + + ndp = fpl->ndp; + cnp = fpl->cnp; if (fpl->status != CACHE_FPL_STATUS_UNSET) { KASSERT(fpl->status == CACHE_FPL_STATUS_PARTIAL, @@ -3815,6 +3857,14 @@ cache_fpl_aborted_impl(struct cache_fpl *fpl, int line) if (fpl->in_smr) cache_fpl_smr_exit(fpl); cache_fpl_restore_abort(fpl); + /* + * Resolving symlinks overwrites data passed by the caller. + * Let namei know. + */ + if (ndp->ni_loopcnt > 0) { + fpl->status = CACHE_FPL_STATUS_DESTROYED; + cache_fpl_cleanup_cnp(cnp); + } return (CACHE_FPL_FAILED); } @@ -3955,13 +4005,6 @@ cache_fplookup_dirfd(struct cache_fpl *fpl, struct vnode **vpp) return (0); } -static bool -cache_fplookup_vnode_supported(struct vnode *vp) -{ - - return (vp->v_type != VLNK); -} - static int __noinline cache_fplookup_negative_promote(struct cache_fpl *fpl, struct namecache *oncp, uint32_t hash) @@ -4242,8 +4285,7 @@ cache_fplookup_final_modifying(struct cache_fpl *fpl) * Since we expect this to be the terminal vnode it should * almost never be true. */ - if (__predict_false(!cache_fplookup_vnode_supported(tvp) || - cache_fplookup_is_mp(fpl))) { + if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) { vput(dvp); vput(tvp); return (cache_fpl_aborted(fpl)); @@ -4546,8 +4588,7 @@ cache_fplookup_noentry(struct cache_fpl *fpl) return (cache_fpl_handled(fpl)); } - if (__predict_false(!cache_fplookup_vnode_supported(tvp) || - cache_fplookup_is_mp(fpl))) { + if (__predict_false(tvp->v_type == VLNK || cache_fplookup_is_mp(fpl))) { vput(dvp); vput(tvp); return (cache_fpl_aborted(fpl)); @@ -4688,6 +4729,102 @@ cache_fplookup_neg(struct cache_fpl *fpl, struct namecache *ncp, uint32_t hash) return (cache_fpl_handled_error(fpl, ENOENT)); } +/* + * Resolve a symlink. Called by filesystem-specific routines. + * + * Code flow is: + * ... -> cache_fplookup_symlink -> VOP_FPLOOKUP_SYMLINK -> cache_symlink_resolve + */ +int +cache_symlink_resolve(struct cache_fpl *fpl, const char *string, size_t len) +{ + struct nameidata *ndp; + struct componentname *cnp; + + ndp = fpl->ndp; + cnp = fpl->cnp; + + if (__predict_false(len == 0)) { + return (ENOENT); + } + + ndp->ni_pathlen = fpl->nulchar - cnp->cn_nameptr - cnp->cn_namelen + 1; +#ifdef INVARIANTS + if (ndp->ni_pathlen != fpl->debug.ni_pathlen) { + panic("%s: mismatch (%zu != %zu) nulchar %p nameptr %p [%s] ; full string [%s]\n", + __func__, ndp->ni_pathlen, fpl->debug.ni_pathlen, fpl->nulchar, + cnp->cn_nameptr, cnp->cn_nameptr, cnp->cn_pnbuf); + } +#endif + + if (__predict_false(len + ndp->ni_pathlen > MAXPATHLEN)) { + return (ENAMETOOLONG); + } + + if (__predict_false(ndp->ni_loopcnt++ >= MAXSYMLINKS)) { + return (ELOOP); + } + + if (ndp->ni_pathlen > 1) { + bcopy(ndp->ni_next, cnp->cn_pnbuf + len, ndp->ni_pathlen); + } else { + cnp->cn_pnbuf[len] = '\0'; + } + bcopy(string, cnp->cn_pnbuf, len); + + ndp->ni_pathlen += len; + cache_fpl_pathlen_add(fpl, len); + cnp->cn_nameptr = cnp->cn_pnbuf; + fpl->nulchar = &cnp->cn_nameptr[ndp->ni_pathlen - 1]; + + return (0); +} + +static int __noinline +cache_fplookup_symlink(struct cache_fpl *fpl) +{ + struct nameidata *ndp; + struct componentname *cnp; + struct vnode *dvp, *tvp; + int error; + + ndp = fpl->ndp; + cnp = fpl->cnp; + dvp = fpl->dvp; + tvp = fpl->tvp; + + if (cache_fpl_islastcn(ndp)) { + if ((cnp->cn_flags & FOLLOW) == 0) { + return (cache_fplookup_final(fpl)); + } + } + + error = VOP_FPLOOKUP_SYMLINK(tvp, fpl); + if (__predict_false(error != 0)) { + switch (error) { + case EAGAIN: + return (cache_fpl_partial(fpl)); + case ENOENT: + case ENAMETOOLONG: + case ELOOP: + cache_fpl_smr_exit(fpl); + return (cache_fpl_handled_error(fpl, error)); + default: + return (cache_fpl_aborted(fpl)); + } + } + + if (*(cnp->cn_nameptr) == '/') { + fpl->dvp = cache_fpl_handle_root(fpl); + fpl->dvp_seqc = vn_seqc_read_any(fpl->dvp); + if (seqc_in_modify(fpl->dvp_seqc)) { + return (cache_fpl_aborted(fpl)); + } + } + + return (cache_fplookup_preparse(fpl)); +} + static int cache_fplookup_next(struct cache_fpl *fpl) { @@ -4743,10 +4880,6 @@ cache_fplookup_next(struct cache_fpl *fpl) return (cache_fpl_partial(fpl)); } - if (!cache_fplookup_vnode_supported(tvp)) { - return (cache_fpl_partial(fpl)); - } - counter_u64_add(numposhits, 1); SDT_PROBE3(vfs, namecache, lookup, hit, dvp, ncp->nc_name, tvp); @@ -4931,7 +5064,16 @@ static void cache_fpl_pathlen_inc(struct cache_fpl *fpl) { - fpl->debug.ni_pathlen++; + cache_fpl_pathlen_add(fpl, 1); +} + +static void +cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n) +{ + + fpl->debug.ni_pathlen += n; + KASSERT(fpl->debug.ni_pathlen <= PATH_MAX, + ("%s: pathlen overflow to %zd\n", __func__, fpl->debug.ni_pathlen)); } static void @@ -4953,13 +5095,18 @@ cache_fpl_pathlen_inc(struct cache_fpl *fpl) { } +static void +cache_fpl_pathlen_add(struct cache_fpl *fpl, size_t n) +{ +} + static void cache_fpl_pathlen_sub(struct cache_fpl *fpl, size_t n) { } #endif -static int +static int __always_inline cache_fplookup_preparse(struct cache_fpl *fpl) { struct componentname *cnp; @@ -5238,8 +5385,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl) return (cache_fpl_aborted(fpl)); } - VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); - error = cache_fplookup_preparse(fpl); if (__predict_false(cache_fpl_terminated(fpl))) { return (error); @@ -5251,8 +5396,6 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl) break; } - VNPASS(cache_fplookup_vnode_supported(fpl->dvp), fpl->dvp); - error = VOP_FPLOOKUP_VEXEC(fpl->dvp, cnp->cn_cred); if (__predict_false(error != 0)) { error = cache_fplookup_failed_vexec(fpl, error); @@ -5266,20 +5409,27 @@ cache_fplookup_impl(struct vnode *dvp, struct cache_fpl *fpl) VNPASS(!seqc_in_modify(fpl->tvp_seqc), fpl->tvp); - if (cache_fpl_islastcn(ndp)) { - error = cache_fplookup_final(fpl); - break; - } + if (fpl->tvp->v_type == VLNK) { + error = cache_fplookup_symlink(fpl); + if (cache_fpl_terminated(fpl)) { + break; + } + } else { + if (cache_fpl_islastcn(ndp)) { + error = cache_fplookup_final(fpl); + break; + } - if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) { - error = cache_fpl_aborted(fpl); - break; - } + if (!vn_seqc_consistent(fpl->dvp, fpl->dvp_seqc)) { + error = cache_fpl_aborted(fpl); + break; + } - fpl->dvp = fpl->tvp; - fpl->dvp_seqc = fpl->tvp_seqc; + fpl->dvp = fpl->tvp; + fpl->dvp_seqc = fpl->tvp_seqc; + cache_fplookup_parse_advance(fpl); + } - cache_fplookup_parse_advance(fpl); cache_fpl_checkpoint(fpl); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index b6529623ecbb..ad65ab11bb1d 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -578,6 +578,7 @@ namei(struct nameidata *ndp) ndp->ni_startdir->v_type == VBAD); ndp->ni_lcf = 0; + ndp->ni_loopcnt = 0; ndp->ni_vp = NULL; error = namei_getpath(ndp); @@ -611,8 +612,16 @@ namei(struct nameidata *ndp) TAILQ_INIT(&ndp->ni_cap_tracker); dp = ndp->ni_startdir; break; + case CACHE_FPL_STATUS_DESTROYED: + ndp->ni_loopcnt = 0; + error = namei_getpath(ndp); + if (__predict_false(error != 0)) { + return (error); + } + /* FALLTHROUGH */ case CACHE_FPL_STATUS_ABORTED: TAILQ_INIT(&ndp->ni_cap_tracker); + MPASS(ndp->ni_lcf == 0); error = namei_setup(ndp, &dp, &pwd); if (error != 0) { namei_cleanup_cnp(cnp); @@ -621,8 +630,6 @@ namei(struct nameidata *ndp) break; } - ndp->ni_loopcnt = 0; - /* * Locked lookup. */ diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 8461fd0d49b5..047e4c54f0c5 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -5452,6 +5452,19 @@ vop_fplookup_vexec_debugpost(void *ap __unused, int rc __unused) VFS_SMR_ASSERT_ENTERED(); } +void +vop_fplookup_symlink_debugpre(void *ap __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} + +void +vop_fplookup_symlink_debugpost(void *ap __unused, int rc __unused) +{ + + VFS_SMR_ASSERT_ENTERED(); +} void vop_strategy_debugpre(void *ap) { diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index ed6f3bf9d61b..5d15d4a0c863 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -156,6 +156,16 @@ vop_fplookup_vexec { }; +%% fplookup_symlink vp - - - +%! fplookup_symlink debugpre vop_fplookup_symlink_debugpre +%! fplookup_symlink debugpost vop_fplookup_symlink_debugpost + +vop_fplookup_symlink { + IN struct vnode *vp; + IN struct cache_fpl *fpl; +}; + + %% access vp L L L vop_access { diff --git a/sys/sys/namei.h b/sys/sys/namei.h index c045bc7c6bc5..1f9bae9e2753 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -116,8 +116,8 @@ struct nameidata { #ifdef _KERNEL -enum cache_fpl_status { CACHE_FPL_STATUS_ABORTED, CACHE_FPL_STATUS_PARTIAL, - CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET }; +enum cache_fpl_status { CACHE_FPL_STATUS_DESTROYED, CACHE_FPL_STATUS_ABORTED, + CACHE_FPL_STATUS_PARTIAL, CACHE_FPL_STATUS_HANDLED, CACHE_FPL_STATUS_UNSET }; int cache_fplookup(struct nameidata *ndp, enum cache_fpl_status *status, struct pwd **pwdp); diff --git a/sys/sys/param.h b/sys/sys/param.h index 19d6d3fb2d35..f540b240e848 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -60,7 +60,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 1400000 /* Master, propagated to newvers */ +#define __FreeBSD_version 1400001 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 0eadfec02313..ffd0bdad940d 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -66,6 +66,7 @@ enum vgetstate { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT }; */ struct namecache; +struct cache_fpl; struct vpollinfo { struct mtx vpi_lock; /* lock to protect below */ @@ -646,6 +647,10 @@ void cache_purge(struct vnode *vp); void cache_purge_vgone(struct vnode *vp); void cache_purge_negative(struct vnode *vp); void cache_purgevfs(struct mount *mp); +char *cache_symlink_alloc(size_t size, int flags); +void cache_symlink_free(char *string, size_t size); +int cache_symlink_resolve(struct cache_fpl *fpl, const char *string, + size_t len); void cache_vop_rename(struct vnode *fdvp, struct vnode *fvp, struct vnode *tdvp, struct vnode *tvp, struct componentname *fcnp, struct componentname *tcnp); void cache_vop_rmdir(struct vnode *dvp, struct vnode *vp); @@ -900,6 +905,8 @@ int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a); #ifdef DEBUG_VFS_LOCKS void vop_fplookup_vexec_debugpre(void *a); void vop_fplookup_vexec_debugpost(void *a, int rc); +void vop_fplookup_symlink_debugpre(void *a); +void vop_fplookup_symlink_debugpost(void *a, int rc); void vop_strategy_debugpre(void *a); void vop_lock_debugpre(void *a); void vop_lock_debugpost(void *a, int rc); @@ -910,6 +917,8 @@ void vop_mkdir_debugpost(void *a, int rc); #else #define vop_fplookup_vexec_debugpre(x) do { } while (0) #define vop_fplookup_vexec_debugpost(x, y) do { } while (0) +#define vop_fplookup_symlink_debugpre(x) do { } while (0) +#define vop_fplookup_symlink_debugpost(x, y) do { } while (0) #define vop_strategy_debugpre(x) do { } while (0) #define vop_lock_debugpre(x) do { } while (0) #define vop_lock_debugpost(x, y) do { } while (0)
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?202101231504.10NF4uka076387>