Date: Sat, 22 Nov 2014 01:28:12 +0100 From: Mateusz Guzik <mjguzik@gmail.com> To: freebsd-fs@freebsd.org Subject: atomic v_usecount and v_holdcnt Message-ID: <20141122002812.GA32289@dft-labs.eu>
next in thread | raw e-mail | index | archive | help
The idea is that we don't need an interlock as long as we don't transition either counter 1->0 or 0->1. Patch itself is more of a PoC, so I didn't rename vholdl & friends just yet. It helps during lookups with same vnodes since the interlock which was taken twice served as a serialization point and this effect is now reduced. There are other places which can avoid VI_LOCK + vget scheme. Patch below survived make -j 40 buildworld, poudriere with 40 workers etc on a 2 package(s) x 10 core(s) x 2 SMT threads machine with and without debugs (including DEBUG_VFS_LOCKS). Perf difference: in a crap microbenchmark of 40 threads doing a stat on /foo/bar/baz/quux${N}, where each thread stats a separate file I got over 4 times speed up on tmpfs. Comments? diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c index 83f29c1..b587ebd 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/vnode.c @@ -99,6 +99,6 @@ vn_rele_async(vnode_t *vp, taskq_t *taskq) (task_func_t *)vn_rele_inactive, vp, TQ_SLEEP) != 0); return; } - vp->v_usecount--; + refcount_release(&vp->v_usecount); vdropl(vp); } diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 55e3217..ec26d35 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -665,12 +665,12 @@ success: ltype = VOP_ISLOCKED(dvp); VOP_UNLOCK(dvp, 0); } - VI_LOCK(*vpp); + vholdl(*vpp); if (wlocked) CACHE_WUNLOCK(); else CACHE_RUNLOCK(); - error = vget(*vpp, cnp->cn_lkflags | LK_INTERLOCK, cnp->cn_thread); + error = vget_held(*vpp, cnp->cn_lkflags, cnp->cn_thread); if (cnp->cn_flags & ISDOTDOT) { vn_lock(dvp, ltype | LK_RETRY); if (dvp->v_iflag & VI_DOOMED) { @@ -1376,9 +1376,9 @@ vn_dir_dd_ino(struct vnode *vp) if ((ncp->nc_flag & NCF_ISDOTDOT) != 0) continue; ddvp = ncp->nc_dvp; - VI_LOCK(ddvp); + vholdl(ddvp); CACHE_RUNLOCK(); - if (vget(ddvp, LK_INTERLOCK | LK_SHARED | LK_NOWAIT, curthread)) + if 
(vget_held(ddvp, LK_SHARED | LK_NOWAIT, curthread)) return (NULL); return (ddvp); } diff --git a/sys/kern/vfs_hash.c b/sys/kern/vfs_hash.c index 0271e49..d2fdbba 100644 --- a/sys/kern/vfs_hash.c +++ b/sys/kern/vfs_hash.c @@ -83,9 +83,9 @@ vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td, s continue; if (fn != NULL && fn(vp, arg)) continue; - VI_LOCK(vp); + vhold(vp); mtx_unlock(&vfs_hash_mtx); - error = vget(vp, flags | LK_INTERLOCK, td); + error = vget_held(vp, flags, td); if (error == ENOENT && (flags & LK_NOWAIT) == 0) break; if (error) @@ -127,9 +127,9 @@ vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td, stru continue; if (fn != NULL && fn(vp2, arg)) continue; - VI_LOCK(vp2); + vhold(vp2); mtx_unlock(&vfs_hash_mtx); - error = vget(vp2, flags | LK_INTERLOCK, td); + error = vget_held(vp2, flags, td); if (error == ENOENT && (flags & LK_NOWAIT) == 0) break; mtx_lock(&vfs_hash_mtx); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 345aad6..53bdf0d 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -854,7 +854,7 @@ vnlru_free(int count) */ freevnodes--; vp->v_iflag &= ~VI_FREE; - vp->v_holdcnt++; + refcount_acquire(&vp->v_holdcnt); mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); @@ -2052,12 +2052,7 @@ v_incr_usecount(struct vnode *vp) CTR2(KTR_VFS, "%s: vp %p", __func__, vp); vholdl(vp); - vp->v_usecount++; - if (vp->v_type == VCHR && vp->v_rdev != NULL) { - dev_lock(); - vp->v_rdev->si_usecount++; - dev_unlock(); - } + v_upgrade_usecount(vp); } /* @@ -2067,9 +2062,24 @@ v_incr_usecount(struct vnode *vp) static void v_upgrade_usecount(struct vnode *vp) { + int old, locked; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); - vp->v_usecount++; +retry: + old = vp->v_usecount; + if (old > 0) { + if (atomic_cmpset_int(&vp->v_usecount, old, old + 1)) + goto dev; + goto retry; + } + + locked = mtx_owned(VI_MTX(vp)); + if (!locked) + VI_LOCK(vp); + refcount_acquire(&vp->v_usecount); + if (!locked) + 
VI_UNLOCK(vp); +dev: if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; @@ -2086,16 +2096,10 @@ static void v_decr_usecount(struct vnode *vp) { - ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_usecount: negative usecount")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); - vp->v_usecount--; - if (vp->v_type == VCHR && vp->v_rdev != NULL) { - dev_lock(); - vp->v_rdev->si_usecount--; - dev_unlock(); - } + v_decr_useonly(vp); vdropl(vp); } @@ -2108,12 +2112,27 @@ v_decr_usecount(struct vnode *vp) static void v_decr_useonly(struct vnode *vp) { + int old, locked; - ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_useonly: negative usecount")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); - vp->v_usecount--; +retry: + old = vp->v_usecount; + if (old > 1) { + if (atomic_cmpset_int(&vp->v_usecount, old, old - 1)) { + goto dev; + } + goto retry; + } + + locked = mtx_owned(VI_MTX(vp)); + if (!locked) + VI_LOCK(vp); + refcount_release(&vp->v_usecount); + if (!locked) + VI_UNLOCK(vp); +dev: if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; @@ -2129,19 +2148,15 @@ v_decr_useonly(struct vnode *vp) * vput try to do it here. 
*/ int -vget(struct vnode *vp, int flags, struct thread *td) +vget_held(struct vnode *vp, int flags, struct thread *td) { int error; - error = 0; VNASSERT((flags & LK_TYPE_MASK) != 0, vp, ("vget: invalid lock operation")); CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags); - if ((flags & LK_INTERLOCK) == 0) - VI_LOCK(vp); - vholdl(vp); - if ((error = vn_lock(vp, flags | LK_INTERLOCK)) != 0) { + if ((error = vn_lock(vp, flags)) != 0) { vdrop(vp); CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__, vp); @@ -2149,7 +2164,6 @@ vget(struct vnode *vp, int flags, struct thread *td) } if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0) panic("vget: vn_lock failed to return ENOENT\n"); - VI_LOCK(vp); /* Upgrade our holdcnt to a usecount. */ v_upgrade_usecount(vp); /* @@ -2159,15 +2173,26 @@ vget(struct vnode *vp, int flags, struct thread *td) * we don't succeed no harm is done. */ if (vp->v_iflag & VI_OWEINACT) { - if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && - (flags & LK_NOWAIT) == 0) - vinactive(vp, td); - vp->v_iflag &= ~VI_OWEINACT; + VI_LOCK(vp); + if (vp->v_iflag & VI_OWEINACT) { + if (VOP_ISLOCKED(vp) == LK_EXCLUSIVE && + (flags & LK_NOWAIT) == 0) + vinactive(vp, td); + vp->v_iflag &= ~VI_OWEINACT; + } + VI_UNLOCK(vp); } - VI_UNLOCK(vp); return (0); } +int +vget(struct vnode *vp, int flags, struct thread *td) +{ + + vhold(vp); + return (vget_held(vp, flags, td)); +} + /* * Increase the reference count of a vnode. 
*/ @@ -2176,9 +2201,7 @@ vref(struct vnode *vp) { CTR2(KTR_VFS, "%s: vp %p", __func__, vp); - VI_LOCK(vp); v_incr_usecount(vp); - VI_UNLOCK(vp); } /* @@ -2195,9 +2218,7 @@ vrefcnt(struct vnode *vp) { int usecnt; - VI_LOCK(vp); usecnt = vp->v_usecount; - VI_UNLOCK(vp); return (usecnt); } @@ -2210,6 +2231,7 @@ static void vputx(struct vnode *vp, int func) { int error; + int old; KASSERT(vp != NULL, ("vputx: null vp")); if (func == VPUTX_VUNREF) @@ -2219,11 +2241,26 @@ vputx(struct vnode *vp, int func) else KASSERT(func == VPUTX_VRELE, ("vputx: wrong func")); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); + +retry: + old = vp->v_usecount; + if (old > 1) { + if (atomic_cmpset_int(&vp->v_usecount, old, old - 1)) { + if (func == VPUTX_VPUT) + VOP_UNLOCK(vp, 0); + if (vp->v_type == VCHR && vp->v_rdev != NULL) { + dev_lock(); + vp->v_rdev->si_usecount--; + dev_unlock(); + } + vdropl(vp); + return; + } + goto retry; + } + VI_LOCK(vp); - /* Skip this v_writecount check if we're going to panic below. */ - VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp, - ("vputx: missed vn_close")); error = 0; if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) && @@ -2320,9 +2357,7 @@ void vhold(struct vnode *vp) { - VI_LOCK(vp); vholdl(vp); - VI_UNLOCK(vp); } /* @@ -2332,20 +2367,30 @@ void vholdl(struct vnode *vp) { struct mount *mp; + int old; + int locked; CTR2(KTR_VFS, "%s: vp %p", __func__, vp); -#ifdef INVARIANTS - /* getnewvnode() calls v_incr_usecount() without holding interlock. 
*/ - if (vp->v_type != VNON || vp->v_data != NULL) { - ASSERT_VI_LOCKED(vp, "vholdl"); - VNASSERT(vp->v_holdcnt > 0 || (vp->v_iflag & VI_FREE) != 0, - vp, ("vholdl: free vnode is held")); +retry: + old = vp->v_holdcnt; + if (old > 0) { + if (atomic_cmpset_int(&vp->v_holdcnt, old, old + 1)) { + VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, + ("vholdl: vnode with usecount is free")); + return; + } + goto retry; } -#endif - vp->v_holdcnt++; - if ((vp->v_iflag & VI_FREE) == 0) + locked = mtx_owned(VI_MTX(vp)); + if (!locked) + VI_LOCK(vp); + if ((vp->v_iflag & VI_FREE) == 0) { + refcount_acquire(&vp->v_holdcnt); + if (!locked) + VI_UNLOCK(vp); return; - VNASSERT(vp->v_holdcnt == 1, vp, ("vholdl: wrong hold count")); + } + VNASSERT(vp->v_holdcnt == 0, vp, ("vholdl: wrong hold count")); VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed.")); /* * Remove a vnode from the free list, mark it as in use, @@ -2362,6 +2407,9 @@ vholdl(struct vnode *vp) TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist); mp->mnt_activevnodelistsize++; mtx_unlock(&vnode_free_list_mtx); + refcount_acquire(&vp->v_holdcnt); + if (!locked) + VI_UNLOCK(vp); } /* @@ -2372,7 +2420,6 @@ void vdrop(struct vnode *vp) { - VI_LOCK(vp); vdropl(vp); } @@ -2387,15 +2434,27 @@ vdropl(struct vnode *vp) struct bufobj *bo; struct mount *mp; int active; + int old; + int locked; - ASSERT_VI_LOCKED(vp, "vdropl"); CTR2(KTR_VFS, "%s: vp %p", __func__, vp); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt %d", vp->v_holdcnt); - vp->v_holdcnt--; - VNASSERT(vp->v_holdcnt >= vp->v_usecount, vp, - ("hold count less than use count")); - if (vp->v_holdcnt > 0) { + locked = mtx_owned(VI_MTX(vp)); +retry: + old = vp->v_holdcnt; + if (old > 1) { + if (atomic_cmpset_int(&vp->v_holdcnt, old, old -1)) { + if (locked) + VI_UNLOCK(vp); + return; + } + goto retry; + } + + if (!locked) + VI_LOCK(vp); + if (refcount_release(&vp->v_holdcnt) == 0) { VI_UNLOCK(vp); return; } diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h 
index c78b9d1..87eee4a 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -651,6 +651,7 @@ void vdrop(struct vnode *); void vdropl(struct vnode *); int vflush(struct mount *mp, int rootrefs, int flags, struct thread *td); int vget(struct vnode *vp, int lockflag, struct thread *td); +int vget_held(struct vnode *vp, int lockflag, struct thread *td); void vgone(struct vnode *vp); void vhold(struct vnode *); void vholdl(struct vnode *);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20141122002812.GA32289>