Date: Thu, 16 Mar 2017 06:45:36 +0000 (UTC) From: Mateusz Guzik <mjg@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r315378 - in stable/11/sys: kern sys Message-ID: <201703160645.v2G6jaHA086561@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: mjg Date: Thu Mar 16 06:45:36 2017 New Revision: 315378 URL: https://svnweb.freebsd.org/changeset/base/315378 Log: MFC r313275,r313280,r313282,r313335: mtx: move lockstat handling out of inline primitives Lockstat requires checking if it is enabled and if so, calling a 6 argument function. Further, determining whether to call it on unlock requires pre-reading the lock value. This is problematic in at least 3 ways: - more branches in the hot path than necessary - additional cacheline ping pong under contention - bigger code Instead, check first if lockstat handling is necessary and if so, just fall back to regular locking routines. For this purpose a new macro is introduced (LOCKSTAT_PROFILE_ENABLED). LOCK_PROFILING uninlines all primitives. Fold in the current inline lock variant into the _mtx_lock_flags to retain the support. With this change the inline variants are not used when LOCK_PROFILING is defined and thus can ignore its existence. This results in: text data bss dec hex filename 22259667 1303208 4994976 28557851 1b3c21b kernel.orig 21797315 1303208 4994976 28095499 1acb40b kernel.patched i.e. about 3% reduction in text size. A remaining action is to remove spurious arguments for internal kernel consumers. == sx: move lockstat handling out of inline primitives See r313275 for details. == rwlock: move lockstat handling out of inline primitives See r313275 for details. One difference here is that recursion handling was removed from the fallback routine. As it is it was never supposed to see a recursed lock in the first place. Future changes will move it out of inline variants, but right now there is no easy to way to test if the lock is recursed without reading additional words. == locks: fix recursion support after recent changes When a relevant lockstat probe is enabled the fallback primitive is called with a constant signifying a free lock. This works fine for typical cases but breaks with recursion, since it checks if the passed value is that of the executing thread. Read the value if necessary. Modified: stable/11/sys/kern/kern_mutex.c stable/11/sys/kern/kern_rwlock.c stable/11/sys/kern/kern_sx.c stable/11/sys/sys/lockstat.h stable/11/sys/sys/mutex.h stable/11/sys/sys/rwlock.h stable/11/sys/sys/sdt.h stable/11/sys/sys/sx.h Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/kern/kern_mutex.c ============================================================================== --- stable/11/sys/kern/kern_mutex.c Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/kern/kern_mutex.c Thu Mar 16 06:45:36 2017 (r315378) @@ -226,6 +226,7 @@ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; + uintptr_t tid, v; if (SCHEDULER_STOPPED()) return; @@ -243,7 +244,13 @@ __mtx_lock_flags(volatile uintptr_t *c, WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - __mtx_lock(m, curthread, opts, file, line); + tid = (uintptr_t)curthread; + v = MTX_UNOWNED; + if (!_mtx_obtain_lock_fetch(m, &v, tid)) + _mtx_lock_sleep(m, v, tid, opts, file, line); + else + LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, + m, 0, 0, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, @@ -271,7 +278,7 @@ __mtx_unlock_flags(volatile uintptr_t *c line); mtx_assert(m, MA_OWNED); - __mtx_unlock(m, curthread, opts, file, line); + __mtx_unlock_sleep(c, opts, file, line); TD_LOCKS_DEC(curthread); } @@ -449,6 +456,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, lock_delay_arg_init(&lda, NULL); #endif m = mtxlock2mtx(c); + if (__predict_false(v == MTX_UNOWNED)) + v = MTX_READ_VALUE(m); if (__predict_false(lv_mtx_owner(v) == (struct thread *)tid)) { KASSERT((m->lock_object.lo_flags & LO_RECURSABLE) != 0 || @@ -857,20 +866,17 @@ __mtx_unlock_sleep(volatile uintptr_t *c { struct mtx *m; struct turnstile *ts; - uintptr_t v; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); - v = MTX_READ_VALUE(m); - if (v == (uintptr_t)curthread) { + if (!mtx_recursed(m)) { + LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m); if (_mtx_release_lock(m, (uintptr_t)curthread)) return; - } - - if (mtx_recursed(m)) { + } else { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) Modified: stable/11/sys/kern/kern_rwlock.c ============================================================================== --- stable/11/sys/kern/kern_rwlock.c Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/kern/kern_rwlock.c Thu Mar 16 06:45:36 2017 (r315378) @@ -265,6 +265,7 @@ void _rw_wlock_cookie(volatile uintptr_t *c, const char *file, int line) { struct rwlock *rw; + uintptr_t tid, v; if (SCHEDULER_STOPPED()) return; @@ -278,7 +279,14 @@ _rw_wlock_cookie(volatile uintptr_t *c, ("rw_wlock() of destroyed rwlock @ %s:%d", file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - __rw_wlock(rw, curthread, file, line); + tid = (uintptr_t)curthread; + v = RW_UNLOCKED; + if (!_rw_write_lock_fetch(rw, &v, tid)) + _rw_wlock_hard(rw, v, tid, file, line); + else + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, + 0, 0, file, line, LOCKSTAT_WRITER); + LOCK_LOG_LOCK("WLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); TD_LOCKS_INC(curthread); @@ -337,7 +345,11 @@ _rw_wunlock_cookie(volatile uintptr_t *c WITNESS_UNLOCK(&rw->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("WUNLOCK", &rw->lock_object, 0, rw->rw_recurse, file, line); - __rw_wunlock(rw, curthread, file, line); + if (rw->rw_recurse) + rw->rw_recurse--; + else + _rw_wunlock_hard(rw, (uintptr_t)curthread, file, line); + TD_LOCKS_DEC(curthread); } @@ -782,6 +794,8 @@ __rw_wlock_hard(volatile uintptr_t *c, u lock_delay_arg_init(&lda, NULL); #endif rw = rwlock2rw(c); + if (__predict_false(v == RW_UNLOCKED)) + v = RW_READ_VALUE(rw); if (__predict_false(lv_rw_wowner(v) == (struct thread *)tid)) { KASSERT(rw->lock_object.lo_flags & LO_RECURSABLE, @@ -980,13 +994,12 @@ __rw_wunlock_hard(volatile uintptr_t *c, return; rw = rwlock2rw(c); + MPASS(!rw_recursed(rw)); - if (rw_wlocked(rw) && rw_recursed(rw)) { - rw->rw_recurse--; - if (LOCK_LOG_TEST(&rw->lock_object, 0)) - CTR2(KTR_LOCK, "%s: %p unrecursing", __func__, rw); + LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, + LOCKSTAT_WRITER); + if (_rw_write_unlock(rw, tid)) return; - } KASSERT(rw->rw_lock & (RW_LOCK_READ_WAITERS | RW_LOCK_WRITE_WAITERS), ("%s: neither of the waiter flags are set", __func__)); Modified: stable/11/sys/kern/kern_sx.c ============================================================================== --- stable/11/sys/kern/kern_sx.c Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/kern/kern_sx.c Thu Mar 16 06:45:36 2017 (r315378) @@ -290,6 +290,7 @@ sx_try_slock_(struct sx *sx, const char int _sx_xlock(struct sx *sx, int opts, const char *file, int line) { + uintptr_t tid, x; int error = 0; if (SCHEDULER_STOPPED()) @@ -301,7 +302,13 @@ _sx_xlock(struct sx *sx, int opts, const ("sx_xlock() of destroyed sx @ %s:%d", file, line)); WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - error = __sx_xlock(sx, curthread, opts, file, line); + tid = (uintptr_t)curthread; + x = SX_LOCK_UNLOCKED; + if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid)) + error = _sx_xlock_hard(sx, x, tid, opts, file, line); + else + LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, + 0, 0, file, line, LOCKSTAT_WRITER); if (!error) { LOCK_LOG_LOCK("XLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); @@ -359,7 +366,7 @@ _sx_xunlock(struct sx *sx, const char *f WITNESS_UNLOCK(&sx->lock_object, LOP_EXCLUSIVE, file, line); LOCK_LOG_LOCK("XUNLOCK", &sx->lock_object, 0, sx->sx_recurse, file, line); - __sx_xunlock(sx, curthread, file, line); + _sx_xunlock_hard(sx, (uintptr_t)curthread, file, line); TD_LOCKS_DEC(curthread); } @@ -504,6 +511,9 @@ _sx_xlock_hard(struct sx *sx, uintptr_t lock_delay_arg_init(&lda, NULL); #endif + if (__predict_false(x == SX_LOCK_UNLOCKED)) + x = SX_READ_VALUE(sx); + /* If we already hold an exclusive lock, then recurse. */ if (__predict_false(lv_sx_owner(x) == (struct thread *)tid)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, @@ -737,8 +747,13 @@ _sx_xunlock_hard(struct sx *sx, uintptr_ MPASS(!(sx->sx_lock & SX_LOCK_SHARED)); - /* If the lock is recursed, then unrecurse one level. */ - if (sx_xlocked(sx) && sx_recursed(sx)) { + if (!sx_recursed(sx)) { + LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, + LOCKSTAT_WRITER); + if (atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)) + return; + } else { + /* The lock is recursed, unrecurse one level. */ if ((--sx->sx_recurse) == 0) atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED); if (LOCK_LOG_TEST(&sx->lock_object, 0)) Modified: stable/11/sys/sys/lockstat.h ============================================================================== --- stable/11/sys/sys/lockstat.h Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/sys/lockstat.h Thu Mar 16 06:45:36 2017 (r315378) @@ -107,6 +107,10 @@ extern int lockstat_enabled; LOCKSTAT_RECORD1(probe, lp, a); \ } while (0) +#ifndef LOCK_PROFILING +#define LOCKSTAT_PROFILE_ENABLED(probe) SDT_PROBE_ENABLED(lockstat, , , probe) +#endif + struct lock_object; uint64_t lockstat_nsecs(struct lock_object *); @@ -130,6 +134,10 @@ uint64_t lockstat_nsecs(struct lock_obje #define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) \ LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) +#ifndef LOCK_PROFILING +#define LOCKSTAT_PROFILE_ENABLED(probe) 0 +#endif + #endif /* !KDTRACE_HOOKS */ #endif /* _KERNEL */ #endif /* _SYS_LOCKSTAT_H */ Modified: stable/11/sys/sys/mutex.h ============================================================================== --- stable/11/sys/sys/mutex.h Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/sys/mutex.h Thu Mar 16 06:45:36 2017 (r315378) @@ -171,10 +171,8 @@ void thread_lock_flags_(struct thread *, #define _mtx_obtain_lock(mp, tid) \ atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) -#define _mtx_obtain_lock_fetch(mp, vp, tid) ({ \ - *vp = MTX_UNOWNED; \ - atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid)); \ -}) +#define _mtx_obtain_lock_fetch(mp, vp, tid) \ + atomic_fcmpset_acq_ptr(&(mp)->mtx_lock, vp, (tid)) /* Try to release mtx_lock if it is unrecursed and uncontested. */ #define _mtx_release_lock(mp, tid) \ @@ -193,13 +191,11 @@ void thread_lock_flags_(struct thread *, /* Lock a normal mutex. */ #define __mtx_lock(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ - uintptr_t _v; \ + uintptr_t _v = MTX_UNOWNED; \ \ - if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__acquire) ||\ + !_mtx_obtain_lock_fetch((mp), &_v, _tid))) \ _mtx_lock_sleep((mp), _v, _tid, (opts), (file), (line));\ - else \ - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, \ - mp, 0, 0, file, line); \ } while (0) /* @@ -211,7 +207,7 @@ void thread_lock_flags_(struct thread *, #ifdef SMP #define __mtx_lock_spin(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ - uintptr_t _v; \ + uintptr_t _v = MTX_UNOWNED; \ \ spinlock_enter(); \ if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) \ @@ -267,9 +263,8 @@ void thread_lock_flags_(struct thread *, #define __mtx_unlock(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ \ - if ((mp)->mtx_recurse == 0) \ - LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp); \ - if (!_mtx_release_lock((mp), _tid)) \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\ + !_mtx_release_lock((mp), _tid))) \ _mtx_unlock_sleep((mp), (opts), (file), (line)); \ } while (0) Modified: stable/11/sys/sys/rwlock.h ============================================================================== --- stable/11/sys/sys/rwlock.h Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/sys/rwlock.h Thu Mar 16 06:45:36 2017 (r315378) @@ -84,10 +84,8 @@ #define _rw_write_lock(rw, tid) \ atomic_cmpset_acq_ptr(&(rw)->rw_lock, RW_UNLOCKED, (tid)) -#define _rw_write_lock_fetch(rw, vp, tid) ({ \ - *vp = RW_UNLOCKED; \ - atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid)); \ -}) +#define _rw_write_lock_fetch(rw, vp, tid) \ + atomic_fcmpset_acq_ptr(&(rw)->rw_lock, vp, (tid)) /* Release a write lock quickly if there are no waiters. */ #define _rw_write_unlock(rw, tid) \ @@ -102,13 +100,11 @@ /* Acquire a write lock. */ #define __rw_wlock(rw, tid, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ - uintptr_t _v; \ + uintptr_t _v = RW_UNLOCKED; \ \ - if (!_rw_write_lock_fetch((rw), &_v, _tid)) \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__acquire) || \ + !_rw_write_lock_fetch((rw), &_v, _tid))) \ _rw_wlock_hard((rw), _v, _tid, (file), (line)); \ - else \ - LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, \ - 0, 0, file, line, LOCKSTAT_WRITER); \ } while (0) /* Release a write lock. */ @@ -118,9 +114,8 @@ if ((rw)->rw_recurse) \ (rw)->rw_recurse--; \ else { \ - LOCKSTAT_PROFILE_RELEASE_RWLOCK(rw__release, rw, \ - LOCKSTAT_WRITER); \ - if (!_rw_write_unlock((rw), _tid)) \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||\ + !_rw_write_unlock((rw), _tid))) \ _rw_wunlock_hard((rw), _tid, (file), (line)); \ } \ } while (0) Modified: stable/11/sys/sys/sdt.h ============================================================================== --- stable/11/sys/sys/sdt.h Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/sys/sdt.h Thu Mar 16 06:45:36 2017 (r315378) @@ -160,6 +160,9 @@ SET_DECLARE(sdt_argtypes_set, struct sdt #define SDT_PROBE_DECLARE(prov, mod, func, name) \ extern struct sdt_probe sdt_##prov##_##mod##_##func##_##name[1] +#define SDT_PROBE_ENABLED(prov, mod, func, name) \ + __predict_false((sdt_##prov##_##mod##_##func##_##name->id)) + #define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) do { \ if (__predict_false(sdt_##prov##_##mod##_##func##_##name->id)) \ (*sdt_probe_func)(sdt_##prov##_##mod##_##func##_##name->id, \ Modified: stable/11/sys/sys/sx.h ============================================================================== --- stable/11/sys/sys/sx.h Thu Mar 16 06:35:53 2017 (r315377) +++ stable/11/sys/sys/sx.h Thu Mar 16 06:45:36 2017 (r315378) @@ -145,21 +145,19 @@ struct sx_args { * deferred to 'tougher' functions. */ +#if (LOCK_DEBUG == 0) && !defined(SX_NOINLINE) /* Acquire an exclusive lock. */ static __inline int __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file, int line) { uintptr_t tid = (uintptr_t)td; - uintptr_t v; + uintptr_t v = SX_LOCK_UNLOCKED; int error = 0; - v = SX_LOCK_UNLOCKED; - if (!atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid)) + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__acquire) || + !atomic_fcmpset_acq_ptr(&sx->sx_lock, &v, tid))) error = _sx_xlock_hard(sx, v, tid, opts, file, line); - else - LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx, - 0, 0, file, line, LOCKSTAT_WRITER); return (error); } @@ -170,12 +168,11 @@ __sx_xunlock(struct sx *sx, struct threa { uintptr_t tid = (uintptr_t)td; - if (sx->sx_recurse == 0) - LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, - LOCKSTAT_WRITER); - if (!atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)) + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) || + !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))) _sx_xunlock_hard(sx, tid, file, line); } +#endif /* * Public interface for lock operations.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201703160645.v2G6jaHA086561>