From: Mateusz Guzik <mjg@FreeBSD.org>
Date: Sat, 17 Mar 2018 19:26:33 +0000 (UTC)
To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject: svn commit: r331109 - head/sys/kern
Message-Id: <201803171926.w2HJQXpv025196@repo.freebsd.org>

Author: mjg
Date: Sat Mar 17 19:26:33 2018
New Revision: 331109
URL: https://svnweb.freebsd.org/changeset/base/331109

Log:
  locks: slightly depessimize lockstat

  The slow path is always taken when lockstat is enabled. This induces
  rdtsc (or other) calls to get the cycle count even when there was no
  contention.

  Still go to the slow path so as not to perturb the fast path, but
  avoid the heavy lifting unless necessary.

  This reduces sys and real time during -j 80 buildkernel:
  before:   3651.84s user 1105.59s system 5394% cpu 1:28.18 total
  after:    3685.99s user  975.74s system 5450% cpu 1:25.53 total
  disabled: 3697.96s user  411.13s system 5261% cpu 1:18.10 total

  Note that even with this change lockstat remains a significant hit.

  LOCK_PROFILING results are not affected.
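[Editor's note] The change has the same shape in all three files below: rather than unconditionally sampling lockstat_nsecs() (and thus rdtsc) on slow-path entry, each function now retries the cheap atomic acquire first and only starts timestamping once the lock has actually been observed held. The following stand-alone sketch illustrates the pattern; every name in it (probe_on, probe_enabled, timestamp, record_acquire, lock_slowpath, UNOWNED) is an illustrative stand-in for the kernel's LOCKSTAT_PROFILE_ENABLED(), lockstat_nsecs() and LOCKSTAT_RECORD*() machinery, not the actual API:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>
	#include <time.h>

	/* Stand-in for lockstat_enabled: set while any probe is attached. */
	static _Atomic bool probe_on;

	static bool
	probe_enabled(void)
	{

		return (atomic_load_explicit(&probe_on, memory_order_relaxed));
	}

	/* The "expensive" sample; the kernel uses rdtsc or similar here. */
	static uint64_t
	timestamp(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
	}

	/* Stand-in for LOCKSTAT_RECORD*(): a real probe would aggregate this. */
	static void
	record_acquire(uint64_t nsecs)
	{

		(void)nsecs;
	}

	#define	UNOWNED	((uintptr_t)0)

	static void
	lock_slowpath(_Atomic uintptr_t *lock, uintptr_t tid)
	{
		uintptr_t v = UNOWNED;
		uint64_t all_time = 0;
		bool doing_lockprof = false;

		if (probe_enabled()) {
			/*
			 * The fast path branched here only because the probe
			 * is on; the lock may still be free.  Retry the cheap
			 * acquire before paying for a timestamp.
			 */
			while (v == UNOWNED) {
				if (atomic_compare_exchange_weak(lock, &v, tid))
					goto out_lockstat; /* uncontended: no timing */
			}
			doing_lockprof = true;
			all_time -= timestamp(); /* real contention: start timing */
		}

		/* Minimal stand-in for the adaptive spin/sleep loop. */
		for (;;) {
			v = UNOWNED;
			if (atomic_compare_exchange_weak(lock, &v, tid))
				break;
		}

	out_lockstat:
		if (doing_lockprof) {
			all_time += timestamp();
			record_acquire(all_time);
		}
	}

	int
	main(void)
	{
		static _Atomic uintptr_t lock;

		atomic_store(&probe_on, true);
		lock_slowpath(&lock, (uintptr_t)1);
		return (0);
	}

With this shape, the uncontended-but-probed acquire costs only one extra branch plus the compare-and-swap it was about to do anyway; the timestamp pair is paid only once contention is actually observed.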
Modified:
  head/sys/kern/kern_mutex.c
  head/sys/kern/kern_rwlock.c
  head/sys/kern/kern_sx.c

Modified: head/sys/kern/kern_mutex.c
==============================================================================
--- head/sys/kern/kern_mutex.c	Sat Mar 17 19:04:36 2018	(r331108)
+++ head/sys/kern/kern_mutex.c	Sat Mar 17 19:26:33 2018	(r331109)
@@ -491,8 +491,25 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof;
 #endif
+
 	td = curthread;
 	tid = (uintptr_t)td;
+	m = mtxlock2mtx(c);
+
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) {
+		while (v == MTX_UNOWNED) {
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
+				goto out_lockstat;
+		}
+		doing_lockprof = 1;
+		all_time -= lockstat_nsecs(&m->lock_object);
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+#endif
+
 	if (SCHEDULER_STOPPED_TD(td))
 		return;
@@ -501,7 +518,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
-	m = mtxlock2mtx(c);
+
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
@@ -532,13 +549,6 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 		CTR4(KTR_LOCK,
 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
 		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof))
-		all_time -= lockstat_nsecs(&m->lock_object);
-#endif

 	for (;;) {
 		if (v == MTX_UNOWNED) {
@@ -660,10 +670,6 @@ retry_turnstile:
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&m->lock_object);
-#endif
-	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
-	    waittime, file, line);
-#ifdef KDTRACE_HOOKS
 	if (sleep_time)
 		LOCKSTAT_RECORD1(adaptive__block, m, sleep_time);
@@ -672,7 +678,10 @@ retry_turnstile:
 	 */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
+out_lockstat:
 #endif
+	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
+	    waittime, file, line);
 }

 #ifdef SMP
@@ -708,6 +717,20 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t
 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);

+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) {
+		while (v == MTX_UNOWNED) {
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
+				goto out_lockstat;
+		}
+		doing_lockprof = 1;
+		spin_time -= lockstat_nsecs(&m->lock_object);
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+#endif
+
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
@@ -730,13 +753,7 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof))
-		spin_time -= lockstat_nsecs(&m->lock_object);
-#endif
+
 	for (;;) {
 		if (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
@@ -767,13 +784,12 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t
 #endif
 #ifdef KDTRACE_HOOKS
 	spin_time += lockstat_nsecs(&m->lock_object);
-#endif
-	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
-	    contested, waittime, file, line);
-#ifdef KDTRACE_HOOKS
 	if (lda.spin_cnt != 0)
 		LOCKSTAT_RECORD1(spin__spin, m, spin_time);
+out_lockstat:
 #endif
+	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
+	    contested, waittime, file, line);
 }

 #endif /* SMP */

Modified: head/sys/kern/kern_rwlock.c
==============================================================================
--- head/sys/kern/kern_rwlock.c	Sat Mar 17 19:04:36 2018	(r331108)
+++ head/sys/kern/kern_rwlock.c	Sat Mar 17 19:26:33 2018	(r331109)
@@ -438,9 +438,23 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td,
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
-	int doing_lockprof;
+	int doing_lockprof = 0;
 #endif

+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
+		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
+			goto out_lockstat;
+		doing_lockprof = 1;
+		all_time -= lockstat_nsecs(&rw->lock_object);
+		state = v;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+	state = v;
+#endif
+
 	if (SCHEDULER_STOPPED())
 		return;
@@ -456,17 +470,6 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td,
 	lock_profile_obtain_lock_failed(&rw->lock_object,
 	    &contested, &waittime);

-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-	state = v;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof)) {
-		all_time -= lockstat_nsecs(&rw->lock_object);
-		state = v;
-	}
-#endif
-
 	for (;;) {
 		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
 			break;
@@ -615,6 +618,7 @@ retry_ts:
 	LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 	    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
 	    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+out_lockstat:
 #endif
 	/*
 	 * TODO: acquire "owner of record" here.  Here be turnstile dragons
@@ -892,10 +896,28 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
-	int doing_lockprof;
+	int doing_lockprof = 0;
 #endif

 	tid = (uintptr_t)curthread;
+	rw = rwlock2rw(c);
+
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
+		while (v == RW_UNLOCKED) {
+			if (_rw_write_lock_fetch(rw, &v, tid))
+				goto out_lockstat;
+		}
+		doing_lockprof = 1;
+		all_time -= lockstat_nsecs(&rw->lock_object);
+		state = v;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+	state = v;
+#endif
+
 	if (SCHEDULER_STOPPED())
 		return;
@@ -904,7 +926,6 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
-	rw = rwlock2rw(c);

 	if (__predict_false(v == RW_UNLOCKED))
 		v = RW_READ_VALUE(rw);
@@ -929,17 +950,6 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOC
 	lock_profile_obtain_lock_failed(&rw->lock_object,
 	    &contested, &waittime);

-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-	state = v;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof)) {
-		all_time -= lockstat_nsecs(&rw->lock_object);
-		state = v;
-	}
-#endif
-
 	for (;;) {
 		if (v == RW_UNLOCKED) {
 			if (_rw_write_lock_fetch(rw, &v, tid))
@@ -1101,6 +1111,7 @@ retry_ts:
 	LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 	    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 	    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+out_lockstat:
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 	    waittime, file, line, LOCKSTAT_WRITER);

Modified: head/sys/kern/kern_sx.c
==============================================================================
--- head/sys/kern/kern_sx.c	Sat Mar 17 19:04:36 2018	(r331108)
+++ head/sys/kern/kern_sx.c	Sat Mar 17 19:26:33 2018	(r331109)
@@ -565,6 +565,23 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LO
 	int extra_work = 0;

 	tid = (uintptr_t)curthread;
+
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
+		while (x == SX_LOCK_UNLOCKED) {
+			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
+				goto out_lockstat;
+		}
+		extra_work = 1;
+		all_time -= lockstat_nsecs(&sx->lock_object);
+		state = x;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	extra_work = 1;
+	state = x;
+#endif
+
 	if (SCHEDULER_STOPPED())
 		return (0);
@@ -603,16 +620,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LO
 	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 	    &waittime);

-#ifdef LOCK_PROFILING
-	extra_work = 1;
-	state = x;
-#elif defined(KDTRACE_HOOKS)
-	extra_work = lockstat_enabled;
-	if (__predict_false(extra_work)) {
-		all_time -= lockstat_nsecs(&sx->lock_object);
-		state = x;
-	}
-#endif
 #ifndef INVARIANTS
 	GIANT_SAVE(extra_work);
 #endif
@@ -800,6 +807,7 @@ retry_sleepq:
 	LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 	    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
 	    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
+out_lockstat:
 #endif
 	if (!error)
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
@@ -932,6 +940,20 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LO
 #endif
 	int extra_work = 0;

+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
+		if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
+			goto out_lockstat;
+		extra_work = 1;
+		all_time -= lockstat_nsecs(&sx->lock_object);
+		state = x;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	extra_work = 1;
+	state = x;
+#endif
+
 	if (SCHEDULER_STOPPED())
 		return (0);
@@ -951,16 +973,6 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LO
 	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 	    &waittime);

-#ifdef LOCK_PROFILING
-	extra_work = 1;
-	state = x;
-#elif defined(KDTRACE_HOOKS)
-	extra_work = lockstat_enabled;
-	if (__predict_false(extra_work)) {
-		all_time -= lockstat_nsecs(&sx->lock_object);
-		state = x;
-	}
-#endif
 #ifndef INVARIANTS
 	GIANT_SAVE(extra_work);
 #endif
@@ -1102,6 +1114,7 @@ retry_sleepq:
 	LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 	    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
 	    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
+out_lockstat:
 #endif
 	if (error == 0) {
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
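[Editor's note] The LOCKSTAT_PROFILE_ENABLED() checks above key off the DTrace lockstat provider: attaching any of its probes is what forces every acquire through these slow paths, which is exactly the case this commit depessimizes. As an illustrative usage example (available probes can be listed with `dtrace -l -P lockstat`), the following one-liner keeps the probes enabled while aggregating mutex sleep time, arg1 of adaptive-block, by kernel stack:

	dtrace -n 'lockstat:::adaptive-block { @[stack()] = sum(arg1); }'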