Date: Sun, 31 Jul 2016 14:41:13 +0200 From: Mateusz Guzik <mjguzik@gmail.com> To: Konstantin Belousov <kostikbel@gmail.com> Cc: jhb@freebsd.org, freebsd-current@freebsd.org Subject: Re: [PATCH] randomized delay in locking primitives, take 2 Message-ID: <20160731124113.GE9408@dft-labs.eu> In-Reply-To: <20160731104928.GW83214@kib.kiev.ua> References: <20160731095706.GB9408@dft-labs.eu> <20160731104928.GW83214@kib.kiev.ua>
next in thread | previous in thread | raw e-mail | index | archive | help
On Sun, Jul 31, 2016 at 01:49:28PM +0300, Konstantin Belousov wrote: [snip] After an irc discussion, the following was produced (also available at: https://people.freebsd.org/~mjg/lock_backoff_complete4.diff): Differences: - uint64_t usage was converted to u_int (also see r303584) - currently unused features (cap limit and return value) were removed - lock_delay args got packed into a dedicated structure Note this patch requires the tree to be at least at r303584. diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index 0555a78..9b07b8b 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #include <sys/resourcevar.h> #include <sys/sched.h> #include <sys/sbuf.h> +#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/turnstile.h> #include <sys/vmmeter.h> @@ -138,6 +139,36 @@ struct lock_class lock_class_mtx_spin = { #endif }; +#ifdef ADAPTIVE_MUTEXES +static SYSCTL_NODE(_debug, OID_AUTO, mtx, CTLFLAG_RD, NULL, "mtx debugging"); + +static struct lock_delay_config mtx_delay = { + .initial = 1000, + .step = 500, + .min = 100, + .max = 5000, +}; + +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_initial, CTLFLAG_RW, &mtx_delay.initial, + 0, ""); +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_step, CTLFLAG_RW, &mtx_delay.step, + 0, ""); +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_min, CTLFLAG_RW, &mtx_delay.min, + 0, ""); +SYSCTL_INT(_debug_mtx, OID_AUTO, delay_max, CTLFLAG_RW, &mtx_delay.max, + 0, ""); + +static void +mtx_delay_sysinit(void *dummy) +{ + + mtx_delay.initial = mp_ncpus * 25; + mtx_delay.min = mp_ncpus * 5; + mtx_delay.max = mp_ncpus * 25 * 10; +} +LOCK_DELAY_SYSINIT(mtx_delay_sysinit); +#endif + /* * System-wide mutexes */ @@ -408,8 +439,10 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, int contested = 0; uint64_t waittime = 0; #endif +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS - u_int spin_cnt = 0; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; @@ -418,6 +451,9 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, if (SCHEDULER_STOPPED()) return; +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, &mtx_delay); +#endif m = mtxlock2mtx(c); if (mtx_owned(m)) { @@ -451,7 +487,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, if (m->mtx_lock == MTX_UNOWNED && _mtx_obtain_lock(m, tid)) break; #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif #ifdef ADAPTIVE_MUTEXES /* @@ -471,12 +507,8 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, "spinning", "lockname:\"%s\"", m->lock_object.lo_name); while (mtx_owner(m) == owner && - TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname((struct thread *)tid), "running"); @@ -570,7 +602,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, /* * Only record the loops spinning and not sleeping. */ - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time); #endif } diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index d4cae61..363b042 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <sys/proc.h> #include <sys/rwlock.h> #include <sys/sched.h> +#include <sys/smp.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/turnstile.h> @@ -65,15 +66,6 @@ PMC_SOFT_DECLARE( , , lock, failed); */ #define rwlock2rw(c) (__containerof(c, struct rwlock, rw_lock)) -#ifdef ADAPTIVE_RWLOCKS -static int rowner_retries = 10; -static int rowner_loops = 10000; -static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, - "rwlock debugging"); -SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); -SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); -#endif - #ifdef DDB #include <ddb/ddb.h> @@ -100,6 +92,41 @@ struct lock_class lock_class_rw = { #endif }; +#ifdef ADAPTIVE_RWLOCKS +static int rowner_retries = 10; +static int rowner_loops = 10000; +static SYSCTL_NODE(_debug, OID_AUTO, rwlock, CTLFLAG_RD, NULL, + "rwlock debugging"); +SYSCTL_INT(_debug_rwlock, OID_AUTO, retry, CTLFLAG_RW, &rowner_retries, 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, loops, CTLFLAG_RW, &rowner_loops, 0, ""); + +static struct lock_delay_config rw_delay = { + .initial = 1000, + .step = 500, + .min = 100, + .max = 5000, +}; + +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_initial, CTLFLAG_RW, &rw_delay.initial, + 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_step, CTLFLAG_RW, &rw_delay.step, + 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_min, CTLFLAG_RW, &rw_delay.min, + 0, ""); +SYSCTL_INT(_debug_rwlock, OID_AUTO, delay_max, CTLFLAG_RW, &rw_delay.max, + 0, ""); + +static void +rw_delay_sysinit(void *dummy) +{ + + rw_delay.initial = mp_ncpus * 25; + rw_delay.min = mp_ncpus * 5; + rw_delay.max = mp_ncpus * 25 * 10; +} +LOCK_DELAY_SYSINIT(rw_delay_sysinit); +#endif + /* * Return a pointer to the owning thread if the lock is write-locked or * NULL if the lock is unlocked or read-locked. @@ -355,9 +382,11 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) int contested = 0; #endif uintptr_t v; +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - u_int spin_cnt = 0; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; @@ -366,6 +395,9 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) if (SCHEDULER_STOPPED()) return; +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, &rw_delay); +#endif rw = rwlock2rw(c); KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread), @@ -412,7 +444,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) continue; } #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); @@ -437,12 +469,8 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) sched_tdname(curthread), "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == - owner && TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + owner && TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -459,7 +487,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) cpu_spinwait(); } #ifdef KDTRACE_HOOKS - spin_cnt += rowner_loops - i; + lda.spin_cnt += rowner_loops - i; #endif KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); @@ -552,7 +580,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); @@ -740,9 +768,11 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, uint64_t waittime = 0; int contested = 0; #endif +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - u_int spin_cnt = 0; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; @@ -751,6 +781,9 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, if (SCHEDULER_STOPPED()) return; +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, &rw_delay); +#endif rw = rwlock2rw(c); if (rw_wlocked(rw)) { @@ -775,7 +808,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, if (rw->rw_lock == RW_UNLOCKED && _rw_write_lock(rw, tid)) break; #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); @@ -798,12 +831,8 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, "spinning", "lockname:\"%s\"", rw->lock_object.lo_name); while ((struct thread*)RW_OWNER(rw->rw_lock) == owner && - TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -828,7 +857,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); #ifdef KDTRACE_HOOKS - spin_cnt += rowner_loops - i; + lda.spin_cnt += rowner_loops - i; #endif if (i != rowner_loops) continue; @@ -918,7 +947,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); /* Record only the loops spinning and not sleeping. */ - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time, LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index 78d207d..42878110 100644 --- a/sys/kern/kern_sx.c +++ b/sys/kern/kern_sx.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sched.h> #include <sys/sleepqueue.h> #include <sys/sx.h> +#include <sys/smp.h> #include <sys/sysctl.h> #if defined(SMP) && !defined(NO_ADAPTIVE_SX) @@ -145,6 +146,32 @@ static u_int asx_loops = 10000; static SYSCTL_NODE(_debug, OID_AUTO, sx, CTLFLAG_RD, NULL, "sxlock debugging"); SYSCTL_UINT(_debug_sx, OID_AUTO, retries, CTLFLAG_RW, &asx_retries, 0, ""); SYSCTL_UINT(_debug_sx, OID_AUTO, loops, CTLFLAG_RW, &asx_loops, 0, ""); + +static struct lock_delay_config sx_delay = { + .initial = 1000, + .step = 500, + .min = 100, + .max = 5000, +}; + +SYSCTL_INT(_debug_sx, OID_AUTO, delay_initial, CTLFLAG_RW, &sx_delay.initial, + 0, ""); +SYSCTL_INT(_debug_sx, OID_AUTO, delay_step, CTLFLAG_RW, &sx_delay.step, + 0, ""); +SYSCTL_INT(_debug_sx, OID_AUTO, delay_min, CTLFLAG_RW, &sx_delay.min, + 0, ""); +SYSCTL_INT(_debug_sx, OID_AUTO, delay_max, CTLFLAG_RW, &sx_delay.max, + 0, ""); + +static void +sx_delay_sysinit(void *dummy) +{ + + sx_delay.initial = mp_ncpus * 25; + sx_delay.min = mp_ncpus * 5; + sx_delay.max = mp_ncpus * 25 * 10; +} +LOCK_DELAY_SYSINIT(sx_delay_sysinit); #endif void @@ -513,9 +540,11 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, int contested = 0; #endif int error = 0; +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - u_int spin_cnt = 0; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; @@ -524,6 +553,10 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, if (SCHEDULER_STOPPED()) return (0); +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, &sx_delay); +#endif + /* If we already hold an exclusive lock, then recurse. */ if (sx_xlocked(sx)) { KASSERT((sx->lock_object.lo_flags & LO_RECURSABLE) != 0, @@ -549,7 +582,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) break; #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif #ifdef HWPMC_HOOKS PMC_SOFT_CALL( , , lock, failed); @@ -578,12 +611,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && - TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -605,7 +634,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, break; cpu_spinwait(); #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif } KTR_STATE0(KTR_SCHED, "thread", @@ -725,7 +754,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); @@ -818,9 +847,11 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) #endif uintptr_t x; int error = 0; +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + struct lock_delay_arg lda; +#endif #ifdef KDTRACE_HOOKS uintptr_t state; - u_int spin_cnt = 0; u_int sleep_cnt = 0; int64_t sleep_time = 0; int64_t all_time = 0; @@ -829,6 +860,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) if (SCHEDULER_STOPPED()) return (0); +#if defined(ADAPTIVE_MUTEXES) || defined(KDTRACE_HOOKS) + lock_delay_arg_init(&lda, &sx_delay); +#endif #ifdef KDTRACE_HOOKS state = sx->sx_lock; all_time -= lockstat_nsecs(&sx->lock_object); @@ -840,7 +874,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) */ for (;;) { #ifdef KDTRACE_HOOKS - spin_cnt++; + lda.spin_cnt++; #endif x = sx->sx_lock; @@ -888,12 +922,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) "lockname:\"%s\"", sx->lock_object.lo_name); GIANT_SAVE(); while (SX_OWNER(sx->sx_lock) == x && - TD_IS_RUNNING(owner)) { - cpu_spinwait(); -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif - } + TD_IS_RUNNING(owner)) + lock_delay(&lda); KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread), "running"); continue; @@ -989,7 +1019,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) LOCKSTAT_RECORD4(sx__block, sx, sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); - if (spin_cnt > sleep_cnt) + if (lda.spin_cnt > sleep_cnt) LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time, LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c index e78d5a9..bfe189d 100644 --- a/sys/kern/subr_lock.c +++ b/sys/kern/subr_lock.c @@ -103,6 +103,34 @@ lock_destroy(struct lock_object *lock) lock->lo_flags &= ~LO_INITIALIZED; } +void +lock_delay(struct lock_delay_arg *la) +{ + u_int i, delay, backoff, min, max; + struct lock_delay_config *lc = la->config; + + delay = la->delay; + + if (delay == 0) + delay = lc->initial; + else { + delay += lc->step; + max = lc->max; + if (delay > max) + delay = max; + } + + backoff = cpu_ticks() % delay; + min = lc->min; + if (backoff < min) + backoff = min; + for (i = 0; i < backoff; i++) + cpu_spinwait(); + + la->delay = delay; + la->spin_cnt += backoff; +} + #ifdef DDB DB_SHOW_COMMAND(lock, db_show_lock) { diff --git a/sys/sys/lock.h b/sys/sys/lock.h index 8d7a068..bd66aad 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -201,9 +201,35 @@ extern struct lock_class lock_class_lockmgr; extern struct lock_class *lock_classes[]; +extern int lock_delay_enabled; + +struct lock_delay_config { + u_int initial; + u_int step; + u_int min; + u_int max; +}; + +struct lock_delay_arg { + struct lock_delay_config *config; + u_int delay; + u_int spin_cnt; +}; + +static inline void +lock_delay_arg_init(struct lock_delay_arg *la, struct lock_delay_config *lc) { + la->config = lc; + la->delay = 0; + la->spin_cnt = 0; +} + +#define LOCK_DELAY_SYSINIT(func) \ + SYSINIT(func##_ld, SI_SUB_LOCK, SI_ORDER_ANY, func, NULL) + void lock_init(struct lock_object *, struct lock_class *, const char *, const char *, int); void lock_destroy(struct lock_object *); +void lock_delay(struct lock_delay_arg *); void spinlock_enter(void); void spinlock_exit(void); void witness_init(struct lock_object *, const char *);
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20160731124113.GE9408>