Date: Sun, 10 Jul 2016 13:13:26 +0200
From: Mateusz Guzik <mjguzik@gmail.com>
To: freebsd-current@freebsd.org
Subject: [PATCH] microoptimize locking primitives by introducing randomized delay between atomic ops
Message-ID: <20160710111326.GA7853@dft-labs.eu>
If the lock is contended, primitives like __mtx_lock_sleep spin, checking whether the owner is still running or the lock has been freed. The problem is that once the lock is discovered to be free, multiple CPUs are likely to attempt the atomic op at the same time, which makes it more costly for everyone and throughput suffers. The standard thing to do is to introduce some sort of randomized delay so that this kind of behaviour is reduced.

As such, below is a trivial hack which takes cpu_ticks() modulo 2048 as the delay, which in my testing gives reasonably good results. Please note there is definitely way more room for improvement in general. (A toy userland sketch of the backoff idea is appended after the patch.)

In terms of results, there was no statistically significant change in -j 40 buildworld or buildkernel. However, a 40-way find on a ports tree placed on tmpfs yielded the following:

x vanilla
+ patched
    N           Min           Max        Median           Avg        Stddev
x  20        12.431        15.952        14.897       14.7444    0.74241657
+  20         8.103        11.863        9.0135       9.44565     1.0059484
Difference at 95.0% confidence
	-5.29875 +/- 0.565836
	-35.9374% +/- 3.83764%
	(Student's t, pooled s = 0.884057)

The patch:

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 012cf7c..db3b950 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -444,7 +444,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts,
 			    m->lock_object.lo_name);
 			while (mtx_owner(m) == owner &&
 			    TD_IS_RUNNING(owner)) {
-				cpu_spinwait();
+				lock_delay();
 #ifdef KDTRACE_HOOKS
 				spin_cnt++;
 #endif
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 6a904d2..3b512b5 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -438,7 +438,7 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line)
 				    "lockname:\"%s\"", rw->lock_object.lo_name);
 				while ((struct thread*)RW_OWNER(rw->rw_lock) ==
 				    owner && TD_IS_RUNNING(owner)) {
-					cpu_spinwait();
+					lock_delay();
 #ifdef KDTRACE_HOOKS
 					spin_cnt++;
 #endif
@@ -799,7 +799,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file,
 			    rw->lock_object.lo_name);
 			while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
 			    TD_IS_RUNNING(owner)) {
-				cpu_spinwait();
+				lock_delay();
 #ifdef KDTRACE_HOOKS
 				spin_cnt++;
 #endif
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 2a81c04..1bfd46f 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -579,7 +579,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
 			GIANT_SAVE();
 			while (SX_OWNER(sx->sx_lock) == x &&
 			    TD_IS_RUNNING(owner)) {
-				cpu_spinwait();
+				lock_delay();
 #ifdef KDTRACE_HOOKS
 				spin_cnt++;
 #endif
@@ -889,10 +889,10 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
 			GIANT_SAVE();
 			while (SX_OWNER(sx->sx_lock) == x &&
 			    TD_IS_RUNNING(owner)) {
+				lock_delay();
 #ifdef KDTRACE_HOOKS
 				spin_cnt++;
 #endif
-				cpu_spinwait();
 			}
 			KTR_STATE0(KTR_SCHED, "thread", sched_tdname(curthread),
 			    "running");
diff --git a/sys/kern/subr_lock.c b/sys/kern/subr_lock.c
index e78d5a9..55a0fb7 100644
--- a/sys/kern/subr_lock.c
+++ b/sys/kern/subr_lock.c
@@ -103,6 +103,16 @@ lock_destroy(struct lock_object *lock)
 	lock->lo_flags &= ~LO_INITIALIZED;
 }
 
+void
+lock_delay(void)
+{
+	int i, delay;
+
+	delay = cpu_ticks() % 2048;
+	for (i = 0; i < delay; i++)
+		cpu_spinwait();
+}
+
 #ifdef DDB
 DB_SHOW_COMMAND(lock, db_show_lock)
 {
diff --git a/sys/sys/lock.h b/sys/sys/lock.h
index 8d7a068..d076687 100644
--- a/sys/sys/lock.h
+++ b/sys/sys/lock.h
@@ -204,6 +204,7 @@ extern struct lock_class *lock_classes[];
 void	lock_init(struct lock_object *, struct lock_class *,
 	    const char *, const char *, int);
 void	lock_destroy(struct lock_object *);
+void	lock_delay(void);
 void	spinlock_enter(void);
 void	spinlock_exit(void);
 void	witness_init(struct lock_object *, const char *);
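For anyone who wants to poke at the idea outside the kernel, here is a minimal userland sketch of the same backoff scheme. It is an illustration, not the patch: cpu_relax(), cheap_ticks() and the tatas_lock type are made-up stand-ins for cpu_spinwait(), cpu_ticks() and the kernel lock structures, and the test-and-test-and-set loop only loosely mimics the "spin while the owner is running" loops touched above. The randomized delay between reloads of the lock word is the part that corresponds to the patch.

/*
 * Toy userland illustration of randomized backoff between lock polls.
 * All names below are made up; see the note above.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Rough equivalent of cpu_spinwait(): PAUSE on x86, compiler barrier elsewhere. */
static inline void
cpu_relax(void)
{
#if defined(__i386__) || defined(__x86_64__)
	__asm__ __volatile__("pause");
#else
	__asm__ __volatile__("" ::: "memory");
#endif
}

/* Rough equivalent of cpu_ticks(): any cheap counter that varies per CPU/time. */
static inline uint64_t
cheap_ticks(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_nsec);
}

/* Same shape as the proposed lock_delay(): spin a random-ish time, capped at 2048. */
static void
lock_delay(void)
{
	uint64_t i, delay;

	delay = cheap_ticks() % 2048;
	for (i = 0; i < delay; i++)
		cpu_relax();
}

typedef struct {
	atomic_uint locked;
} tatas_lock;

static void
tatas_acquire(tatas_lock *l)
{
	for (;;) {
		/* The expensive atomic op; only retried after backing off. */
		if (atomic_exchange_explicit(&l->locked, 1,
		    memory_order_acquire) == 0)
			return;
		/* Lock is held: poll with plain loads, pausing a randomized
		 * time between polls so waiters do not all retry the atomic
		 * op in the same instant once the lock is released. */
		while (atomic_load_explicit(&l->locked, memory_order_relaxed))
			lock_delay();
	}
}

static void
tatas_release(tatas_lock *l)
{
	atomic_store_explicit(&l->locked, 0, memory_order_release);
}

static tatas_lock lk;
static unsigned long counter;

static void *
worker(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++) {
		tatas_acquire(&lk);
		counter++;
		tatas_release(&lk);
	}
	return (NULL);
}

int
main(void)
{
	pthread_t tid[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&tid[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(tid[i], NULL);
	printf("counter = %lu (expected %d)\n", counter, 4 * 100000);
	return (0);
}

Building with "cc -O2 -pthread backoff.c" and timing the run on a multi-core box with and without the lock_delay() call in tatas_acquire() gives a rough feel for the effect, though obviously nothing like the kernel numbers above.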