Date: Tue, 11 Nov 2003 14:13:40 -0800 (PST)
From: John Baldwin <jhb@FreeBSD.org>
To: Perforce Change Reviews <perforce@freebsd.org>
Subject: PERFORCE change 42055 for review
Message-ID: <200311112213.hABMDent006464@repoman.freebsd.org>
http://perforce.freebsd.org/chv.cgi?CH=42055 Change 42055 by jhb@jhb_blue on 2003/11/11 14:13:32 IFC @42053: loopback turnstiles. Affected files ... .. //depot/projects/smpng/sys/conf/files#93 integrate .. //depot/projects/smpng/sys/kern/kern_mutex.c#73 integrate .. //depot/projects/smpng/sys/kern/kern_thread.c#46 integrate .. //depot/projects/smpng/sys/kern/subr_turnstile.c#2 integrate .. //depot/projects/smpng/sys/kern/subr_witness.c#107 integrate .. //depot/projects/smpng/sys/sys/_mutex.h#11 integrate .. //depot/projects/smpng/sys/sys/filedesc.h#18 integrate .. //depot/projects/smpng/sys/sys/proc.h#104 integrate Differences ... ==== //depot/projects/smpng/sys/conf/files#93 (text+ko) ==== @@ -1,4 +1,4 @@ -# $FreeBSD: src/sys/conf/files,v 1.847 2003/11/11 18:58:53 ume Exp $ +# $FreeBSD: src/sys/conf/files,v 1.848 2003/11/11 22:07:29 jhb Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -1154,6 +1154,7 @@ kern/subr_smp.c optional smp kern/subr_taskqueue.c standard kern/subr_trap.c standard +kern/subr_turnstile.c standard kern/subr_witness.c optional witness kern/sys_generic.c standard kern/sys_pipe.c standard ==== //depot/projects/smpng/sys/kern/kern_mutex.c#73 (text+ko) ==== @@ -34,7 +34,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/kern_mutex.c,v 1.131 2003/07/31 18:52:18 jhb Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/kern_mutex.c,v 1.132 2003/11/11 22:07:29 jhb Exp $"); #include "opt_adaptive_mutexes.h" #include "opt_ddb.h" @@ -52,6 +52,7 @@ #include <sys/sched.h> #include <sys/sbuf.h> #include <sys/sysctl.h> +#include <sys/turnstile.h> #include <sys/vmmeter.h> #include <machine/atomic.h> @@ -90,122 +91,6 @@ struct mtx sched_lock; struct mtx Giant; -/* - * Prototypes for non-exported routines. - */ -static void propagate_priority(struct thread *); - -static void -propagate_priority(struct thread *td) -{ - int pri = td->td_priority; - struct mtx *m = td->td_blocked; - - mtx_assert(&sched_lock, MA_OWNED); - for (;;) { - struct thread *td1; - - td = mtx_owner(m); - - if (td == NULL) { - /* - * This really isn't quite right. Really - * ought to bump priority of thread that - * next acquires the mutex. - */ - MPASS(m->mtx_lock == MTX_CONTESTED); - return; - } - - MPASS(td->td_proc != NULL); - MPASS(td->td_proc->p_magic == P_MAGIC); - KASSERT(!TD_IS_SLEEPING(td), ( - "sleeping thread (pid %d) owns a mutex", - td->td_proc->p_pid)); - if (td->td_priority <= pri) /* lower is higher priority */ - return; - - - /* - * If lock holder is actually running, just bump priority. - */ - if (TD_IS_RUNNING(td)) { - td->td_priority = pri; - return; - } - -#ifndef SMP - /* - * For UP, we check to see if td is curthread (this shouldn't - * ever happen however as it would mean we are in a deadlock.) - */ - KASSERT(td != curthread, ("Deadlock detected")); -#endif - - /* - * If on run queue move to new run queue, and quit. - * XXXKSE this gets a lot more complicated under threads - * but try anyhow. - */ - if (TD_ON_RUNQ(td)) { - MPASS(td->td_blocked == NULL); - sched_prio(td, pri); - return; - } - /* - * Adjust for any other cases. - */ - td->td_priority = pri; - - /* - * If we aren't blocked on a mutex, we should be. - */ - KASSERT(TD_ON_LOCK(td), ( - "process %d(%s):%d holds %s but isn't blocked on a mutex\n", - td->td_proc->p_pid, td->td_proc->p_comm, td->td_state, - m->mtx_object.lo_name)); - - /* - * Pick up the mutex that td is blocked on. 
- */ - m = td->td_blocked; - MPASS(m != NULL); - - /* - * Check if the thread needs to be moved up on - * the blocked chain - */ - if (td == TAILQ_FIRST(&m->mtx_blocked)) { - continue; - } - - td1 = TAILQ_PREV(td, threadqueue, td_lockq); - if (td1->td_priority <= pri) { - continue; - } - - /* - * Remove thread from blocked chain and determine where - * it should be moved up to. Since we know that td1 has - * a lower priority than td, we know that at least one - * thread in the chain has a lower priority and that - * td1 will thus not be NULL after the loop. - */ - TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq); - TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) { - MPASS(td1->td_proc->p_magic == P_MAGIC); - if (td1->td_priority > pri) - break; - } - - MPASS(td1 != NULL); - TAILQ_INSERT_BEFORE(td1, td, td_lockq); - CTR4(KTR_LOCK, - "propagate_priority: p %p moved before %p on [%p] %s", - td, td1, m, m->mtx_object.lo_name); - } -} - #ifdef MUTEX_PROFILING SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging"); SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling"); @@ -487,8 +372,8 @@ void _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line) { + struct turnstile *ts; struct thread *td = curthread; - struct thread *td1; #if defined(SMP) && defined(ADAPTIVE_MUTEXES) struct thread *owner; #endif @@ -515,15 +400,15 @@ while (!_obtain_lock(m, td)) { - mtx_lock_spin(&sched_lock); + ts = turnstile_lookup(&m->mtx_object); v = m->mtx_lock; /* * Check if the lock has been released while spinning for - * the sched_lock. + * the turnstile chain lock. */ if (v == MTX_UNOWNED) { - mtx_unlock_spin(&sched_lock); + turnstile_release(&m->mtx_object); #ifdef __i386__ ia32_pause(); #endif @@ -537,14 +422,9 @@ * necessary. */ if (v == MTX_CONTESTED) { - td1 = TAILQ_FIRST(&m->mtx_blocked); - MPASS(td1 != NULL); + MPASS(ts != NULL); m->mtx_lock = (uintptr_t)td | MTX_CONTESTED; - LIST_INSERT_HEAD(&td->td_contested, m, mtx_contested); - - if (td1->td_priority < td->td_priority) - td->td_priority = td1->td_priority; - mtx_unlock_spin(&sched_lock); + turnstile_claim(ts); return; } @@ -556,7 +436,7 @@ if ((v & MTX_CONTESTED) == 0 && !atomic_cmpset_ptr(&m->mtx_lock, (void *)v, (void *)(v | MTX_CONTESTED))) { - mtx_unlock_spin(&sched_lock); + turnstile_release(&m->mtx_object); #ifdef __i386__ ia32_pause(); #endif @@ -570,7 +450,7 @@ */ owner = (struct thread *)(v & MTX_FLAGMASK); if (m != &Giant && TD_IS_RUNNING(owner)) { - mtx_unlock_spin(&sched_lock); + turnstile_release(&m->mtx_object); while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) { #ifdef __i386__ ia32_pause(); @@ -585,42 +465,6 @@ */ mtx_assert(m, MA_NOTOWNED); -#ifdef notyet - /* - * If we're borrowing an interrupted thread's VM context, we - * must clean up before going to sleep. - */ - if (td->td_ithd != NULL) { - struct ithd *it = td->td_ithd; - - if (it->it_interrupted) { - if (LOCK_LOG_TEST(&m->mtx_object, opts)) - CTR2(KTR_LOCK, - "_mtx_lock_sleep: %p interrupted %p", - it, it->it_interrupted); - intr_thd_fixup(it); - } - } -#endif - - /* - * Put us on the list of threads blocked on this mutex - * and add this mutex to the owning thread's list of - * contested mutexes if needed. 
- */ - if (TAILQ_EMPTY(&m->mtx_blocked)) { - td1 = mtx_owner(m); - LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested); - TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq); - } else { - TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) - if (td1->td_priority > td->td_priority) - break; - if (td1) - TAILQ_INSERT_BEFORE(td1, td, td_lockq); - else - TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq); - } #ifdef KTR if (!cont_logged) { CTR6(KTR_CONTENTION, @@ -633,27 +477,9 @@ #endif /* - * Save who we're blocked on. + * Block on the turnstile. */ - td->td_blocked = m; - td->td_lockname = m->mtx_object.lo_name; - TD_SET_LOCK(td); - propagate_priority(td); - - if (LOCK_LOG_TEST(&m->mtx_object, opts)) - CTR3(KTR_LOCK, - "_mtx_lock_sleep: p %p blocked on [%p] %s", td, m, - m->mtx_object.lo_name); - - td->td_proc->p_stats->p_ru.ru_nvcsw++; - mi_switch(); - - if (LOCK_LOG_TEST(&m->mtx_object, opts)) - CTR3(KTR_LOCK, - "_mtx_lock_sleep: p %p free from blocked on [%p] %s", - td, m, m->mtx_object.lo_name); - - mtx_unlock_spin(&sched_lock); + turnstile_wait(ts, &m->mtx_object, mtx_owner(m)); } #ifdef KTR @@ -730,11 +556,8 @@ void _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line) { + struct turnstile *ts; struct thread *td, *td1; - struct mtx *m1; - int pri; - - td = curthread; if (mtx_recursed(m)) { if (--(m->mtx_recurse) == 0) @@ -744,57 +567,47 @@ return; } - mtx_lock_spin(&sched_lock); + ts = turnstile_lookup(&m->mtx_object); if (LOCK_LOG_TEST(&m->mtx_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m); - td1 = TAILQ_FIRST(&m->mtx_blocked); #if defined(SMP) && defined(ADAPTIVE_MUTEXES) - if (td1 == NULL) { + if (ts == NULL) { _release_lock_quick(m); if (LOCK_LOG_TEST(&m->mtx_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m); - mtx_unlock_spin(&sched_lock); + turnstile_release(&m->mtx_object); return; } +#else + MPASS(ts != NULL); #endif - MPASS(td->td_proc->p_magic == P_MAGIC); - MPASS(td1->td_proc->p_magic == P_MAGIC); - - TAILQ_REMOVE(&m->mtx_blocked, td1, td_lockq); - - LIST_REMOVE(m, mtx_contested); - if (TAILQ_EMPTY(&m->mtx_blocked)) { + /* XXX */ + td1 = turnstile_head(ts); + if (turnstile_signal(ts)) { _release_lock_quick(m); if (LOCK_LOG_TEST(&m->mtx_object, opts)) CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m); - } else + } else { m->mtx_lock = MTX_CONTESTED; - - pri = PRI_MAX; - LIST_FOREACH(m1, &td->td_contested, mtx_contested) { - int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority; - if (cp < pri) - pri = cp; + if (LOCK_LOG_TEST(&m->mtx_object, opts)) + CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p still contested", + m); } + turnstile_unpend(ts); - if (pri > td->td_base_pri) - pri = td->td_base_pri; - td->td_priority = pri; - - if (LOCK_LOG_TEST(&m->mtx_object, opts)) - CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p", - m, td1); - - td1->td_blocked = NULL; - TD_CLR_LOCK(td1); - if (!TD_CAN_RUN(td1)) { - mtx_unlock_spin(&sched_lock); + /* + * XXX: This is just a hack until preemption is done. However, + * once preemption is done we need to either wrap the + * turnstile_signal() and release of the actual lock in an + * extra critical section or change the preemption code to + * always just set a flag and never do instant-preempts. 
+ */ + td = curthread; + if (td->td_critnest > 0 || td1->td_priority >= td->td_priority) return; - } - setrunqueue(td1); - - if (td->td_critnest == 1 && td1->td_priority < pri) { + mtx_lock_spin(&sched_lock); + if (!TD_IS_RUNNING(td1)) { #ifdef notyet if (td->td_ithd != NULL) { struct ithd *it = td->td_ithd; @@ -819,7 +632,6 @@ CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p", m, (void *)m->mtx_lock); } - mtx_unlock_spin(&sched_lock); return; @@ -954,7 +766,6 @@ lock->lo_flags |= LO_DUPOK; m->mtx_lock = MTX_UNOWNED; - TAILQ_INIT(&m->mtx_blocked); LOCK_LOG_INIT(lock, opts); @@ -998,6 +809,9 @@ /* Setup thread0 so that mutexes work. */ LIST_INIT(&thread0.td_contested); + /* Setup turnstiles so that sleep mutexes work. */ + init_turnstiles(); + /* * Initialize mutexes. */ ==== //depot/projects/smpng/sys/kern/kern_thread.c#46 (text+ko) ==== @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/kern_thread.c,v 1.161 2003/08/26 11:33:15 davidxu Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/kern_thread.c,v 1.162 2003/11/11 22:07:29 jhb Exp $"); #include <sys/param.h> #include <sys/systm.h> @@ -44,6 +44,7 @@ #include <sys/signalvar.h> #include <sys/sx.h> #include <sys/tty.h> +#include <sys/turnstile.h> #include <sys/user.h> #include <sys/jail.h> #include <sys/kse.h> @@ -190,6 +191,7 @@ vm_thread_new(td, 0); mtx_unlock(&Giant); cpu_thread_setup(td); + td->td_turnstile = turnstile_alloc(); td->td_sched = (struct td_sched *)&td[1]; } @@ -202,6 +204,7 @@ struct thread *td; td = (struct thread *)mem; + turnstile_free(td->td_turnstile); vm_thread_dispose(td); } ==== //depot/projects/smpng/sys/kern/subr_turnstile.c#2 (text+ko) ==== @@ -30,102 +30,160 @@ */ /* - * Machine independent bits of mutex implementation. + * Implementation of turnstiles used to hold queue of threads blocked on + * non-sleepable locks. Sleepable locks use condition variables to + * implement their queues. Turnstiles differ from a sleep queue in that + * turnstile queue's are assigned to a lock held by an owning thread. Thus, + * when one thread is enqueued onto a turnstile, it can lend its priority + * to the owning thread. + * + * We wish to avoid bloating locks with an embedded turnstile and we do not + * want to use back-pointers in the locks for the same reason. Thus, we + * use a similar approach to that of Solaris 7 as described in Solaris + * Internals by Jim Mauro and Richard McDougall. Turnstiles are looked up + * in a hash table based on the address of the lock. Each entry in the + * hash table is a linked-lists of turnstiles and is called a turnstile + * chain. Each chain contains a spin mutex that protects all of the + * turnstiles in the chain. + * + * Each time a thread is created, a turnstile is malloc'd and attached to + * that thread. When a thread blocks on a lock, if it is the first thread + * to block, it lends its turnstile to the lock. If the lock already has + * a turnstile, then it gives its turnstile to the lock's turnstile's free + * list. When a thread is woken up, it takes a thread from the free list + * if there are any other waiters. If it is the only thread blocked on the + * lock, then it reclaims the turnstile associated with the lock and removes + * it from the hash table. + * + * XXX: We should probably implement some sort of sleep queue that condition + * variables and sleepqueue's share. On Solaris condition variables are + * implemented using a hash table of sleep queues similar to our current + * sleep queues. We might want to investigate doing that ourselves. 
*/ #include <sys/cdefs.h> -__FBSDID("$FreeBSD: src/sys/kern/subr_turnstile.c,v 1.131 2003/07/31 18:52:18 jhb Exp $"); +__FBSDID("$FreeBSD: src/sys/kern/subr_turnstile.c,v 1.132 2003/11/11 22:07:29 jhb Exp $"); -#include "opt_adaptive_mutexes.h" -#include "opt_ddb.h" - #include <sys/param.h> #include <sys/systm.h> -#include <sys/bus.h> #include <sys/kernel.h> #include <sys/ktr.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/proc.h> +#include <sys/queue.h> #include <sys/resourcevar.h> +#include <sys/turnstile.h> #include <sys/sched.h> -#include <sys/sbuf.h> -#include <sys/sysctl.h> -#include <sys/vmmeter.h> - -#include <machine/atomic.h> -#include <machine/bus.h> -#include <machine/clock.h> -#include <machine/cpu.h> - -#include <ddb/ddb.h> -#include <vm/vm.h> -#include <vm/vm_extern.h> - /* - * Internal utility macros. + * Constants for the hash table of turnstile chains. TC_SHIFT is a magic + * number chosen because the sleep queue's use the same value for the + * shift. Basically, we ignore the lower 8 bits of the address. + * TC_TABLESIZE must be a power of two for TC_MASK to work properly. */ -#define mtx_unowned(m) ((m)->mtx_lock == MTX_UNOWNED) +#define TC_TABLESIZE 128 /* Must be power of 2. */ +#define TC_MASK (TC_TABLESIZE - 1) +#define TC_SHIFT 8 +#define TC_HASH(lock) (((uintptr_t)(lock) >> TC_SHIFT) & TC_MASK) +#define TC_LOOKUP(lock) &turnstile_chains[TC_HASH(lock)] -#define mtx_owner(m) (mtx_unowned((m)) ? NULL \ - : (struct thread *)((m)->mtx_lock & MTX_FLAGMASK)) - /* - * Lock classes for sleep and spin mutexes. + * There are three different lists of turnstiles as follows. The list + * connected by ts_link entries is a per-thread list of all the turnstiles + * attached to locks that we own. This is used to fixup our priority when + * a lock is released. The other two lists use the ts_hash entries. The + * first of these two is turnstile chain list that a turnstile is on when + * it is attached to a lock. The second list to use ts_hash is the free + * list hung off a turnstile that is attached to a lock. + * + * Each turnstile contains two lists of threads. The ts_blocked list is + * a linked list of threads blocked on the turnstile's lock. The + * ts_pending list is a linked list of threads previously awoken by + * turnstile_signal() or turnstile_wait() that are waiting to be put on + * the run queue. + * + * Locking key: + * c - turnstile chain lock + * q - td_contested lock */ -struct lock_class lock_class_mtx_sleep = { - "sleep mutex", - LC_SLEEPLOCK | LC_RECURSABLE +struct turnstile { + TAILQ_HEAD(, thread) ts_blocked; /* (c + q) Blocked threads. */ + TAILQ_HEAD(, thread) ts_pending; /* (c) Pending threads. */ + LIST_ENTRY(turnstile) ts_hash; /* (c) Chain and free list. */ + LIST_ENTRY(turnstile) ts_link; /* (q) Contested locks. */ + LIST_HEAD(, turnstile) ts_free; /* (c) Free turnstiles. */ + struct lock_object *ts_lockobj; /* (c) Lock we reference. */ + struct thread *ts_owner; /* (q) Who owns the lock. */ }; -struct lock_class lock_class_mtx_spin = { - "spin mutex", - LC_SPINLOCK | LC_RECURSABLE + +struct turnstile_chain { + LIST_HEAD(, turnstile) tc_turnstiles; /* List of turnstiles. */ + struct mtx tc_lock; /* Spin lock for this chain. */ }; -/* - * System-wide mutexes - */ -struct mtx sched_lock; -struct mtx Giant; +static struct mtx td_contested_lock; +static struct turnstile_chain turnstile_chains[TC_TABLESIZE]; + +MALLOC_DEFINE(M_TURNSTILE, "turnstiles", "turnstiles"); /* * Prototypes for non-exported routines. 
*/ +static void init_turnstile0(void *dummy); static void propagate_priority(struct thread *); +static void turnstile_setowner(struct turnstile *ts, struct thread *owner); +/* + * Walks the chain of turnstiles and their owners to propagate the priority + * of the thread being blocked to all the threads holding locks that have to + * release their locks before this thread can run again. + */ static void propagate_priority(struct thread *td) { - int pri = td->td_priority; - struct mtx *m = td->td_blocked; + struct turnstile_chain *tc; + struct turnstile *ts; + struct thread *td1; + int pri; mtx_assert(&sched_lock, MA_OWNED); + pri = td->td_priority; + ts = td->td_blocked; for (;;) { - struct thread *td1; - - td = mtx_owner(m); + td = ts->ts_owner; if (td == NULL) { /* * This really isn't quite right. Really * ought to bump priority of thread that - * next acquires the mutex. + * next acquires the lock. */ - MPASS(m->mtx_lock == MTX_CONTESTED); return; } MPASS(td->td_proc != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); - KASSERT(!TD_IS_SLEEPING(td), ( - "sleeping thread (pid %d) owns a mutex", + + /* + * XXX: The owner of a turnstile can be stale if it is the + * first thread to grab a slock of a sx lock. In that case + * it is possible for us to be at SSLEEP or some other + * weird state. We should probably just return if the state + * isn't SRUN or SLOCK. + */ + KASSERT(!TD_IS_SLEEPING(td), + ("sleeping thread (pid %d) owns a non-sleepable lock", td->td_proc->p_pid)); - if (td->td_priority <= pri) /* lower is higher priority */ + + /* + * If this thread already has higher priority than the + * thread that is being blocked, we are finished. + */ + if (td->td_priority <= pri) return; - /* * If lock holder is actually running, just bump priority. */ @@ -152,35 +210,42 @@ sched_prio(td, pri); return; } + /* - * Adjust for any other cases. + * Bump this thread's priority. */ td->td_priority = pri; /* - * If we aren't blocked on a mutex, we should be. + * If we aren't blocked on a lock, we should be. */ KASSERT(TD_ON_LOCK(td), ( - "process %d(%s):%d holds %s but isn't blocked on a mutex\n", + "process %d(%s):%d holds %s but isn't blocked on a lock\n", td->td_proc->p_pid, td->td_proc->p_comm, td->td_state, - m->mtx_object.lo_name)); + ts->ts_lockobj->lo_name)); /* - * Pick up the mutex that td is blocked on. + * Pick up the lock that td is blocked on. */ - m = td->td_blocked; - MPASS(m != NULL); + ts = td->td_blocked; + MPASS(ts != NULL); + tc = TC_LOOKUP(ts->ts_lockobj); + mtx_lock_spin(&tc->tc_lock); /* * Check if the thread needs to be moved up on - * the blocked chain + * the blocked chain. It doesn't need to be moved + * if it is already at the head of the list or if + * the item in front of it still has a higher priority. */ - if (td == TAILQ_FIRST(&m->mtx_blocked)) { + if (td == TAILQ_FIRST(&ts->ts_blocked)) { + mtx_unlock_spin(&tc->tc_lock); continue; } td1 = TAILQ_PREV(td, threadqueue, td_lockq); if (td1->td_priority <= pri) { + mtx_unlock_spin(&tc->tc_lock); continue; } @@ -191,8 +256,9 @@ * thread in the chain has a lower priority and that * td1 will thus not be NULL after the loop. 
*/ - TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq); - TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) { + mtx_lock_spin(&td_contested_lock); + TAILQ_REMOVE(&ts->ts_blocked, td, td_lockq); + TAILQ_FOREACH(td1, &ts->ts_blocked, td_lockq) { MPASS(td1->td_proc->p_magic == P_MAGIC); if (td1->td_priority > pri) break; @@ -200,803 +266,450 @@ MPASS(td1 != NULL); TAILQ_INSERT_BEFORE(td1, td, td_lockq); + mtx_unlock_spin(&td_contested_lock); CTR4(KTR_LOCK, - "propagate_priority: p %p moved before %p on [%p] %s", - td, td1, m, m->mtx_object.lo_name); + "propagate_priority: td %p moved before %p on [%p] %s", + td, td1, ts->ts_lockobj, ts->ts_lockobj->lo_name); + mtx_unlock_spin(&tc->tc_lock); } } -#ifdef MUTEX_PROFILING -SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging"); -SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling"); -static int mutex_prof_enable = 0; -SYSCTL_INT(_debug_mutex_prof, OID_AUTO, enable, CTLFLAG_RW, - &mutex_prof_enable, 0, "Enable tracing of mutex holdtime"); - -struct mutex_prof { - const char *name; - const char *file; - int line; - uintmax_t cnt_max; - uintmax_t cnt_tot; - uintmax_t cnt_cur; - struct mutex_prof *next; -}; - /* - * mprof_buf is a static pool of profiling records to avoid possible - * reentrance of the memory allocation functions. - * - * Note: NUM_MPROF_BUFFERS must be smaller than MPROF_HASH_SIZE. + * Early initialization of turnstiles. This is not done via a SYSINIT() + * since this needs to be initialized very early when mutexes are first + * initialized. */ -#define NUM_MPROF_BUFFERS 1000 -static struct mutex_prof mprof_buf[NUM_MPROF_BUFFERS]; -static int first_free_mprof_buf; -#define MPROF_HASH_SIZE 1009 -static struct mutex_prof *mprof_hash[MPROF_HASH_SIZE]; -/* SWAG: sbuf size = avg stat. line size * number of locks */ -#define MPROF_SBUF_SIZE 256 * 400 - -static int mutex_prof_acquisitions; -SYSCTL_INT(_debug_mutex_prof, OID_AUTO, acquisitions, CTLFLAG_RD, - &mutex_prof_acquisitions, 0, "Number of mutex acquistions recorded"); -static int mutex_prof_records; -SYSCTL_INT(_debug_mutex_prof, OID_AUTO, records, CTLFLAG_RD, - &mutex_prof_records, 0, "Number of profiling records"); -static int mutex_prof_maxrecords = NUM_MPROF_BUFFERS; -SYSCTL_INT(_debug_mutex_prof, OID_AUTO, maxrecords, CTLFLAG_RD, - &mutex_prof_maxrecords, 0, "Maximum number of profiling records"); -static int mutex_prof_rejected; -SYSCTL_INT(_debug_mutex_prof, OID_AUTO, rejected, CTLFLAG_RD, - &mutex_prof_rejected, 0, "Number of rejected profiling records"); -static int mutex_prof_hashsize = MPROF_HASH_SIZE; -SYSCTL_INT(_debug_mutex_prof, OID_AUTO, hashsize, CTLFLAG_RD, - &mutex_prof_hashsize, 0, "Hash size"); -static int mutex_prof_collisions = 0; -SYSCTL_INT(_debug_mutex_prof, OID_AUTO, collisions, CTLFLAG_RD, - &mutex_prof_collisions, 0, "Number of hash collisions"); - -/* - * mprof_mtx protects the profiling buffers and the hash. 
- */ -static struct mtx mprof_mtx; -MTX_SYSINIT(mprof, &mprof_mtx, "mutex profiling lock", MTX_SPIN | MTX_QUIET); - -static u_int64_t -nanoseconds(void) +void +init_turnstiles(void) { - struct timespec tv; + int i; - nanotime(&tv); - return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec); + for (i = 0; i < TC_TABLESIZE; i++) { + LIST_INIT(&turnstile_chains[i].tc_turnstiles); + mtx_init(&turnstile_chains[i].tc_lock, "turnstile chain", + NULL, MTX_SPIN); + } + mtx_init(&td_contested_lock, "td_contested", NULL, MTX_SPIN); +#ifdef INVARIANTS + thread0.td_turnstile = NULL; +#endif } -static int -dump_mutex_prof_stats(SYSCTL_HANDLER_ARGS) +static void +init_turnstile0(void *dummy) { - struct sbuf *sb; - int error, i; - static int multiplier = 1; - if (first_free_mprof_buf == 0) - return (SYSCTL_OUT(req, "No locking recorded", - sizeof("No locking recorded"))); - -retry_sbufops: - sb = sbuf_new(NULL, NULL, MPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN); - sbuf_printf(sb, "%6s %12s %11s %5s %s\n", - "max", "total", "count", "avg", "name"); - /* - * XXX this spinlock seems to be by far the largest perpetrator - * of spinlock latency (1.6 msec on an Athlon1600 was recorded - * even before I pessimized it further by moving the average - * computation here). - */ - mtx_lock_spin(&mprof_mtx); - for (i = 0; i < first_free_mprof_buf; ++i) { - sbuf_printf(sb, "%6ju %12ju %11ju %5ju %s:%d (%s)\n", - mprof_buf[i].cnt_max / 1000, - mprof_buf[i].cnt_tot / 1000, - mprof_buf[i].cnt_cur, - mprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 : - mprof_buf[i].cnt_tot / (mprof_buf[i].cnt_cur * 1000), - mprof_buf[i].file, mprof_buf[i].line, mprof_buf[i].name); - if (sbuf_overflowed(sb)) { - mtx_unlock_spin(&mprof_mtx); - sbuf_delete(sb); - multiplier++; - goto retry_sbufops; - } - } - mtx_unlock_spin(&mprof_mtx); - sbuf_finish(sb); - error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1); - sbuf_delete(sb); - return (error); + thread0.td_turnstile = turnstile_alloc(); } -SYSCTL_PROC(_debug_mutex_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD, - NULL, 0, dump_mutex_prof_stats, "A", "Mutex profiling statistics"); -#endif +SYSINIT(turnstile0, SI_SUB_LOCK, SI_ORDER_ANY, init_turnstile0, NULL); /* - * Function versions of the inlined __mtx_* macros. These are used by - * modules and can also be called from assembly language if needed. + * Set the owner of the lock this turnstile is attached to. */ -void -_mtx_lock_flags(struct mtx *m, int opts, const char *file, int line) +static void +turnstile_setowner(struct turnstile *ts, struct thread *owner) { - MPASS(curthread != NULL); - KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep, - ("mtx_lock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name, - file, line)); - _get_sleep_lock(m, curthread, opts, file, line); - LOCK_LOG_LOCK("LOCK", &m->mtx_object, opts, m->mtx_recurse, file, - line); - WITNESS_LOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); -#ifdef MUTEX_PROFILING - /* don't reset the timer when/if recursing */ - if (m->mtx_acqtime == 0) { - m->mtx_filename = file; - m->mtx_lineno = line; - m->mtx_acqtime = mutex_prof_enable ? nanoseconds() : 0; - ++mutex_prof_acquisitions; - } -#endif + mtx_assert(&td_contested_lock, MA_OWNED); + MPASS(owner->td_proc->p_magic == P_MAGIC); + MPASS(ts->ts_owner == NULL); + ts->ts_owner = owner; + LIST_INSERT_HEAD(&owner->td_contested, ts, ts_link); } -void -_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line) +/* + * Malloc a turnstile for a new thread, initialize it and return it. 
+ */ +struct turnstile * +turnstile_alloc(void) { + struct turnstile *ts; - MPASS(curthread != NULL); - KASSERT(m->mtx_object.lo_class == &lock_class_mtx_sleep, - ("mtx_unlock() of spin mutex %s @ %s:%d", m->mtx_object.lo_name, - file, line)); - WITNESS_UNLOCK(&m->mtx_object, opts | LOP_EXCLUSIVE, file, line); - LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file, - line); - mtx_assert(m, MA_OWNED); -#ifdef MUTEX_PROFILING - if (m->mtx_acqtime != 0) { - static const char *unknown = "(unknown)"; - struct mutex_prof *mpp; - u_int64_t acqtime, now; - const char *p, *q; - volatile u_int hash; - - now = nanoseconds(); - acqtime = m->mtx_acqtime; - m->mtx_acqtime = 0; - if (now <= acqtime) - goto out; - for (p = m->mtx_filename; - p != NULL && strncmp(p, "../", 3) == 0; p += 3) - /* nothing */ ; - if (p == NULL || *p == '\0') - p = unknown; - for (hash = m->mtx_lineno, q = p; *q != '\0'; ++q) - hash = (hash * 2 + *q) % MPROF_HASH_SIZE; - mtx_lock_spin(&mprof_mtx); - for (mpp = mprof_hash[hash]; mpp != NULL; mpp = mpp->next) - if (mpp->line == m->mtx_lineno && - strcmp(mpp->file, p) == 0) - break; - if (mpp == NULL) { - /* Just exit if we cannot get a trace buffer */ - if (first_free_mprof_buf >= NUM_MPROF_BUFFERS) { - ++mutex_prof_rejected; - goto unlock; - } - mpp = &mprof_buf[first_free_mprof_buf++]; - mpp->name = mtx_name(m); - mpp->file = p; - mpp->line = m->mtx_lineno; - mpp->next = mprof_hash[hash]; - if (mprof_hash[hash] != NULL) - ++mutex_prof_collisions; - mprof_hash[hash] = mpp; - ++mutex_prof_records; - } - /* - * Record if the mutex has been held longer now than ever - * before. - */ - if (now - acqtime > mpp->cnt_max) - mpp->cnt_max = now - acqtime; - mpp->cnt_tot += now - acqtime; - mpp->cnt_cur++; -unlock: - mtx_unlock_spin(&mprof_mtx); - } -out: -#endif - _rel_sleep_lock(m, curthread, opts, file, line); + ts = malloc(sizeof(struct turnstile), M_TURNSTILE, M_WAITOK | M_ZERO); + TAILQ_INIT(&ts->ts_blocked); >>> TRUNCATED FOR MAIL (1000 lines) <<<