Date: Tue, 17 May 2016 09:56:22 +0000 (UTC) From: Konstantin Belousov <kib@FreeBSD.org> To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org Subject: svn commit: r300043 - in head: include lib/libc/gen lib/libc/include lib/libthr lib/libthr/thread share/man/man3 sys/compat/cloudabi sys/compat/linux sys/kern sys/sys sys/vm Message-ID: <201605170956.u4H9uM5V080828@repo.freebsd.org>
next in thread | raw e-mail | index | archive | help
Author: kib Date: Tue May 17 09:56:22 2016 New Revision: 300043 URL: https://svnweb.freebsd.org/changeset/base/300043 Log: Add implementation of robust mutexes, hopefully close enough to the intention of the POSIX IEEE Std 1003.1TM-2008/Cor 1-2013. A robust mutex is guaranteed to be cleared by the system upon either thread or process owner termination while the mutex is held. The next mutex locker is then notified about inconsistent mutex state and can execute (or abandon) corrective actions. The patch mostly consists of small changes here and there, adding neccessary checks for the inconsistent and abandoned conditions into existing paths. Additionally, the thread exit handler was extended to iterate over the userspace-maintained list of owned robust mutexes, unlocking and marking as terminated each of them. The list of owned robust mutexes cannot be maintained atomically synchronous with the mutex lock state (it is possible in kernel, but is too expensive). Instead, for the duration of lock or unlock operation, the current mutex is remembered in a special slot that is also checked by the kernel at thread termination. Kernel must be aware about the per-thread location of the heads of robust mutex lists and the current active mutex slot. When a thread touches a robust mutex for the first time, a new umtx op syscall is issued which informs about location of lists heads. The umtx sleep queues for PP and PI mutexes are split between non-robust and robust. Somewhat unrelated changes in the patch: 1. Style. 2. The fix for proper tdfind() call use in umtxq_sleep_pi() for shared pi mutexes. 3. Removal of the userspace struct pthread_mutex m_owner field. 4. The sysctl kern.ipc.umtx_vnode_persistent is added, which controls the lifetime of the shared mutex associated with a vnode' page. Reviewed by: jilles (previous version, supposedly the objection was fixed) Discussed with: brooks, Martin Simmons <martin@lispworks.com> (some aspects) Tested by: pho Sponsored by: The FreeBSD Foundation Added: head/share/man/man3/pthread_mutex_consistent.3 (contents, props changed) Modified: head/include/pthread.h head/lib/libc/gen/Symbol.map head/lib/libc/gen/_pthread_stubs.c head/lib/libc/include/libc_private.h head/lib/libthr/pthread.map head/lib/libthr/thread/thr_cond.c head/lib/libthr/thread/thr_init.c head/lib/libthr/thread/thr_mutex.c head/lib/libthr/thread/thr_mutexattr.c head/lib/libthr/thread/thr_private.h head/lib/libthr/thread/thr_umtx.c head/lib/libthr/thread/thr_umtx.h head/share/man/man3/Makefile head/share/man/man3/pthread_cond_wait.3 head/share/man/man3/pthread_mutex_lock.3 head/share/man/man3/pthread_mutex_timedlock.3 head/share/man/man3/pthread_mutex_trylock.3 head/share/man/man3/pthread_mutex_unlock.3 head/share/man/man3/pthread_mutexattr.3 head/sys/compat/cloudabi/cloudabi_thread.c head/sys/compat/linux/linux_fork.c head/sys/kern/kern_exit.c head/sys/kern/kern_thr.c head/sys/kern/kern_thread.c head/sys/kern/kern_umtx.c head/sys/sys/_umtx.h head/sys/sys/proc.h head/sys/sys/umtx.h head/sys/vm/vm_object.c head/sys/vm/vm_object.h head/sys/vm/vnode_pager.c Modified: head/include/pthread.h ============================================================================== --- head/include/pthread.h Tue May 17 09:53:22 2016 (r300042) +++ head/include/pthread.h Tue May 17 09:56:22 2016 (r300043) @@ -135,6 +135,9 @@ enum pthread_mutextype { #define PTHREAD_MUTEX_DEFAULT PTHREAD_MUTEX_ERRORCHECK +#define PTHREAD_MUTEX_STALLED 0 +#define PTHREAD_MUTEX_ROBUST 1 + struct _pthread_cleanup_info { __uintptr_t pthread_cleanup_pad[8]; }; @@ -229,6 +232,8 @@ int pthread_mutexattr_settype(pthread_m __nonnull(1); int pthread_mutexattr_setpshared(pthread_mutexattr_t *, int) __nonnull(1); +int pthread_mutex_consistent(pthread_mutex_t *__mutex) + __nonnull(1) __requires_exclusive(*__mutex); int pthread_mutex_destroy(pthread_mutex_t *__mutex) __nonnull(1) __requires_unlocked(*__mutex); int pthread_mutex_init(pthread_mutex_t *__mutex, @@ -310,6 +315,11 @@ int pthread_mutex_setprioceiling(pthrea int pthread_mutexattr_getprotocol(pthread_mutexattr_t *, int *); int pthread_mutexattr_setprotocol(pthread_mutexattr_t *, int); +int pthread_mutexattr_getrobust(pthread_mutexattr_t *__restrict, + int *__restrict) __nonnull_all; +int pthread_mutexattr_setrobust(pthread_mutexattr_t *, int) + __nonnull(1); + int pthread_attr_getinheritsched(const pthread_attr_t *, int *); int pthread_attr_getschedparam(const pthread_attr_t *, struct sched_param *) __nonnull_all; Modified: head/lib/libc/gen/Symbol.map ============================================================================== --- head/lib/libc/gen/Symbol.map Tue May 17 09:53:22 2016 (r300042) +++ head/lib/libc/gen/Symbol.map Tue May 17 09:56:22 2016 (r300043) @@ -410,6 +410,9 @@ FBSD_1.3 { }; FBSD_1.4 { + pthread_mutex_consistent; + pthread_mutexattr_getrobust; + pthread_mutexattr_setrobust; scandir_b; }; Modified: head/lib/libc/gen/_pthread_stubs.c ============================================================================== --- head/lib/libc/gen/_pthread_stubs.c Tue May 17 09:53:22 2016 (r300042) +++ head/lib/libc/gen/_pthread_stubs.c Tue May 17 09:56:22 2016 (r300043) @@ -125,6 +125,9 @@ pthread_func_entry_t __thr_jtable[PJT_MA {PJT_DUAL_ENTRY(stub_zero)}, /* PJT_CLEANUP_PUSH_IMP */ {PJT_DUAL_ENTRY(stub_zero)}, /* PJT_CANCEL_ENTER */ {PJT_DUAL_ENTRY(stub_zero)}, /* PJT_CANCEL_LEAVE */ + {PJT_DUAL_ENTRY(stub_zero)}, /* PJT_MUTEX_CONSISTENT */ + {PJT_DUAL_ENTRY(stub_zero)}, /* PJT_MUTEXATTR_GETROBUST */ + {PJT_DUAL_ENTRY(stub_zero)}, /* PJT_MUTEXATTR_SETROBUST */ }; /* @@ -226,9 +229,14 @@ STUB_FUNC2(pthread_mutex_init, PJT_MUTEX STUB_FUNC1(pthread_mutex_lock, PJT_MUTEX_LOCK, int, void *) STUB_FUNC1(pthread_mutex_trylock, PJT_MUTEX_TRYLOCK, int, void *) STUB_FUNC1(pthread_mutex_unlock, PJT_MUTEX_UNLOCK, int, void *) +STUB_FUNC1(pthread_mutex_consistent, PJT_MUTEX_CONSISTENT, int, void *) STUB_FUNC1(pthread_mutexattr_destroy, PJT_MUTEXATTR_DESTROY, int, void *) STUB_FUNC1(pthread_mutexattr_init, PJT_MUTEXATTR_INIT, int, void *) STUB_FUNC2(pthread_mutexattr_settype, PJT_MUTEXATTR_SETTYPE, int, void *, int) +STUB_FUNC2(pthread_mutexattr_getrobust, PJT_MUTEXATTR_GETROBUST, int, void *, + int *) +STUB_FUNC2(pthread_mutexattr_setrobust, PJT_MUTEXATTR_SETROBUST, int, void *, + int) STUB_FUNC2(pthread_once, PJT_ONCE, int, void *, void *) STUB_FUNC1(pthread_rwlock_destroy, PJT_RWLOCK_DESTROY, int, void *) STUB_FUNC2(pthread_rwlock_init, PJT_RWLOCK_INIT, int, void *, void *) Modified: head/lib/libc/include/libc_private.h ============================================================================== --- head/lib/libc/include/libc_private.h Tue May 17 09:53:22 2016 (r300042) +++ head/lib/libc/include/libc_private.h Tue May 17 09:56:22 2016 (r300043) @@ -168,6 +168,9 @@ typedef enum { PJT_CLEANUP_PUSH_IMP, PJT_CANCEL_ENTER, PJT_CANCEL_LEAVE, + PJT_MUTEX_CONSISTENT, + PJT_MUTEXATTR_GETROBUST, + PJT_MUTEXATTR_SETROBUST, PJT_MAX } pjt_index_t; Modified: head/lib/libthr/pthread.map ============================================================================== --- head/lib/libthr/pthread.map Tue May 17 09:53:22 2016 (r300042) +++ head/lib/libthr/pthread.map Tue May 17 09:56:22 2016 (r300043) @@ -315,3 +315,9 @@ FBSD_1.1 { FBSD_1.2 { pthread_getthreadid_np; }; + +FBSD_1.4 { + pthread_mutex_consistent; + pthread_mutexattr_getrobust; + pthread_mutexattr_setrobust; +}; Modified: head/lib/libthr/thread/thr_cond.c ============================================================================== --- head/lib/libthr/thread/thr_cond.c Tue May 17 09:53:22 2016 (r300042) +++ head/lib/libthr/thread/thr_cond.c Tue May 17 09:56:22 2016 (r300043) @@ -188,46 +188,57 @@ _pthread_cond_destroy(pthread_cond_t *co */ static int cond_wait_kernel(struct pthread_cond *cvp, struct pthread_mutex *mp, - const struct timespec *abstime, int cancel) + const struct timespec *abstime, int cancel) { - struct pthread *curthread = _get_curthread(); - int recurse; - int error, error2 = 0; + struct pthread *curthread; + int error, error2, recurse, robust; + + curthread = _get_curthread(); + robust = _mutex_enter_robust(curthread, mp); error = _mutex_cv_detach(mp, &recurse); - if (error != 0) + if (error != 0) { + if (robust) + _mutex_leave_robust(curthread, mp); return (error); + } - if (cancel) { + if (cancel) _thr_cancel_enter2(curthread, 0); - error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters, - (struct umutex *)&mp->m_lock, abstime, - CVWAIT_ABSTIME|CVWAIT_CLOCKID); + error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters, + (struct umutex *)&mp->m_lock, abstime, CVWAIT_ABSTIME | + CVWAIT_CLOCKID); + if (cancel) _thr_cancel_leave(curthread, 0); - } else { - error = _thr_ucond_wait((struct ucond *)&cvp->__has_kern_waiters, - (struct umutex *)&mp->m_lock, abstime, - CVWAIT_ABSTIME|CVWAIT_CLOCKID); - } /* * Note that PP mutex and ROBUST mutex may return * interesting error codes. */ if (error == 0) { - error2 = _mutex_cv_lock(mp, recurse); + error2 = _mutex_cv_lock(mp, recurse, true); } else if (error == EINTR || error == ETIMEDOUT) { - error2 = _mutex_cv_lock(mp, recurse); + error2 = _mutex_cv_lock(mp, recurse, true); + /* + * Do not do cancellation on EOWNERDEAD there. The + * cancellation cleanup handler will use the protected + * state and unlock the mutex without making the state + * consistent and the state will be unrecoverable. + */ if (error2 == 0 && cancel) _thr_testcancel(curthread); + if (error == EINTR) error = 0; } else { /* We know that it didn't unlock the mutex. */ - error2 = _mutex_cv_attach(mp, recurse); - if (error2 == 0 && cancel) + _mutex_cv_attach(mp, recurse); + if (cancel) _thr_testcancel(curthread); + error2 = 0; } + if (robust) + _mutex_leave_robust(curthread, mp); return (error2 != 0 ? error2 : error); } @@ -240,14 +251,13 @@ cond_wait_kernel(struct pthread_cond *cv static int cond_wait_user(struct pthread_cond *cvp, struct pthread_mutex *mp, - const struct timespec *abstime, int cancel) + const struct timespec *abstime, int cancel) { - struct pthread *curthread = _get_curthread(); + struct pthread *curthread; struct sleepqueue *sq; - int recurse; - int error; - int defered; + int deferred, error, error2, recurse; + curthread = _get_curthread(); if (curthread->wchan != NULL) PANIC("thread was already on queue."); @@ -260,32 +270,31 @@ cond_wait_user(struct pthread_cond *cvp, * us to check it without locking in pthread_cond_signal(). */ cvp->__has_user_waiters = 1; - defered = 0; - (void)_mutex_cv_unlock(mp, &recurse, &defered); + deferred = 0; + (void)_mutex_cv_unlock(mp, &recurse, &deferred); curthread->mutex_obj = mp; _sleepq_add(cvp, curthread); for(;;) { _thr_clear_wake(curthread); _sleepq_unlock(cvp); - if (defered) { - defered = 0; + if (deferred) { + deferred = 0; if ((mp->m_lock.m_owner & UMUTEX_CONTESTED) == 0) - (void)_umtx_op_err(&mp->m_lock, UMTX_OP_MUTEX_WAKE2, - mp->m_lock.m_flags, 0, 0); + (void)_umtx_op_err(&mp->m_lock, + UMTX_OP_MUTEX_WAKE2, mp->m_lock.m_flags, + 0, 0); } if (curthread->nwaiter_defer > 0) { _thr_wake_all(curthread->defer_waiters, - curthread->nwaiter_defer); + curthread->nwaiter_defer); curthread->nwaiter_defer = 0; } - if (cancel) { + if (cancel) _thr_cancel_enter2(curthread, 0); - error = _thr_sleep(curthread, cvp->__clock_id, abstime); + error = _thr_sleep(curthread, cvp->__clock_id, abstime); + if (cancel) _thr_cancel_leave(curthread, 0); - } else { - error = _thr_sleep(curthread, cvp->__clock_id, abstime); - } _sleepq_lock(cvp); if (curthread->wchan == NULL) { @@ -293,25 +302,26 @@ cond_wait_user(struct pthread_cond *cvp, break; } else if (cancel && SHOULD_CANCEL(curthread)) { sq = _sleepq_lookup(cvp); - cvp->__has_user_waiters = - _sleepq_remove(sq, curthread); + cvp->__has_user_waiters = _sleepq_remove(sq, curthread); _sleepq_unlock(cvp); curthread->mutex_obj = NULL; - _mutex_cv_lock(mp, recurse); + error2 = _mutex_cv_lock(mp, recurse, false); if (!THR_IN_CRITICAL(curthread)) _pthread_exit(PTHREAD_CANCELED); else /* this should not happen */ - return (0); + return (error2); } else if (error == ETIMEDOUT) { sq = _sleepq_lookup(cvp); cvp->__has_user_waiters = - _sleepq_remove(sq, curthread); + _sleepq_remove(sq, curthread); break; } } _sleepq_unlock(cvp); curthread->mutex_obj = NULL; - _mutex_cv_lock(mp, recurse); + error2 = _mutex_cv_lock(mp, recurse, false); + if (error == 0) + error = error2; return (error); } @@ -338,12 +348,12 @@ cond_wait_common(pthread_cond_t *cond, p return (error); if (curthread->attr.sched_policy != SCHED_OTHER || - (mp->m_lock.m_flags & (UMUTEX_PRIO_PROTECT|UMUTEX_PRIO_INHERIT| - USYNC_PROCESS_SHARED)) != 0 || + (mp->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT | + USYNC_PROCESS_SHARED)) != 0 || (cvp->__flags & USYNC_PROCESS_SHARED) != 0) - return cond_wait_kernel(cvp, mp, abstime, cancel); + return (cond_wait_kernel(cvp, mp, abstime, cancel)); else - return cond_wait_user(cvp, mp, abstime, cancel); + return (cond_wait_user(cvp, mp, abstime, cancel)); } int @@ -420,15 +430,15 @@ cond_signal_common(pthread_cond_t *cond) td = _sleepq_first(sq); mp = td->mutex_obj; cvp->__has_user_waiters = _sleepq_remove(sq, td); - if (mp->m_owner == TID(curthread)) { + if (PMUTEX_OWNER_ID(mp) == TID(curthread)) { if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) { _thr_wake_all(curthread->defer_waiters, - curthread->nwaiter_defer); + curthread->nwaiter_defer); curthread->nwaiter_defer = 0; } curthread->defer_waiters[curthread->nwaiter_defer++] = - &td->wake_addr->value; - mp->m_flags |= PMUTEX_FLAG_DEFERED; + &td->wake_addr->value; + mp->m_flags |= PMUTEX_FLAG_DEFERRED; } else { waddr = &td->wake_addr->value; } @@ -452,15 +462,15 @@ drop_cb(struct pthread *td, void *arg) struct pthread *curthread = ba->curthread; mp = td->mutex_obj; - if (mp->m_owner == TID(curthread)) { + if (PMUTEX_OWNER_ID(mp) == TID(curthread)) { if (curthread->nwaiter_defer >= MAX_DEFER_WAITERS) { _thr_wake_all(curthread->defer_waiters, - curthread->nwaiter_defer); + curthread->nwaiter_defer); curthread->nwaiter_defer = 0; } curthread->defer_waiters[curthread->nwaiter_defer++] = - &td->wake_addr->value; - mp->m_flags |= PMUTEX_FLAG_DEFERED; + &td->wake_addr->value; + mp->m_flags |= PMUTEX_FLAG_DEFERRED; } else { if (ba->count >= MAX_DEFER_WAITERS) { _thr_wake_all(ba->waddrs, ba->count); Modified: head/lib/libthr/thread/thr_init.c ============================================================================== --- head/lib/libthr/thread/thr_init.c Tue May 17 09:53:22 2016 (r300042) +++ head/lib/libthr/thread/thr_init.c Tue May 17 09:56:22 2016 (r300043) @@ -94,6 +94,7 @@ struct pthread_mutex_attr _pthread_mutex .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0, .m_pshared = PTHREAD_PROCESS_PRIVATE, + .m_robust = PTHREAD_MUTEX_STALLED, }; struct pthread_mutex_attr _pthread_mutexattr_adaptive_default = { @@ -101,6 +102,7 @@ struct pthread_mutex_attr _pthread_mutex .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0, .m_pshared = PTHREAD_PROCESS_PRIVATE, + .m_robust = PTHREAD_MUTEX_STALLED, }; /* Default condition variable attributes: */ @@ -265,7 +267,10 @@ static pthread_func_t jmp_table[][2] = { {DUAL_ENTRY(__pthread_cleanup_pop_imp)},/* PJT_CLEANUP_POP_IMP */ {DUAL_ENTRY(__pthread_cleanup_push_imp)},/* PJT_CLEANUP_PUSH_IMP */ {DUAL_ENTRY(_pthread_cancel_enter)}, /* PJT_CANCEL_ENTER */ - {DUAL_ENTRY(_pthread_cancel_leave)} /* PJT_CANCEL_LEAVE */ + {DUAL_ENTRY(_pthread_cancel_leave)}, /* PJT_CANCEL_LEAVE */ + {DUAL_ENTRY(_pthread_mutex_consistent)},/* PJT_MUTEX_CONSISTENT */ + {DUAL_ENTRY(_pthread_mutexattr_getrobust)},/* PJT_MUTEXATTR_GETROBUST */ + {DUAL_ENTRY(_pthread_mutexattr_setrobust)},/* PJT_MUTEXATTR_SETROBUST */ }; static int init_once = 0; @@ -308,7 +313,7 @@ _libpthread_init(struct pthread *curthre int first, dlopened; /* Check if this function has already been called: */ - if ((_thr_initial != NULL) && (curthread == NULL)) + if (_thr_initial != NULL && curthread == NULL) /* Only initialize the threaded application once. */ return; @@ -316,7 +321,7 @@ _libpthread_init(struct pthread *curthre * Check the size of the jump table to make sure it is preset * with the correct number of entries. */ - if (sizeof(jmp_table) != (sizeof(pthread_func_t) * PJT_MAX * 2)) + if (sizeof(jmp_table) != sizeof(pthread_func_t) * PJT_MAX * 2) PANIC("Thread jump table not properly initialized"); memcpy(__thr_jtable, jmp_table, sizeof(jmp_table)); __thr_interpose_libc(); Modified: head/lib/libthr/thread/thr_mutex.c ============================================================================== --- head/lib/libthr/thread/thr_mutex.c Tue May 17 09:53:22 2016 (r300042) +++ head/lib/libthr/thread/thr_mutex.c Tue May 17 09:56:22 2016 (r300043) @@ -1,7 +1,7 @@ /* * Copyright (c) 1995 John Birrell <jb@cimlogic.com.au>. * Copyright (c) 2006 David Xu <davidxu@freebsd.org>. - * Copyright (c) 2015 The FreeBSD Foundation + * Copyright (c) 2015, 2016 The FreeBSD Foundation * * All rights reserved. * @@ -39,7 +39,6 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <stdbool.h> #include "namespace.h" #include <stdlib.h> #include <errno.h> @@ -64,6 +63,7 @@ _Static_assert(sizeof(struct pthread_mut /* * Prototypes */ +int __pthread_mutex_consistent(pthread_mutex_t *mutex); int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *mutex_attr); int __pthread_mutex_trylock(pthread_mutex_t *mutex); @@ -82,9 +82,13 @@ int __pthread_mutex_setyieldloops_np(pth static int mutex_self_trylock(pthread_mutex_t); static int mutex_self_lock(pthread_mutex_t, const struct timespec *abstime); -static int mutex_unlock_common(struct pthread_mutex *, int, int *); +static int mutex_unlock_common(struct pthread_mutex *, bool, int *); static int mutex_lock_sleep(struct pthread *, pthread_mutex_t, const struct timespec *); +static void mutex_init_robust(struct pthread *curthread); +static int mutex_qidx(struct pthread_mutex *m); +static bool is_robust_mutex(struct pthread_mutex *m); +static bool is_pshared_mutex(struct pthread_mutex *m); __weak_reference(__pthread_mutex_init, pthread_mutex_init); __strong_reference(__pthread_mutex_init, _pthread_mutex_init); @@ -94,6 +98,8 @@ __weak_reference(__pthread_mutex_timedlo __strong_reference(__pthread_mutex_timedlock, _pthread_mutex_timedlock); __weak_reference(__pthread_mutex_trylock, pthread_mutex_trylock); __strong_reference(__pthread_mutex_trylock, _pthread_mutex_trylock); +__weak_reference(_pthread_mutex_consistent, pthread_mutex_consistent); +__strong_reference(_pthread_mutex_consistent, __pthread_mutex_consistent); /* Single underscore versions provided for libc internal usage: */ /* No difference between libc and application usage of these: */ @@ -125,23 +131,23 @@ mutex_init_link(struct pthread_mutex *m) } static void -mutex_assert_is_owned(struct pthread_mutex *m) +mutex_assert_is_owned(struct pthread_mutex *m __unused) { #if defined(_PTHREADS_INVARIANTS) if (__predict_false(m->m_qe.tqe_prev == NULL)) { char msg[128]; snprintf(msg, sizeof(msg), - "mutex %p own %#x %#x is not on list %p %p", - m, m->m_lock.m_owner, m->m_owner, m->m_qe.tqe_prev, - m->m_qe.tqe_next); + "mutex %p own %#x is not on list %p %p", + m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next); PANIC(msg); } #endif } static void -mutex_assert_not_owned(struct pthread_mutex *m) +mutex_assert_not_owned(struct pthread *curthread __unused, + struct pthread_mutex *m __unused) { #if defined(_PTHREADS_INVARIANTS) @@ -149,21 +155,68 @@ mutex_assert_not_owned(struct pthread_mu m->m_qe.tqe_next != NULL)) { char msg[128]; snprintf(msg, sizeof(msg), - "mutex %p own %#x %#x is on list %p %p", - m, m->m_lock.m_owner, m->m_owner, m->m_qe.tqe_prev, - m->m_qe.tqe_next); + "mutex %p own %#x is on list %p %p", + m, m->m_lock.m_owner, m->m_qe.tqe_prev, m->m_qe.tqe_next); + PANIC(msg); + } + if (__predict_false(is_robust_mutex(m) && + (m->m_lock.m_rb_lnk != 0 || m->m_rb_prev != NULL || + (is_pshared_mutex(m) && curthread->robust_list == + (uintptr_t)&m->m_lock) || + (!is_pshared_mutex(m) && curthread->priv_robust_list == + (uintptr_t)&m->m_lock)))) { + char msg[128]; + snprintf(msg, sizeof(msg), + "mutex %p own %#x is on robust linkage %p %p head %p phead %p", + m, m->m_lock.m_owner, (void *)m->m_lock.m_rb_lnk, + m->m_rb_prev, (void *)curthread->robust_list, + (void *)curthread->priv_robust_list); PANIC(msg); } #endif } -static int +static bool is_pshared_mutex(struct pthread_mutex *m) { return ((m->m_lock.m_flags & USYNC_PROCESS_SHARED) != 0); } +static bool +is_robust_mutex(struct pthread_mutex *m) +{ + + return ((m->m_lock.m_flags & UMUTEX_ROBUST) != 0); +} + +int +_mutex_enter_robust(struct pthread *curthread, struct pthread_mutex *m) +{ + +#if defined(_PTHREADS_INVARIANTS) + if (__predict_false(curthread->inact_mtx != 0)) + PANIC("inact_mtx enter"); +#endif + if (!is_robust_mutex(m)) + return (0); + + mutex_init_robust(curthread); + curthread->inact_mtx = (uintptr_t)&m->m_lock; + return (1); +} + +void +_mutex_leave_robust(struct pthread *curthread, struct pthread_mutex *m __unused) +{ + +#if defined(_PTHREADS_INVARIANTS) + if (__predict_false(curthread->inact_mtx != (uintptr_t)&m->m_lock)) + PANIC("inact_mtx leave"); +#endif + curthread->inact_mtx = 0; +} + static int mutex_check_attr(const struct pthread_mutex_attr *attr) { @@ -178,12 +231,27 @@ mutex_check_attr(const struct pthread_mu } static void +mutex_init_robust(struct pthread *curthread) +{ + struct umtx_robust_lists_params rb; + + if (curthread == NULL) + curthread = _get_curthread(); + if (curthread->robust_inited) + return; + rb.robust_list_offset = (uintptr_t)&curthread->robust_list; + rb.robust_priv_list_offset = (uintptr_t)&curthread->priv_robust_list; + rb.robust_inact_offset = (uintptr_t)&curthread->inact_mtx; + _umtx_op(NULL, UMTX_OP_ROBUST_LISTS, sizeof(rb), &rb, NULL); + curthread->robust_inited = 1; +} + +static void mutex_init_body(struct pthread_mutex *pmutex, const struct pthread_mutex_attr *attr) { pmutex->m_flags = attr->m_type; - pmutex->m_owner = 0; pmutex->m_count = 0; pmutex->m_spinloops = 0; pmutex->m_yieldloops = 0; @@ -205,7 +273,10 @@ mutex_init_body(struct pthread_mutex *pm } if (attr->m_pshared == PTHREAD_PROCESS_SHARED) pmutex->m_lock.m_flags |= USYNC_PROCESS_SHARED; - + if (attr->m_robust == PTHREAD_MUTEX_ROBUST) { + mutex_init_robust(NULL); + pmutex->m_lock.m_flags |= UMUTEX_ROBUST; + } if (PMUTEX_TYPE(pmutex->m_flags) == PTHREAD_MUTEX_ADAPTIVE_NP) { pmutex->m_spinloops = _thr_spinloops ? _thr_spinloops: MUTEX_ADAPTIVE_SPINS; @@ -262,7 +333,7 @@ set_inherited_priority(struct pthread *c { struct pthread_mutex *m2; - m2 = TAILQ_LAST(&curthread->mq[TMQ_NORM_PP], mutex_queue); + m2 = TAILQ_LAST(&curthread->mq[mutex_qidx(m)], mutex_queue); if (m2 != NULL) m->m_lock.m_ceilings[1] = m2->m_lock.m_ceilings[0]; else @@ -277,7 +348,8 @@ shared_mutex_init(struct pthread_mutex * .m_type = PTHREAD_MUTEX_DEFAULT, .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0, - .m_pshared = PTHREAD_PROCESS_SHARED + .m_pshared = PTHREAD_PROCESS_SHARED, + .m_robust = PTHREAD_MUTEX_STALLED, }; bool done; @@ -329,7 +401,7 @@ __pthread_mutex_init(pthread_mutex_t *mu if (mutex_attr == NULL || (*mutex_attr)->m_pshared == PTHREAD_PROCESS_PRIVATE) { return (mutex_init(mutex, mutex_attr ? *mutex_attr : NULL, - calloc)); + calloc)); } pmtx = __thr_pshared_offpage(mutex, 1); if (pmtx == NULL) @@ -349,6 +421,7 @@ _pthread_mutex_init_calloc_cb(pthread_mu .m_protocol = PTHREAD_PRIO_NONE, .m_ceiling = 0, .m_pshared = PTHREAD_PROCESS_PRIVATE, + .m_robust = PTHREAD_MUTEX_STALLED, }; int ret; @@ -378,7 +451,6 @@ queue_fork(struct pthread *curthread, st TAILQ_FOREACH(m, qp, m_pqe) { TAILQ_INSERT_TAIL(q, m, m_qe); m->m_lock.m_owner = TID(curthread) | bit; - m->m_owner = TID(curthread); } } @@ -390,6 +462,9 @@ _mutex_fork(struct pthread *curthread) &curthread->mq[TMQ_NORM_PRIV], 0); queue_fork(curthread, &curthread->mq[TMQ_NORM_PP], &curthread->mq[TMQ_NORM_PP_PRIV], UMUTEX_CONTESTED); + queue_fork(curthread, &curthread->mq[TMQ_ROBUST_PP], + &curthread->mq[TMQ_ROBUST_PP_PRIV], UMUTEX_CONTESTED); + curthread->robust_list = 0; } int @@ -407,17 +482,18 @@ _pthread_mutex_destroy(pthread_mutex_t * if (m == THR_PSHARED_PTR) { m1 = __thr_pshared_offpage(mutex, 0); if (m1 != NULL) { - mutex_assert_not_owned(m1); + mutex_assert_not_owned(_get_curthread(), m1); __thr_pshared_destroy(mutex); } *mutex = THR_MUTEX_DESTROYED; return (0); } - if (m->m_owner != 0) { + if (PMUTEX_OWNER_ID(m) != 0 && + (uint32_t)m->m_lock.m_owner != UMUTEX_RB_NOTRECOV) { ret = EBUSY; } else { *mutex = THR_MUTEX_DESTROYED; - mutex_assert_not_owned(m); + mutex_assert_not_owned(_get_curthread(), m); free(m); ret = 0; } @@ -432,31 +508,81 @@ mutex_qidx(struct pthread_mutex *m) if ((m->m_lock.m_flags & UMUTEX_PRIO_PROTECT) == 0) return (TMQ_NORM); - return (TMQ_NORM_PP); + return (is_robust_mutex(m) ? TMQ_ROBUST_PP : TMQ_NORM_PP); } +/* + * Both enqueue_mutex() and dequeue_mutex() operate on the + * thread-private linkage of the locked mutexes and on the robust + * linkage. + * + * Robust list, as seen by kernel, must be consistent even in the case + * of thread termination at arbitrary moment. Since either enqueue or + * dequeue for list walked by kernel consists of rewriting a single + * forward pointer, it is safe. On the other hand, rewrite of the + * back pointer is not atomic WRT the forward one, but kernel does not + * care. + */ static void -enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m) +enqueue_mutex(struct pthread *curthread, struct pthread_mutex *m, + int error) { + struct pthread_mutex *m1; + uintptr_t *rl; int qidx; - m->m_owner = TID(curthread); /* Add to the list of owned mutexes: */ - mutex_assert_not_owned(m); + if (error != EOWNERDEAD) + mutex_assert_not_owned(curthread, m); qidx = mutex_qidx(m); TAILQ_INSERT_TAIL(&curthread->mq[qidx], m, m_qe); if (!is_pshared_mutex(m)) TAILQ_INSERT_TAIL(&curthread->mq[qidx + 1], m, m_pqe); + if (is_robust_mutex(m)) { + rl = is_pshared_mutex(m) ? &curthread->robust_list : + &curthread->priv_robust_list; + m->m_rb_prev = NULL; + if (*rl != 0) { + m1 = __containerof((void *)*rl, + struct pthread_mutex, m_lock); + m->m_lock.m_rb_lnk = (uintptr_t)&m1->m_lock; + m1->m_rb_prev = m; + } else { + m1 = NULL; + m->m_lock.m_rb_lnk = 0; + } + *rl = (uintptr_t)&m->m_lock; + } } static void dequeue_mutex(struct pthread *curthread, struct pthread_mutex *m) { + struct pthread_mutex *mp, *mn; int qidx; - m->m_owner = 0; mutex_assert_is_owned(m); qidx = mutex_qidx(m); + if (is_robust_mutex(m)) { + mp = m->m_rb_prev; + if (mp == NULL) { + if (is_pshared_mutex(m)) { + curthread->robust_list = m->m_lock.m_rb_lnk; + } else { + curthread->priv_robust_list = + m->m_lock.m_rb_lnk; + } + } else { + mp->m_lock.m_rb_lnk = m->m_lock.m_rb_lnk; + } + if (m->m_lock.m_rb_lnk != 0) { + mn = __containerof((void *)m->m_lock.m_rb_lnk, + struct pthread_mutex, m_lock); + mn->m_rb_prev = m->m_rb_prev; + } + m->m_lock.m_rb_lnk = 0; + m->m_rb_prev = NULL; + } TAILQ_REMOVE(&curthread->mq[qidx], m, m_qe); if (!is_pshared_mutex(m)) TAILQ_REMOVE(&curthread->mq[qidx + 1], m, m_pqe); @@ -496,7 +622,7 @@ __pthread_mutex_trylock(pthread_mutex_t struct pthread *curthread; struct pthread_mutex *m; uint32_t id; - int ret; + int ret, robust; ret = check_and_init_mutex(mutex, &m); if (ret != 0) @@ -505,27 +631,32 @@ __pthread_mutex_trylock(pthread_mutex_t id = TID(curthread); if (m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); + robust = _mutex_enter_robust(curthread, m); ret = _thr_umutex_trylock(&m->m_lock, id); - if (__predict_true(ret == 0)) { - enqueue_mutex(curthread, m); - } else if (m->m_owner == id) { + if (__predict_true(ret == 0) || ret == EOWNERDEAD) { + enqueue_mutex(curthread, m, ret); + if (ret == EOWNERDEAD) + m->m_lock.m_flags |= UMUTEX_NONCONSISTENT; + } else if (PMUTEX_OWNER_ID(m) == id) { ret = mutex_self_trylock(m); } /* else {} */ - if (ret && (m->m_flags & PMUTEX_FLAG_PRIVATE)) + if (robust) + _mutex_leave_robust(curthread, m); + if ((ret == 0 || ret == EOWNERDEAD) && + (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0) THR_CRITICAL_LEAVE(curthread); return (ret); } static int mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m, - const struct timespec *abstime) + const struct timespec *abstime) { - uint32_t id, owner; - int count; - int ret; + uint32_t id, owner; + int count, ret; id = TID(curthread); - if (m->m_owner == id) + if (PMUTEX_OWNER_ID(m) == id) return (mutex_self_lock(m, abstime)); /* @@ -534,10 +665,9 @@ mutex_lock_sleep(struct pthread *curthre * the lock is likely to be released quickly and it is * faster than entering the kernel */ - if (__predict_false( - (m->m_lock.m_flags & - (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) != 0)) - goto sleep_in_kernel; + if (__predict_false((m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | + UMUTEX_PRIO_INHERIT | UMUTEX_ROBUST | UMUTEX_NONCONSISTENT)) != 0)) + goto sleep_in_kernel; if (!_thr_is_smp) goto yield_loop; @@ -546,7 +676,8 @@ mutex_lock_sleep(struct pthread *curthre while (count--) { owner = m->m_lock.m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0) { - if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { + if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, + id | owner)) { ret = 0; goto done; } @@ -560,7 +691,8 @@ yield_loop: _sched_yield(); owner = m->m_lock.m_owner; if ((owner & ~UMUTEX_CONTESTED) == 0) { - if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) { + if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, + id | owner)) { ret = 0; goto done; } @@ -568,38 +700,46 @@ yield_loop: } sleep_in_kernel: - if (abstime == NULL) { + if (abstime == NULL) ret = __thr_umutex_lock(&m->m_lock, id); - } else if (__predict_false( - abstime->tv_nsec < 0 || - abstime->tv_nsec >= 1000000000)) { + else if (__predict_false(abstime->tv_nsec < 0 || + abstime->tv_nsec >= 1000000000)) ret = EINVAL; - } else { + else ret = __thr_umutex_timedlock(&m->m_lock, id, abstime); - } done: - if (ret == 0) - enqueue_mutex(curthread, m); - + if (ret == 0 || ret == EOWNERDEAD) { + enqueue_mutex(curthread, m, ret); + if (ret == EOWNERDEAD) + m->m_lock.m_flags |= UMUTEX_NONCONSISTENT; + } return (ret); } static inline int -mutex_lock_common(struct pthread_mutex *m, - const struct timespec *abstime, int cvattach) +mutex_lock_common(struct pthread_mutex *m, const struct timespec *abstime, + bool cvattach, bool rb_onlist) { - struct pthread *curthread = _get_curthread(); - int ret; + struct pthread *curthread; + int ret, robust; + curthread = _get_curthread(); if (!cvattach && m->m_flags & PMUTEX_FLAG_PRIVATE) THR_CRITICAL_ENTER(curthread); - if (_thr_umutex_trylock2(&m->m_lock, TID(curthread)) == 0) { - enqueue_mutex(curthread, m); - ret = 0; + if (!rb_onlist) + robust = _mutex_enter_robust(curthread, m); + ret = _thr_umutex_trylock2(&m->m_lock, TID(curthread)); + if (ret == 0 || ret == EOWNERDEAD) { + enqueue_mutex(curthread, m, ret); + if (ret == EOWNERDEAD) + m->m_lock.m_flags |= UMUTEX_NONCONSISTENT; } else { ret = mutex_lock_sleep(curthread, m, abstime); } - if (ret && (m->m_flags & PMUTEX_FLAG_PRIVATE) && !cvattach) + if (!rb_onlist && robust) + _mutex_leave_robust(curthread, m); + if (ret != 0 && ret != EOWNERDEAD && + (m->m_flags & PMUTEX_FLAG_PRIVATE) != 0 && !cvattach) THR_CRITICAL_LEAVE(curthread); return (ret); } @@ -613,7 +753,7 @@ __pthread_mutex_lock(pthread_mutex_t *mu _thr_check_init(); ret = check_and_init_mutex(mutex, &m); if (ret == 0) - ret = mutex_lock_common(m, NULL, 0); + ret = mutex_lock_common(m, NULL, false, false); return (ret); } @@ -627,7 +767,7 @@ __pthread_mutex_timedlock(pthread_mutex_ _thr_check_init(); ret = check_and_init_mutex(mutex, &m); if (ret == 0) - ret = mutex_lock_common(m, abstime, 0); + ret = mutex_lock_common(m, abstime, false, false); return (ret); } @@ -644,16 +784,16 @@ _pthread_mutex_unlock(pthread_mutex_t *m } else { mp = *mutex; } - return (mutex_unlock_common(mp, 0, NULL)); + return (mutex_unlock_common(mp, false, NULL)); } int -_mutex_cv_lock(struct pthread_mutex *m, int count) +_mutex_cv_lock(struct pthread_mutex *m, int count, bool rb_onlist) { - int error; + int error; - error = mutex_lock_common(m, NULL, 1); - if (error == 0) + error = mutex_lock_common(m, NULL, true, rb_onlist); + if (error == 0 || error == EOWNERDEAD) m->m_count = count; return (error); } @@ -667,16 +807,17 @@ _mutex_cv_unlock(struct pthread_mutex *m */ *count = m->m_count; m->m_count = 0; - (void)mutex_unlock_common(m, 1, defer); + (void)mutex_unlock_common(m, true, defer); return (0); } int _mutex_cv_attach(struct pthread_mutex *m, int count) { - struct pthread *curthread = _get_curthread(); + struct pthread *curthread; - enqueue_mutex(curthread, m); + curthread = _get_curthread(); + enqueue_mutex(curthread, m, 0); m->m_count = count; return (0); } @@ -684,12 +825,12 @@ _mutex_cv_attach(struct pthread_mutex *m int _mutex_cv_detach(struct pthread_mutex *mp, int *recurse) { - struct pthread *curthread = _get_curthread(); - int defered; - int error; + struct pthread *curthread; + int deferred, error; + curthread = _get_curthread(); if ((error = _mutex_owned(curthread, mp)) != 0) - return (error); + return (error); /* * Clear the count in case this is a recursive mutex. @@ -699,15 +840,15 @@ _mutex_cv_detach(struct pthread_mutex *m dequeue_mutex(curthread, mp); /* Will this happen in real-world ? */ - if ((mp->m_flags & PMUTEX_FLAG_DEFERED) != 0) { - defered = 1; - mp->m_flags &= ~PMUTEX_FLAG_DEFERED; + if ((mp->m_flags & PMUTEX_FLAG_DEFERRED) != 0) { + deferred = 1; + mp->m_flags &= ~PMUTEX_FLAG_DEFERRED; } else - defered = 0; + deferred = 0; - if (defered) { + if (deferred) { _thr_wake_all(curthread->defer_waiters, - curthread->nwaiter_defer); + curthread->nwaiter_defer); curthread->nwaiter_defer = 0; } return (0); @@ -716,7 +857,7 @@ _mutex_cv_detach(struct pthread_mutex *m static int mutex_self_trylock(struct pthread_mutex *m) { - int ret; + int ret; switch (PMUTEX_TYPE(m->m_flags)) { case PTHREAD_MUTEX_ERRORCHECK: @@ -746,7 +887,7 @@ static int mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime) { struct timespec ts1, ts2; - int ret; + int ret; switch (PMUTEX_TYPE(m->m_flags)) { case PTHREAD_MUTEX_ERRORCHECK: @@ -812,11 +953,11 @@ mutex_self_lock(struct pthread_mutex *m, *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?201605170956.u4H9uM5V080828>