From owner-freebsd-hackers Wed Mar 10 18: 4:28 1999 Delivered-To: freebsd-hackers@freebsd.org Received: from mercury.inktomi.com (mercury.inktomi.com [209.1.32.126]) by hub.freebsd.org (Postfix) with ESMTP id A673814D0F for ; Wed, 10 Mar 1999 18:04:27 -0800 (PST) (envelope-from jplevyak@inktomi.com) Received: from proxydev.inktomi.com (proxydev.inktomi.com [209.1.32.44]) by mercury.inktomi.com (8.9.1a/8.9.1) with ESMTP id SAA01995; Wed, 10 Mar 1999 18:04:18 -0800 (PST) Received: (from jplevyak@localhost) by proxydev.inktomi.com (8.8.5/8.7.3) id SAA20737; Wed, 10 Mar 1999 18:04:09 -0800 (PST) Message-ID: <19990310180408.E19442@proxydev.inktomi.com> Date: Wed, 10 Mar 1999 18:04:08 -0800 From: John Plevyak To: "Richard Seaman, Jr." , John Plevyak Cc: hackers@FreeBSD.ORG Subject: Re: bug in linuxthreads for FreeBSD References: <19990309172626.A7182@proxydev.inktomi.com> <19990310105944.F4440@tar.com> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii X-Mailer: Mutt 0.93.2i In-Reply-To: <19990310105944.F4440@tar.com>; from Richard Seaman, Jr. on Wed, Mar 10, 1999 at 10:59:44AM -0600 Sender: owner-freebsd-hackers@FreeBSD.ORG Precedence: bulk X-Loop: FreeBSD.ORG > > The patch I am using unconditionally removes the thread from the > > queue. This does not prevent the thread from being woken up > > extaneously, but it does prevent the ASSERT and corruption of > > the p_nextwaiting lists. > > It might be possible to eliminate, or at least narrow the window > for the "extraneous" wakeup. It would require reworking the linux > threads code more. OTOH, it could just be left as is. Here's > what Butenhof's "Programming with POSIX Threads" says about > the pthread_cond_* wait functions: > > "It is important that you test the predicate after locking the mutex > and before waiting on the condition variable. If a thread signals > a condition variable while no threads are waiting, nothing happens. > If some other thread calls pthread_cond_wait right after that, > it will just keep waiting...." > > and, > > "It is equally important that you test the predicate again when the > thread wakes up. You should always wait for a condition variable > in a loop, to protect against program errors, multiprocessor races, > and spurious wakeups." I agree for (at least for now). I have now tested it for hours under high load and haven't seen any problems. Here is the complete patch. I needed to patch up mutex_lock in mutex.c as well, so I moved a couple things around, and added some asserts. john diff -c --exclude=*.0 --exclude=*o --exclude=*a ./condvar.c ../../../linuxthreads.2/work/linuxthreads-0.71/condvar.c *** ./condvar.c Wed Mar 10 17:37:17 1999 --- ../../../linuxthreads.2/work/linuxthreads-0.71/condvar.c Wed Mar 10 12:20:40 1999 *************** *** 22,29 **** #include "queue.h" #include "restart.h" - static void remove_from_queue(pthread_queue * q, pthread_descr th); - int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *cond_attr) { --- 22,27 ---- *************** *** 51,56 **** --- 49,55 ---- release(&cond->c_spinlock); pthread_mutex_unlock(mutex); suspend_with_cancellation(self); + assert(self->p_nextwaiting == NULL && cond->c_waiting.head != self); pthread_mutex_lock(mutex); /* This is a cancellation point */ if (self->p_canceled && self->p_cancelstate == PTHREAD_CANCEL_ENABLE) { *************** *** 107,159 **** or the timeout occurred (retsleep == 0) or another interrupt occurred (retsleep == -1) */ /* Re-acquire the spinlock */ - acquire(&cond->c_spinlock); /* This is a cancellation point */ ! if (self->p_canceled && self->p_cancelstate == PTHREAD_CANCEL_ENABLE) { remove_from_queue(&cond->c_waiting, self); ! release(&cond->c_spinlock); ! pthread_mutex_lock(mutex); pthread_exit(PTHREAD_CANCELED); - } /* If not signaled: also remove ourselves and return an error code */ ! if (self->p_signal == 0) { ! remove_from_queue(&cond->c_waiting, self); ! release(&cond->c_spinlock); ! pthread_mutex_lock(mutex); return retsleep == 0 ? ETIMEDOUT : EINTR; - } #else acquire(&cond->c_spinlock); enqueue(&cond->c_waiting, self); release(&cond->c_spinlock); pthread_mutex_unlock(mutex); retsleep = 0; ! if (!(self->p_canceled && self->p_cancelstate == PTHREAD_CANCEL_ENABLE)) { /* We really should make thr_sleep return EINTR too. It just returns EAGAIN if it timed out, or 0 if awakened (or EINVAL if bad parameter. */ retsleep = syscall(SYS_thr_sleep, reltime); - } acquire(&cond->c_spinlock); ! if (self->p_canceled && self->p_cancelstate == PTHREAD_CANCEL_ENABLE) { remove_from_queue(&cond->c_waiting, self); ! release(&cond->c_spinlock); ! pthread_mutex_lock(mutex); pthread_exit(PTHREAD_CANCELED); ! } ! if (retsleep) { ! remove_from_queue(&cond->c_waiting, self); ! release(&cond->c_spinlock); ! pthread_mutex_lock(mutex); return retsleep == EAGAIN ? ETIMEDOUT : EINTR; - } - #endif - /* Otherwise, return normally */ - release(&cond->c_spinlock); - pthread_mutex_lock(mutex); return 0; } --- 106,145 ---- or the timeout occurred (retsleep == 0) or another interrupt occurred (retsleep == -1) */ /* Re-acquire the spinlock */ /* This is a cancellation point */ ! acquire(&cond->c_spinlock); ! if (self->p_nextwaiting != NULL || cond->c_waiting.head == self) remove_from_queue(&cond->c_waiting, self); ! release(&cond->c_spinlock); ! pthread_mutex_lock(mutex); ! if (self->p_canceled && self->p_cancelstate == PTHREAD_CANCEL_ENABLE) pthread_exit(PTHREAD_CANCELED); /* If not signaled: also remove ourselves and return an error code */ ! if (self->p_signal == 0) return retsleep == 0 ? ETIMEDOUT : EINTR; #else acquire(&cond->c_spinlock); enqueue(&cond->c_waiting, self); release(&cond->c_spinlock); pthread_mutex_unlock(mutex); retsleep = 0; ! if (!(self->p_canceled && self->p_cancelstate == PTHREAD_CANCEL_ENABLE)) /* We really should make thr_sleep return EINTR too. It just returns EAGAIN if it timed out, or 0 if awakened (or EINVAL if bad parameter. */ retsleep = syscall(SYS_thr_sleep, reltime); acquire(&cond->c_spinlock); ! if (self->p_nextwaiting != NULL || cond->c_waiting.head == self) remove_from_queue(&cond->c_waiting, self); ! release(&cond->c_spinlock); ! pthread_mutex_lock(mutex); ! if (self->p_canceled && self->p_cancelstate == PTHREAD_CANCEL_ENABLE) pthread_exit(PTHREAD_CANCELED); ! if (retsleep) return retsleep == EAGAIN ? ETIMEDOUT : EINTR; #endif return 0; } *************** *** 210,234 **** return 0; } ! /* Auxiliary function on queues */ ! ! static void remove_from_queue(pthread_queue * q, pthread_descr th) { pthread_descr t; ! if (q->head == NULL) return; if (q->head == th) { q->head = th->p_nextwaiting; if (q->head == NULL) q->tail = NULL; th->p_nextwaiting = NULL; ! return; } for (t = q->head; t->p_nextwaiting != NULL; t = t->p_nextwaiting) { if (t->p_nextwaiting == th) { t->p_nextwaiting = th->p_nextwaiting; if (th->p_nextwaiting == NULL) q->tail = t; th->p_nextwaiting = NULL; ! return; } } } --- 196,219 ---- return 0; } ! int remove_from_queue(pthread_queue * q, pthread_descr th) { pthread_descr t; ! if (q->head == NULL) return 0; if (q->head == th) { q->head = th->p_nextwaiting; if (q->head == NULL) q->tail = NULL; th->p_nextwaiting = NULL; ! return 1; } for (t = q->head; t->p_nextwaiting != NULL; t = t->p_nextwaiting) { if (t->p_nextwaiting == th) { t->p_nextwaiting = th->p_nextwaiting; if (th->p_nextwaiting == NULL) q->tail = t; th->p_nextwaiting = NULL; ! return 1; } } + return 0; } diff -c --exclude=*.0 --exclude=*o --exclude=*a ./mutex.c ../../../linuxthreads.2/work/linuxthreads-0.71/mutex.c *** ./mutex.c Wed Mar 10 17:37:17 1999 --- ../../../linuxthreads.2/work/linuxthreads-0.71/mutex.c Wed Mar 10 12:18:22 1999 *************** *** 83,92 **** int pthread_mutex_lock(pthread_mutex_t * mutex) { ! pthread_descr self; while(1) { acquire(&mutex->m_spinlock); switch(mutex->m_kind) { case PTHREAD_MUTEX_FAST_NP: if (mutex->m_count == 0) { --- 83,94 ---- int pthread_mutex_lock(pthread_mutex_t * mutex) { ! pthread_descr self = thread_self(); while(1) { acquire(&mutex->m_spinlock); + if (self->p_nextwaiting != NULL || mutex->m_waiting.head == self) + remove_from_queue(&mutex->m_waiting, self); switch(mutex->m_kind) { case PTHREAD_MUTEX_FAST_NP: if (mutex->m_count == 0) { *************** *** 94,103 **** release(&mutex->m_spinlock); return 0; } - self = thread_self(); break; case PTHREAD_MUTEX_RECURSIVE_NP: - self = thread_self(); if (mutex->m_count == 0 || mutex->m_owner == self) { mutex->m_count++; mutex->m_owner = self; --- 96,103 ---- *************** *** 106,112 **** } break; case PTHREAD_MUTEX_ERRORCHECK_NP: - self = thread_self(); if (mutex->m_count == 0) { mutex->m_count = 1; mutex->m_owner = self; --- 106,111 ---- diff -c --exclude=*.0 --exclude=*o --exclude=*a ./queue.h ../../../linuxthreads.2/work/linuxthreads-0.71/queue.h *** ./queue.h Fri Dec 5 09:28:21 1997 --- ../../../linuxthreads.2/work/linuxthreads-0.71/queue.h Wed Mar 10 12:20:04 1999 *************** *** 60,62 **** --- 60,64 ---- } return th; } + + int remove_from_queue(pthread_queue * q, pthread_descr th); -- John Bradley Plevyak, PhD, jplevyak@inktomi.com, PGP KeyID: 051130BD Inktomi Corporation, 1900 S. Norfolk Street, Suite 110, San Mateo, CA 94403 W:(415)653-2830 F:(415)653-2801 P:(888)491-1332/5103192436.4911332@pagenet.net To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-hackers" in the body of the message