Date: Thu, 28 Oct 2004 16:00:24 -0700 From: Julian Elischer <julian@elischer.org> To: David Xu <davidxu@freebsd.org> Cc: John Baldwin <jhb@freebsd.org> Subject: Re: Infinite loop bug in libc_r on 4.x with condition variables a nd signals Message-ID: <41817A08.9000706@elischer.org> In-Reply-To: <41804D8E.2030003@freebsd.org> References: <41804394.7020306@elischer.org> <41804D8E.2030003@freebsd.org>
next in thread | previous in thread | raw e-mail | index | archive | help
David Xu wrote: > Here is the cvs log: > > Revision Changes Path > 1.58 +1 -0 src/lib/libpthread/thread/thr_create.c > 1.14 +1 -1 src/lib/libpthread/thread/thr_find_thread.c > 1.115 +27 -10 src/lib/libpthread/thread/thr_kern.c > 1.119 +15 -11 src/lib/libpthread/thread/thr_private.h > 1.81 +1 -2 src/lib/libpthread/thread/thr_sig.c commit message was: 1. Move thread list flags into new separate member, and atomically put DEAD thread on GC list, this closes a race between pthread_join and thr_cleanup. 2. Introduce a mutex to protect tcb initialization, tls allocation and deallocation code in rtld seems no lock protection or it is broken, under stress testing, memory is corrupted. translates to: > julian@julian:cvs diff -u > cvs server: Diffing . > Index: thr_create.c > =================================================================== > RCS file: /home/ncvs/src/lib/libpthread/thread/thr_create.c,v > retrieving revision 1.57 > diff -u -r1.57 thr_create.c > --- thr_create.c 12 Aug 2004 12:12:12 -0000 1.57 > +++ thr_create.c 28 Oct 2004 22:55:58 -0000 > @@ -234,6 +234,7 @@ > new_thread->specific_data_count = 0; > new_thread->cleanup = NULL; > new_thread->flags = 0; > + new_thread->tlflags = 0; > new_thread->continuation = NULL; > new_thread->wakeup_time.tv_sec = -1; > new_thread->lock_switch = 0; > Index: thr_find_thread.c > =================================================================== > RCS file: /home/ncvs/src/lib/libpthread/thread/thr_find_thread.c,v > retrieving revision 1.13 > diff -u -r1.13 thr_find_thread.c > --- thr_find_thread.c 17 Jul 2003 23:02:30 -0000 1.13 > +++ thr_find_thread.c 28 Oct 2004 22:55:58 -0000 > @@ -90,7 +90,7 @@ > if (curthread != NULL) > curthread->critical_count--; > if ((thread->refcount == 0) && > - (thread->flags & THR_FLAGS_GC_SAFE) != 0) > + (thread->tlflags & TLFLAGS_GC_SAFE) != 0) > THR_GCLIST_ADD(thread); > KSE_LOCK_RELEASE(curkse, &_thread_list_lock); > _kse_critical_leave(crit); > Index: thr_kern.c > =================================================================== > RCS file: /home/ncvs/src/lib/libpthread/thread/thr_kern.c,v > retrieving revision 1.112 > diff -u -r1.112 thr_kern.c > --- thr_kern.c 15 Aug 2004 16:28:05 -0000 1.112 > +++ thr_kern.c 28 Oct 2004 22:55:58 -0000 > @@ -139,6 +139,9 @@ > static struct thread_hash_head thr_hashtable[THREAD_HASH_QUEUES]; > #define THREAD_HASH(thrd) ((unsigned long)thrd % > THREAD_HASH_QUEUE > S) > > +/* Lock for thread tcb constructor/destructor */ > +static pthread_mutex_t _tcb_mutex; > + > #ifdef DEBUG_THREAD_KERN > static void dump_queues(struct kse *curkse); > #endif > @@ -166,7 +169,7 @@ > struct pthread_sigframe *psf); > static int thr_timedout(struct pthread *thread, struct timespec > *curtime); > static void thr_unlink(struct pthread *thread); > -static void thr_destroy(struct pthread *thread); > +static void thr_destroy(struct pthread *curthread, struct pthread > *thread); > static void thread_gc(struct pthread *thread); > static void kse_gc(struct pthread *thread); > static void kseg_gc(struct pthread *thread); > @@ -240,7 +243,7 @@ > _thr_stack_free(&thread->attr); > if (thread->specific != NULL) > free(thread->specific); > - thr_destroy(thread); > + thr_destroy(curthread, thread); > } > } > > @@ -285,14 +288,14 @@ > /* Free the free threads. */ > while ((thread = TAILQ_FIRST(&free_threadq)) != NULL) { > TAILQ_REMOVE(&free_threadq, thread, tle); > - thr_destroy(thread); > + thr_destroy(curthread, thread); > } > free_thread_count = 0; > > /* Free the to-be-gc'd threads. */ > while ((thread = TAILQ_FIRST(&_thread_gc_list)) != NULL) { > TAILQ_REMOVE(&_thread_gc_list, thread, gcle); > - thr_destroy(thread); > + thr_destroy(curthread, thread); > } > TAILQ_INIT(&gc_ksegq); > _gc_count = 0; > @@ -381,6 +384,7 @@ > if (_lock_init(&_thread_list_lock, LCK_ADAPTIVE, > _kse_lock_wait, _kse_lock_wakeup) != 0) > PANIC("Unable to initialize thread list lock"); > + _pthread_mutex_init(&_tcb_mutex, NULL); > active_kse_count = 0; > active_kseg_count = 0; > _gc_count = 0; > @@ -1204,7 +1208,6 @@ > thread->kseg = _kse_initial->k_kseg; > thread->kse = _kse_initial; > } > - thread->flags |= THR_FLAGS_GC_SAFE; > > /* > * We can't hold the thread list lock while holding the > @@ -1213,6 +1216,7 @@ > KSE_SCHED_UNLOCK(curkse, curkse->k_kseg); > DBG_MSG("Adding thread %p to GC list\n", thread); > KSE_LOCK_ACQUIRE(curkse, &_thread_list_lock); > + thread->tlflags |= TLFLAGS_GC_SAFE; > THR_GCLIST_ADD(thread); > KSE_LOCK_RELEASE(curkse, &_thread_list_lock); > if (sys_scope) { > @@ -1252,7 +1256,7 @@ > /* Check the threads waiting for GC. */ > for (td = TAILQ_FIRST(&_thread_gc_list); td != NULL; td = > td_next) { > td_next = TAILQ_NEXT(td, gcle); > - if ((td->flags & THR_FLAGS_GC_SAFE) == 0) > + if ((td->tlflags & TLFLAGS_GC_SAFE) == 0) > continue; > else if (((td->attr.flags & PTHREAD_SCOPE_SYSTEM) != 0) && > ((td->kse->k_kcb->kcb_kmbx.km_flags & KMF_DONE) == > 0)) { > @@ -2382,7 +2386,14 @@ > if ((thread == NULL) && > ((thread = malloc(sizeof(struct pthread))) != NULL)) { > bzero(thread, sizeof(struct pthread)); > - if ((thread->tcb = _tcb_ctor(thread, curthread == > NULL)) == NULL > ) { > + if (curthread) { > + _pthread_mutex_lock(&_tcb_mutex); > + thread->tcb = _tcb_ctor(thread, 0 /* not > initial tls */) > ; > + _pthread_mutex_unlock(&_tcb_mutex); > + } else { > + thread->tcb = _tcb_ctor(thread, 1 /* initial > tls */); > + } > + if (thread->tcb == NULL) { > free(thread); > thread = NULL; > } else { > @@ -2418,7 +2429,7 @@ > thread->name = NULL; > } > if ((curthread == NULL) || (free_thread_count >= > MAX_CACHED_THREADS)) { > - thr_destroy(thread); > + thr_destroy(curthread, thread); > } else { > /* Add the thread to the free thread list. */ > crit = _kse_critical_enter(); > @@ -2431,14 +2442,20 @@ > } > > static void > -thr_destroy(struct pthread *thread) > +thr_destroy(struct pthread *curthread, struct pthread *thread) > { > int i; > > for (i = 0; i < MAX_THR_LOCKLEVEL; i++) > _lockuser_destroy(&thread->lockusers[i]); > _lock_destroy(&thread->lock); > - _tcb_dtor(thread->tcb); > + if (curthread) { > + _pthread_mutex_lock(&_tcb_mutex); > + _tcb_dtor(thread->tcb); > + _pthread_mutex_unlock(&_tcb_mutex); > + } else { > + _tcb_dtor(thread->tcb); > + } > free(thread->siginfo); > free(thread); > } > Index: thr_private.h > =================================================================== > RCS file: /home/ncvs/src/lib/libpthread/thread/thr_private.h,v > retrieving revision 1.118 > diff -u -r1.118 thr_private.h > --- thr_private.h 7 Aug 2004 15:15:38 -0000 1.118 > +++ thr_private.h 28 Oct 2004 22:55:59 -0000 > @@ -753,9 +753,13 @@ > #define THR_FLAGS_IN_RUNQ 0x0004 /* in run queue using pqe link */ > #define THR_FLAGS_EXITING 0x0008 /* thread is exiting */ > #define THR_FLAGS_SUSPENDED 0x0010 /* thread is suspended */ > -#define THR_FLAGS_GC_SAFE 0x0020 /* thread safe for > cleaning */ > -#define THR_FLAGS_IN_TDLIST 0x0040 /* thread in all > thread list */ > -#define THR_FLAGS_IN_GCLIST 0x0080 /* thread in gc list */ > + > + /* Thread list flags; only set with thread list lock held. */ > +#define TLFLAGS_GC_SAFE 0x0001 /* thread safe for > cleaning */ > +#define TLFLAGS_IN_TDLIST 0x0002 /* thread in all > thread list */ > +#define TLFLAGS_IN_GCLIST 0x0004 /* thread in gc list */ > + int tlflags; > + > /* > * Base priority is the user setable and retrievable priority > * of the thread. It is only affected by explicit calls to > @@ -897,30 +901,30 @@ > * the gc list. > */ > #define THR_LIST_ADD(thrd) do { \ > - if (((thrd)->flags & THR_FLAGS_IN_TDLIST) == 0) { \ > + if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) == 0) { \ > TAILQ_INSERT_HEAD(&_thread_list, thrd, tle); \ > _thr_hash_add(thrd); \ > - (thrd)->flags |= THR_FLAGS_IN_TDLIST; \ > + (thrd)->tlflags |= TLFLAGS_IN_TDLIST; \ > } \ > } while (0) > #define THR_LIST_REMOVE(thrd) do { \ > - if (((thrd)->flags & THR_FLAGS_IN_TDLIST) != 0) { \ > + if (((thrd)->tlflags & TLFLAGS_IN_TDLIST) != 0) { \ > TAILQ_REMOVE(&_thread_list, thrd, tle); \ > _thr_hash_remove(thrd); \ > - (thrd)->flags &= ~THR_FLAGS_IN_TDLIST; \ > + (thrd)->tlflags &= ~TLFLAGS_IN_TDLIST; \ > } \ > } while (0) > #define THR_GCLIST_ADD(thrd) do { \ > - if (((thrd)->flags & THR_FLAGS_IN_GCLIST) == 0) { \ > + if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) == 0) { \ > TAILQ_INSERT_HEAD(&_thread_gc_list, thrd, gcle);\ > - (thrd)->flags |= THR_FLAGS_IN_GCLIST; \ > + (thrd)->tlflags |= TLFLAGS_IN_GCLIST; \ > _gc_count++; \ > } \ > } while (0) > #define THR_GCLIST_REMOVE(thrd) do { \ > - if (((thrd)->flags & THR_FLAGS_IN_GCLIST) != 0) { \ > + if (((thrd)->tlflags & TLFLAGS_IN_GCLIST) != 0) { \ > TAILQ_REMOVE(&_thread_gc_list, thrd, gcle); \ > - (thrd)->flags &= ~THR_FLAGS_IN_GCLIST; \ > + (thrd)->tlflags &= ~TLFLAGS_IN_GCLIST; \ > _gc_count--; \ > } \ > } while (0) > Index: thr_sig.c > =================================================================== > RCS file: /home/ncvs/src/lib/libpthread/thread/thr_sig.c,v > retrieving revision 1.79 > diff -u -r1.79 thr_sig.c > --- thr_sig.c 13 Jul 2004 22:52:11 -0000 1.79 > +++ thr_sig.c 28 Oct 2004 22:55:59 -0000 > @@ -1195,8 +1195,7 @@ > thr_sigframe_save(struct pthread *thread, struct pthread_sigframe *psf) > { > /* This has to initialize all members of the sigframe. */ > - psf->psf_flags = > - thread->flags & (THR_FLAGS_PRIVATE | THR_FLAGS_IN_TDLIST); > + psf->psf_flags = thread->flags & THR_FLAGS_PRIVATE; > psf->psf_interrupted = thread->interrupted; > psf->psf_timeout = thread->timeout; > psf->psf_state = thread->state; > julian@julian: > > Julian Elischer wrote: > >> David, do you have revision numbers of what needs to be MFC'd? >> >> >> David Xu wrote: >> >> >>> Daniel Eischen wrote: >>> >>> >>>>> FWIW, we are having (I think) the same problem on 5.3 with >>>>> libpthread. The >>>>> >>>>> panic there is in the mutex code about an assertion failing >>>>> because a thread >>>>> is on a syncq when it is not supposed to be. >>>>> >>>> >>>> >>>> >>>> David and I recently fixed some races in pthread_join() and >>>> pthread_exit() in -current libpthread. Don't know if those >>>> were responsible... >>>> >>>> >>>> >>> >>> That fix should be MFCed ASAP. >>> >>> >>>> Here's a test program that shows correct behavior with both >>>> libc_r and libpthread in -current. >>>>
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?41817A08.9000706>