Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 10 Jun 2004 22:21:17 -0700
From:      Sean McNeil <sean@mcneil.com>
To:        David Xu <davidxu@freebsd.org>
Cc:        freebsd-threads@freebsd.org
Subject:   Re: signal handler priority issue
Message-ID:  <1086931276.38487.35.camel@server.mcneil.com>
In-Reply-To: <40C93D60.3040003@freebsd.org>
References:  <1086924733.65671.81.camel@server.mcneil.com> <40C93D60.3040003@freebsd.org>

index | next in thread | previous in thread | raw e-mail

On Thu, 2004-06-10 at 22:04, David Xu wrote:
> Can you provide some code to demostrate the problem ? I have interest.
> 
> David Xu

Yes, here is some code snippets from boehm-gc:

master thread doing:

int GC_suspend_all()
{
    int n_live_threads = 0;
    int i;
    GC_thread p;
    int result;
    pthread_t my_thread = pthread_self();
    
    GC_stopping_thread = my_thread;    /* debugging only.      */
    GC_stopping_pid = getpid();                /* debugging only.      */
    for (i = 0; i < THREAD_TABLE_SZ; i++) {
      for (p = GC_threads[i]; p != 0; p = p -> next) {
        if (p -> id != my_thread) {
            if (p -> flags & FINISHED) continue;
            if (p -> stop_info.last_stop_count == GC_stop_count) continue;
	    if (p -> thread_blocked) /* Will wait */ continue;
            n_live_threads++;
	    #if DEBUG_THREADS
	      GC_printf1("Sending suspend signal to 0x%lx\n", p -> id);
	    #endif
        
        result = pthread_kill(p -> id, SIG_SUSPEND);
	    switch(result) {
                case ESRCH:
                    /* Not really there anymore.  Possible? */
                    n_live_threads--;
                    break;
                case 0:
                    break;
                default:
                    ABORT("pthread_kill failed");
            }
        }
      }
    }
    return n_live_threads;
}

and slave threads doing:

void GC_suspend_handler(int sig)
{
    int dummy;
    pthread_t my_thread = pthread_self();
    GC_thread me;
    sigset_t mask;
#   ifdef PARALLEL_MARK
	word my_mark_no = GC_mark_no;
	/* Marker can't proceed until we acknowledge.  Thus this is	*/
	/* guaranteed to be the mark_no correspending to our 		*/
	/* suspension, i.e. the marker can't have incremented it yet.	*/
#   endif
    word my_stop_count = GC_stop_count;

    if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");

#if DEBUG_THREADS
    GC_printf1("Suspending 0x%lx\n", my_thread);
#endif

    me = GC_lookup_thread(my_thread);
    /* The lookup here is safe, since I'm doing this on behalf  */
    /* of a thread which holds the allocation lock in order	*/
    /* to stop the world.  Thus concurrent modification of the	*/
    /* data structure is impossible.				*/
    if (me -> stop_info.last_stop_count == my_stop_count) {
	/* Duplicate signal.  OK if we are retrying.	*/
	if (!GC_retry_signals) {
	    WARN("Duplicate suspend signal in thread %lx\n",
		 pthread_self());
	}
	return;
    }
#   ifdef SPARC
	me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
#   else
	me -> stop_info.stack_ptr = (ptr_t)(&dummy);
#   endif
#   ifdef IA64
	me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
#   endif

    /* Tell the thread that wants to stop the world that this   */
    /* thread has been stopped.  Note that sem_post() is  	*/
    /* the only async-signal-safe primitive in LinuxThreads.    */
    sem_post(&GC_suspend_ack_sem);
    me -> stop_info.last_stop_count = my_stop_count;

#if DEBUG_THREADS
    GC_printf2("Waiting for restart #%d of 0x%lx\n", my_stop_count, my_thread);
#endif

    /* Wait until that thread tells us to restart by sending    */
    /* this thread a SIG_THR_RESTART signal.			*/
    /* SIG_THR_RESTART should be masked at this point.  Thus there	*/
    /* is no race.						*/
    if (sigfillset(&mask) != 0) ABORT("sigfillset() failed");
    if (sigdelset(&mask, SIG_THR_RESTART) != 0) ABORT("sigdelset() failed");
#   ifdef NO_SIGNALS
      if (sigdelset(&mask, SIGINT) != 0) ABORT("sigdelset() failed");
      if (sigdelset(&mask, SIGQUIT) != 0) ABORT("sigdelset() failed");
      if (sigdelset(&mask, SIGTERM) != 0) ABORT("sigdelset() failed");
      if (sigdelset(&mask, SIGABRT) != 0) ABORT("sigdelset() failed");
#   endif
    do {
	    me->stop_info.signal = 0;
	    sigsuspend(&mask);             /* Wait for signal */
    } while (me->stop_info.signal != SIG_THR_RESTART);
    /* If the RESTART signal gets lost, we can still lose.  That should be  */
    /* less likely than losing the SUSPEND signal, since we don't do much   */
    /* between the sem_post and sigsuspend.	   			    */
    /* We'd need more handshaking to work around that, since we don't want  */
    /* to accidentally leave a RESTART signal pending, thus causing us to   */
    /* continue prematurely in a future round.				    */ 

#if DEBUG_THREADS
    GC_printf1("Continuing 0x%lx\n", my_thread);
#endif
}

and here is the output with debug messages:

Stopping the world from 0x50d000
Sending suspend signal to 0x9d1400
Suspending 0x9d1400
World stopped from 0x50d000
Pushing stacks from thread 0x50d000
Stack for thread 0x9d1400 = [7fffffeed95c,7fffffeee000)
Stack for thread 0x50d000 = [7fffffffcf00,800000000000)
World starting
Sending restart signal to 0x9d1400
World started
Buildfile: build.xml
Stopping the world from 0x50d000
Sending suspend signal to 0x9d1400
Waiting for restart #2 of 0x9d1400

There are other things causing output, but you can see that the slave
isn't getting to the sigsuspend before it is evoked again from the
master thread.



home | help

Want to link to this message? Use this
URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?1086931276.38487.35.camel>