Date: Fri, 30 Oct 2009 05:22:40 +0200 From: Kostik Belousov <kostikbel@gmail.com> To: Giorgos Keramidas <keramida@freebsd.org> Cc: svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org Subject: Re: svn commit: r198590 - head/sys/kern Message-ID: <20091030032240.GD2147@deviant.kiev.zoral.com.ua> In-Reply-To: <871vklbxyf.fsf@kobe.laptop> References: <200910291434.n9TEYOVJ099388@svn.freebsd.org> <871vklbxyf.fsf@kobe.laptop>
next in thread | previous in thread | raw e-mail | index | archive | help
--IMjqdzrDRly81ofr Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable On Fri, Oct 30, 2009 at 01:08:08AM +0200, Giorgos Keramidas wrote: > On Thu, 29 Oct 2009 14:34:24 +0000 (UTC), Konstantin Belousov <kib@FreeBS= D.org> wrote: > > Author: kib > > Date: Thu Oct 29 14:34:24 2009 > > New Revision: 198590 > > URL: http://svn.freebsd.org/changeset/base/198590 > > > > Log: > > Trapsignal() calls kern_sigprocmask() when delivering catched signal > > with proc lock held. > > > > Reported and tested by: Mykola Dzham freebsd at levsha org ua > > MFC after: 1 month >=20 > Hi Konstantin, >=20 > Some of the recent kern_sig changes end up recursing on a non-recursive > mutex in kern_sigprocmask() -> reschedule_signals(): >=20 > panic: _mtx_lock_sleep: recursed on non-recursive mutex sigacts @ /usr/sr= c/sys/kern/kern_sig.c:2422 > (kgdb) bt > #0 doadump () at pcpu.h:246 > #1 0xc0680bee in boot (howto=3D260) at /usr/src/sys/kern/kern_shutdown.c= :416 > #2 0xc0680ec2 in panic (fmt=3DVariable "fmt" is not available. > ) at /usr/src/sys/kern/kern_shutdown.c:579 > #3 0xc06716ea in _mtx_lock_sleep (m=3D0xc8154aa8, tid=3D3332925072, opts= =3D0, file=3D0xc09bb332 "/usr/src/sys/kern/kern_sig.c", line=3D2422) > at /usr/src/sys/kern/kern_mutex.c:341 > #4 0xc0671907 in _mtx_lock_flags (m=3D0xc8154aa8, opts=3D0, file=3D0xc09= bb332 "/usr/src/sys/kern/kern_sig.c", line=3D2422) > at /usr/src/sys/kern/kern_mutex.c:203 > #5 0xc0683434 in reschedule_signals (p=3D0xc71172a8, block=3D{__bits =3D= {0, 0, 0, 0}}) at /usr/src/sys/kern/kern_sig.c:2422 > #6 0xc0683751 in kern_sigprocmask (td=3D0xc6a86690, how=3D1, set=3D0xe90= 05cd4, oset=3D0x0, flags=3D2) at /usr/src/sys/kern/kern_sig.c:1027 > #7 0xc0684801 in postsig (sig=3D20) at /usr/src/sys/kern/kern_sig.c:2743 > #8 0xc06be228 in ast (framep=3D0xe9005d38) at /usr/src/sys/kern/subr_tra= p.c:234 > #9 0xc0920624 in doreti_ast () at /usr/src/sys/i386/i386/exception.s:365 >=20 > I think the change that started causing this for MT applications was > change 197963 in /head that added this bit of code in kern_sig.c inside > kern_sigprocmask(): >=20 > : @@ -1012,7 +1012,20 @@ > : break; > : } > : } > : - PROC_UNLOCK(td->td_proc); > : + /* > : + * The new_block set contains signals that were not previosly > : + * blocked, but are blocked now. > : + * > : + * In case we block any signal that was not previously blocked > : + * for td, and process has the signal pending, try to schedule > : + * signal delivery to some thread that does not block the sign= al, > : + * possibly waking it up. > : + */ > : + if (p->p_numthreads !=3D 1) > : + reschedule_signals(p, new_block); > : + > : + PROC_UNLOCK(p); > : return (error); No, this was caused by the r198507 fragment of postsig(), as well as a fragment from the trapsignal(), that added the call to kern_sigprocmask() instead of direct manipulation of thread signal mask. >=20 > AFAICT, postsig() is called with proc->p_sigacts->ps_mtx locked, so when > we are recursing when reschedule_signals() tries to lock it once more. Yes, the same statement holds for trapsignal(). >=20 > Since we are holding the proc lock in kern_sigprocmask(), is it safe to > assert that we own ps_mtx, drop it and re-acquire it immediately after > calling reschedule_signals()? No. Most callers of kern_sigprocmask() do not hold curproc->ps_mtx. Only postsig() and trapsig() call kern_sigprocmask() with ps_mtx locked, so I have to special-case them. Could you, please, test the following patch ? What application did exposed the issue ? diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index 7f5cfa3..e174df1 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -220,7 +220,7 @@ static int sigproptbl[NSIG] =3D { SA_KILL|SA_PROC, /* SIGUSR2 */ }; =20 -static void reschedule_signals(struct proc *p, sigset_t block); +static void reschedule_signals(struct proc *p, sigset_t block, int flags); =20 static void sigqueue_start(void) @@ -1024,7 +1024,7 @@ kern_sigprocmask(struct thread *td, int how, sigset_t= *set, sigset_t *oset, * possibly waking it up. */ if (p->p_numthreads !=3D 1) - reschedule_signals(p, new_block); + reschedule_signals(p, new_block, flags); =20 if (!(flags & SIGPROCMASK_PROC_LOCKED)) PROC_UNLOCK(p); @@ -1859,13 +1859,11 @@ trapsignal(struct thread *td, ksiginfo_t *ksi) #endif (*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)],=20 ksi, &td->td_sigmask); - SIGSETOR(td->td_sigmask, ps->ps_catchmask[_SIG_IDX(sig)]); - if (!SIGISMEMBER(ps->ps_signodefer, sig)) { - SIGEMPTYSET(mask); + mask =3D ps->ps_catchmask[_SIG_IDX(sig)]; + if (!SIGISMEMBER(ps->ps_signodefer, sig)) SIGADDSET(mask, sig); - kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, - SIGPROCMASK_PROC_LOCKED); - } + kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, + SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED); if (SIGISMEMBER(ps->ps_sigreset, sig)) { /* * See kern_sigaction() for origin of this code. @@ -2401,7 +2399,7 @@ stopme: } =20 static void -reschedule_signals(struct proc *p, sigset_t block) +reschedule_signals(struct proc *p, sigset_t block, int flags) { struct sigacts *ps; struct thread *td; @@ -2419,12 +2417,14 @@ reschedule_signals(struct proc *p, sigset_t block) =20 td =3D sigtd(p, i, 0); signotify(td); - mtx_lock(&ps->ps_mtx); + if (!(flags & SIGPROCMASK_PS_LOCKED)) + mtx_lock(&ps->ps_mtx); if (p->p_flag & P_TRACED || SIGISMEMBER(ps->ps_sigcatch, i)) tdsigwakeup(td, i, SIG_CATCH, (SIGISMEMBER(ps->ps_sigintr, i) ? EINTR : ERESTART)); - mtx_unlock(&ps->ps_mtx); + if (!(flags & SIGPROCMASK_PS_LOCKED)) + mtx_unlock(&ps->ps_mtx); } } =20 @@ -2452,7 +2452,7 @@ tdsigcleanup(struct thread *td) SIGFILLSET(unblocked); SIGSETNAND(unblocked, td->td_sigmask); SIGFILLSET(td->td_sigmask); - reschedule_signals(p, unblocked); + reschedule_signals(p, unblocked, 0); =20 } =20 @@ -2734,15 +2734,11 @@ postsig(sig) } else returnmask =3D td->td_sigmask; =20 - kern_sigprocmask(td, SIG_BLOCK, - &ps->ps_catchmask[_SIG_IDX(sig)], NULL, - SIGPROCMASK_PROC_LOCKED); - if (!SIGISMEMBER(ps->ps_signodefer, sig)) { - SIGEMPTYSET(mask); + mask =3D ps->ps_catchmask[_SIG_IDX(sig)]; + if (!SIGISMEMBER(ps->ps_signodefer, sig)) SIGADDSET(mask, sig); - kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, - SIGPROCMASK_PROC_LOCKED); - } + kern_sigprocmask(td, SIG_BLOCK, &mask, NULL, + SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED); =20 if (SIGISMEMBER(ps->ps_sigreset, sig)) { /* diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index b9a54f0..c27a128 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -319,6 +319,7 @@ extern int kern_logsigexit; /* Sysctl variable kern.log= sigexit */ /* flags for kern_sigprocmask */ #define SIGPROCMASK_OLD 0x0001 #define SIGPROCMASK_PROC_LOCKED 0x0002 +#define SIGPROCMASK_PS_LOCKED 0x0004 =20 /* * Machine-independent functions: --IMjqdzrDRly81ofr Content-Type: application/pgp-signature Content-Disposition: inline -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.10 (FreeBSD) iEYEARECAAYFAkrqW/8ACgkQC3+MBN1Mb4ji0QCdHnCI7qNdkGRvk84c1/STUiGc 9VoAnRViQHhckK3jz0HmdWoBWbbtI2aE =h/98 -----END PGP SIGNATURE----- --IMjqdzrDRly81ofr--
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20091030032240.GD2147>