Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 30 Oct 2009 05:22:40 +0200
From:      Kostik Belousov <kostikbel@gmail.com>
To:        Giorgos Keramidas <keramida@freebsd.org>
Cc:        svn-src-head@freebsd.org, svn-src-all@freebsd.org, src-committers@freebsd.org
Subject:   Re: svn commit: r198590 - head/sys/kern
Message-ID:  <20091030032240.GD2147@deviant.kiev.zoral.com.ua>
In-Reply-To: <871vklbxyf.fsf@kobe.laptop>
References:  <200910291434.n9TEYOVJ099388@svn.freebsd.org> <871vklbxyf.fsf@kobe.laptop>

next in thread | previous in thread | raw e-mail | index | archive | help

--IMjqdzrDRly81ofr
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Fri, Oct 30, 2009 at 01:08:08AM +0200, Giorgos Keramidas wrote:
> On Thu, 29 Oct 2009 14:34:24 +0000 (UTC), Konstantin Belousov <kib@FreeBS=
D.org> wrote:
> > Author: kib
> > Date: Thu Oct 29 14:34:24 2009
> > New Revision: 198590
> > URL: http://svn.freebsd.org/changeset/base/198590
> >
> > Log:
> >   Trapsignal() calls kern_sigprocmask() when delivering catched signal
> >   with proc lock held.
> >
> >   Reported and tested by:	Mykola Dzham  freebsd at levsha org ua
> >   MFC after:	1 month
>=20
> Hi Konstantin,
>=20
> Some of the recent kern_sig changes end up recursing on a non-recursive
> mutex in kern_sigprocmask() -> reschedule_signals():
>=20
> panic: _mtx_lock_sleep: recursed on non-recursive mutex sigacts @ /usr/sr=
c/sys/kern/kern_sig.c:2422
> (kgdb) bt
> #0  doadump () at pcpu.h:246
> #1  0xc0680bee in boot (howto=3D260) at /usr/src/sys/kern/kern_shutdown.c=
:416
> #2  0xc0680ec2 in panic (fmt=3DVariable "fmt" is not available.
>     ) at /usr/src/sys/kern/kern_shutdown.c:579
> #3  0xc06716ea in _mtx_lock_sleep (m=3D0xc8154aa8, tid=3D3332925072, opts=
=3D0, file=3D0xc09bb332 "/usr/src/sys/kern/kern_sig.c", line=3D2422)
>     at /usr/src/sys/kern/kern_mutex.c:341
> #4  0xc0671907 in _mtx_lock_flags (m=3D0xc8154aa8, opts=3D0, file=3D0xc09=
bb332 "/usr/src/sys/kern/kern_sig.c", line=3D2422)
>     at /usr/src/sys/kern/kern_mutex.c:203
> #5  0xc0683434 in reschedule_signals (p=3D0xc71172a8, block=3D{__bits =3D=
 {0, 0, 0, 0}}) at /usr/src/sys/kern/kern_sig.c:2422
> #6  0xc0683751 in kern_sigprocmask (td=3D0xc6a86690, how=3D1, set=3D0xe90=
05cd4, oset=3D0x0, flags=3D2) at /usr/src/sys/kern/kern_sig.c:1027
> #7  0xc0684801 in postsig (sig=3D20) at /usr/src/sys/kern/kern_sig.c:2743
> #8  0xc06be228 in ast (framep=3D0xe9005d38) at /usr/src/sys/kern/subr_tra=
p.c:234
> #9  0xc0920624 in doreti_ast () at /usr/src/sys/i386/i386/exception.s:365
>=20
> I think the change that started causing this for MT applications was
> change 197963 in /head that added this bit of code in kern_sig.c inside
> kern_sigprocmask():
>=20
> : @@ -1012,7 +1012,20 @@
> :                          break;
> :                  }
> :          }
> : -        PROC_UNLOCK(td->td_proc);
> : +        /*
> : +         * The new_block set contains signals that were not previosly
> : +         * blocked, but are blocked now.
> : +         *
> : +         * In case we block any signal that was not previously blocked
> : +         * for td, and process has the signal pending, try to schedule
> : +         * signal delivery to some thread that does not block the sign=
al,
> : +         * possibly waking it up.
> : +         */
> : +        if (p->p_numthreads !=3D 1)
> : +                reschedule_signals(p, new_block);
> : +
> : +        PROC_UNLOCK(p);
> :          return (error);
No, this was caused by the r198507 fragment of postsig(), as well as a
fragment from the trapsignal(), that added the call to kern_sigprocmask()
instead of direct manipulation of thread signal mask.

>=20
> AFAICT, postsig() is called with proc->p_sigacts->ps_mtx locked, so when
> we are recursing when reschedule_signals() tries to lock it once more.
Yes, the same statement holds for trapsignal().

>=20
> Since we are holding the proc lock in kern_sigprocmask(), is it safe to
> assert that we own ps_mtx, drop it and re-acquire it immediately after
> calling reschedule_signals()?

No. Most callers of kern_sigprocmask() do not hold curproc->ps_mtx.
Only postsig() and trapsig() call kern_sigprocmask() with ps_mtx locked,
so I have to special-case them.

Could you, please, test the following patch ? What application did
exposed the issue ?

diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 7f5cfa3..e174df1 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -220,7 +220,7 @@ static int sigproptbl[NSIG] =3D {
         SA_KILL|SA_PROC,		/* SIGUSR2 */
 };
=20
-static void reschedule_signals(struct proc *p, sigset_t block);
+static void reschedule_signals(struct proc *p, sigset_t block, int flags);
=20
 static void
 sigqueue_start(void)
@@ -1024,7 +1024,7 @@ kern_sigprocmask(struct thread *td, int how, sigset_t=
 *set, sigset_t *oset,
 	 * possibly waking it up.
 	 */
 	if (p->p_numthreads !=3D 1)
-		reschedule_signals(p, new_block);
+		reschedule_signals(p, new_block, flags);
=20
 	if (!(flags & SIGPROCMASK_PROC_LOCKED))
 		PROC_UNLOCK(p);
@@ -1859,13 +1859,11 @@ trapsignal(struct thread *td, ksiginfo_t *ksi)
 #endif
 		(*p->p_sysent->sv_sendsig)(ps->ps_sigact[_SIG_IDX(sig)],=20
 				ksi, &td->td_sigmask);
-		SIGSETOR(td->td_sigmask, ps->ps_catchmask[_SIG_IDX(sig)]);
-		if (!SIGISMEMBER(ps->ps_signodefer, sig)) {
-			SIGEMPTYSET(mask);
+		mask =3D ps->ps_catchmask[_SIG_IDX(sig)];
+		if (!SIGISMEMBER(ps->ps_signodefer, sig))
 			SIGADDSET(mask, sig);
-			kern_sigprocmask(td, SIG_BLOCK, &mask, NULL,
-			    SIGPROCMASK_PROC_LOCKED);
-		}
+		kern_sigprocmask(td, SIG_BLOCK, &mask, NULL,
+		    SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED);
 		if (SIGISMEMBER(ps->ps_sigreset, sig)) {
 			/*
 			 * See kern_sigaction() for origin of this code.
@@ -2401,7 +2399,7 @@ stopme:
 }
=20
 static void
-reschedule_signals(struct proc *p, sigset_t block)
+reschedule_signals(struct proc *p, sigset_t block, int flags)
 {
 	struct sigacts *ps;
 	struct thread *td;
@@ -2419,12 +2417,14 @@ reschedule_signals(struct proc *p, sigset_t block)
=20
 		td =3D sigtd(p, i, 0);
 		signotify(td);
-		mtx_lock(&ps->ps_mtx);
+		if (!(flags & SIGPROCMASK_PS_LOCKED))
+			mtx_lock(&ps->ps_mtx);
 		if (p->p_flag & P_TRACED || SIGISMEMBER(ps->ps_sigcatch, i))
 			tdsigwakeup(td, i, SIG_CATCH,
 			    (SIGISMEMBER(ps->ps_sigintr, i) ? EINTR :
 			     ERESTART));
-		mtx_unlock(&ps->ps_mtx);
+		if (!(flags & SIGPROCMASK_PS_LOCKED))
+			mtx_unlock(&ps->ps_mtx);
 	}
 }
=20
@@ -2452,7 +2452,7 @@ tdsigcleanup(struct thread *td)
 	SIGFILLSET(unblocked);
 	SIGSETNAND(unblocked, td->td_sigmask);
 	SIGFILLSET(td->td_sigmask);
-	reschedule_signals(p, unblocked);
+	reschedule_signals(p, unblocked, 0);
=20
 }
=20
@@ -2734,15 +2734,11 @@ postsig(sig)
 		} else
 			returnmask =3D td->td_sigmask;
=20
-		kern_sigprocmask(td, SIG_BLOCK,
-		    &ps->ps_catchmask[_SIG_IDX(sig)], NULL,
-		    SIGPROCMASK_PROC_LOCKED);
-		if (!SIGISMEMBER(ps->ps_signodefer, sig)) {
-			SIGEMPTYSET(mask);
+		mask =3D ps->ps_catchmask[_SIG_IDX(sig)];
+		if (!SIGISMEMBER(ps->ps_signodefer, sig))
 			SIGADDSET(mask, sig);
-			kern_sigprocmask(td, SIG_BLOCK, &mask, NULL,
-			    SIGPROCMASK_PROC_LOCKED);
-		}
+		kern_sigprocmask(td, SIG_BLOCK, &mask, NULL,
+		    SIGPROCMASK_PROC_LOCKED | SIGPROCMASK_PS_LOCKED);
=20
 		if (SIGISMEMBER(ps->ps_sigreset, sig)) {
 			/*
diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h
index b9a54f0..c27a128 100644
--- a/sys/sys/signalvar.h
+++ b/sys/sys/signalvar.h
@@ -319,6 +319,7 @@ extern int kern_logsigexit;	/* Sysctl variable kern.log=
sigexit */
 /* flags for kern_sigprocmask */
 #define	SIGPROCMASK_OLD		0x0001
 #define	SIGPROCMASK_PROC_LOCKED	0x0002
+#define	SIGPROCMASK_PS_LOCKED	0x0004
=20
 /*
  * Machine-independent functions:

--IMjqdzrDRly81ofr
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.10 (FreeBSD)

iEYEARECAAYFAkrqW/8ACgkQC3+MBN1Mb4ji0QCdHnCI7qNdkGRvk84c1/STUiGc
9VoAnRViQHhckK3jz0HmdWoBWbbtI2aE
=h/98
-----END PGP SIGNATURE-----

--IMjqdzrDRly81ofr--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20091030032240.GD2147>