Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 20 Jun 2009 19:15:40 +0300
From:      Kostik Belousov <kostikbel@gmail.com>
To:        Jilles Tjoelker <jilles@stack.nl>
Cc:        freebsd-arch@freebsd.org
Subject:   Re: deadlocks with intr NFS mounts and ^Z (or: PCATCH and sleepable locks)
Message-ID:  <20090620161540.GF2884@deviant.kiev.zoral.com.ua>
In-Reply-To: <20090619162328.GA79975@stack.nl>
References:  <20090619162328.GA79975@stack.nl>

next in thread | previous in thread | raw e-mail | index | archive | help

--l+goss899txtYvYf
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Content-Transfer-Encoding: quoted-printable

On Fri, Jun 19, 2009 at 06:23:28PM +0200, Jilles Tjoelker wrote:
> I have been having trouble with deadlocks with NFS mounts for a while,
> and I have found at least one way it can deadlock. It seems an issue
> with the sleep/lock system.
>=20
> NFS sleeps while holding a lockmgr lock, waiting for a reply from the
> server. When the mount is set intr, this is an interruptible sleep, so
> that interrupting signals can abort the sleep. However, this also means
> that SIGSTOP etc will suspend the thread without waking it up first, so
> it will be suspended with a lock held.
>=20
> If it holds the wrong locks, it is possible that the shell will not be
> able to run, and the process cannot be continued in the normal manner.
>=20
> Due to some other things I do not understand, it is then possible that
> the process cannot be continued at all (SIGCONT seems ignored), but in
> simple cases SIGCONT works, and things go back to normal.
>=20
> In any case, this situation is undesirable, as even 'umount -f' doesn't
> work while the thread is suspended.
>=20
> Of course, this reasoning applies to any code that goes to sleep
> interruptibly while holding a lock (sx or lockmgr). Is this supposed to
> be possible (likely useful)? If so, a third type of sleep would be
> needed that is interrupted by signals but not suspended? If not,
> something should check that it doesn't happen and NFS intr mounts may
> need to check for signals periodically or find a way to avoid sleeping
> with locks held.
>=20
> The td_locks field is only accessible for the current thread, so it
> cannot be used to check if suspending is safe.
>=20
> Also, making SIGSTOP and the like interrupt/restart syscalls is not
> acceptable unless you find some way to do it such that userland won't
> notice. For example, a read of 10 megabytes from a regular file with
> that much available must not return less then 10 megabytes.

Note that NFS does check for the signals during i/o, so you may get
short reads anyway.

I do think that the right solution both there and with SINGLE_NO_EXIT
case for thread_single is to stop at the usermode boundary instead of
suspending a thread in the interruptible sleep state.

I set error code returned from interrupted msleep() to ERESTART,
that seems to be the right thing, at least to restart the i/o that
transferred no data upon receiving SIGSTOP.

My current patch is below. It contains some not strictly related
changes, e.g. for wakeup().

diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index 5c1d553..28f4f4f 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -2310,18 +2310,22 @@ static void
 sig_suspend_threads(struct thread *td, struct proc *p, int sending)
 {
 	struct thread *td2;
+	int wakeup_swapper;
=20
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	PROC_SLOCK_ASSERT(p, MA_OWNED);
=20
+	wakeup_swapper =3D 0;
 	FOREACH_THREAD_IN_PROC(p, td2) {
 		thread_lock(td2);
 		td2->td_flags |=3D TDF_ASTPENDING | TDF_NEEDSUSPCHK;
 		if ((TD_IS_SLEEPING(td2) || TD_IS_SWAPPED(td2)) &&
-		    (td2->td_flags & TDF_SINTR) &&
-		    !TD_IS_SUSPENDED(td2)) {
-			thread_suspend_one(td2);
-		} else {
+		    (td2->td_flags & TDF_SINTR)) {
+			if (TD_IS_SUSPENDED(td2))
+				wakeup_swapper |=3D thread_unsuspend_one(td2);
+			if (TD_ON_SLEEPQ(td2) && (td2->td_flags & TDF_SINTR))
+				wakeup_swapper |=3D sleepq_abort(td2, ERESTART);
+		} else if (!TD_IS_SUSPENDED(td2)) {
 			if (sending || td !=3D td2)
 				td2->td_flags |=3D TDF_ASTPENDING;
 #ifdef SMP
@@ -2331,6 +2335,8 @@ sig_suspend_threads(struct thread *td, struct proc *p=
, int sending)
 		}
 		thread_unlock(td2);
 	}
+	if (wakeup_swapper)
+		kick_proc0();
 }
=20
 int
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index b91c1a5..d27d027 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -344,11 +344,16 @@ wakeup(void *ident)
 {
 	int wakeup_swapper;
=20
+ repeat:
 	sleepq_lock(ident);
 	wakeup_swapper =3D sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
 	sleepq_release(ident);
-	if (wakeup_swapper)
-		kick_proc0();
+	if (wakeup_swapper) {
+		if (ident =3D=3D &proc0)
+			goto repeat;
+		else
+			kick_proc0();
+	}
 }
=20
 /*
@@ -361,11 +366,16 @@ wakeup_one(void *ident)
 {
 	int wakeup_swapper;
=20
+ repeat:
 	sleepq_lock(ident);
 	wakeup_swapper =3D sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
 	sleepq_release(ident);
-	if (wakeup_swapper)
-		kick_proc0();
+	if (wakeup_swapper) {
+		if (ident =3D=3D &proc0)
+			goto repeat;
+		else
+			kick_proc0();
+	}
 }
=20
 static void
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index bb8779b..800a1d1 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -504,6 +504,22 @@ thread_unlink(struct thread *td)
 	/* Must  NOT clear links to proc! */
 }
=20
+static int
+recalc_remaining(struct proc *p, int mode)
+{
+	int remaining;
+
+	if (mode =3D=3D SINGLE_EXIT)
+		remaining =3D p->p_numthreads;
+	else if (mode =3D=3D SINGLE_BOUNDARY)
+		remaining =3D p->p_numthreads - p->p_boundary_count;
+	else if (mode =3D=3D SINGLE_NO_EXIT)
+		remaining =3D p->p_numthreads - p->p_suspcount;
+	else
+		panic("recalc_remaining: wrong mode %d", mode);
+	return (remaining);
+}
+
 /*
  * Enforce single-threading.
  *
@@ -551,12 +567,7 @@ thread_single(int mode)
 	p->p_flag |=3D P_STOPPED_SINGLE;
 	PROC_SLOCK(p);
 	p->p_singlethread =3D td;
-	if (mode =3D=3D SINGLE_EXIT)
-		remaining =3D p->p_numthreads;
-	else if (mode =3D=3D SINGLE_BOUNDARY)
-		remaining =3D p->p_numthreads - p->p_boundary_count;
-	else
-		remaining =3D p->p_numthreads - p->p_suspcount;
+	remaining =3D recalc_remaining(p, mode);
 	while (remaining !=3D 1) {
 		if (P_SHOULDSTOP(p) !=3D P_STOPPED_SINGLE)
 			goto stopme;
@@ -587,18 +598,17 @@ thread_single(int mode)
 						wakeup_swapper |=3D
 						    sleepq_abort(td2, ERESTART);
 					break;
+				case SINGLE_NO_EXIT:
+					if (TD_IS_SUSPENDED(td2) &&
+					    !(td2->td_flags & TDF_BOUNDARY))
+						wakeup_swapper |=3D
+						    thread_unsuspend_one(td2);
+					if (TD_ON_SLEEPQ(td2) &&
+					    (td2->td_flags & TDF_SINTR))
+						wakeup_swapper |=3D
+						    sleepq_abort(td2, ERESTART);
+					break;
 				default:
-					if (TD_IS_SUSPENDED(td2)) {
-						thread_unlock(td2);
-						continue;
-					}
-					/*
-					 * maybe other inhibited states too?
-					 */
-					if ((td2->td_flags & TDF_SINTR) &&
-					    (td2->td_inhibitors &
-					    (TDI_SLEEPING | TDI_SWAPPED)))
-						thread_suspend_one(td2);
 					break;
 				}
 			}
@@ -611,12 +621,7 @@ thread_single(int mode)
 		}
 		if (wakeup_swapper)
 			kick_proc0();
-		if (mode =3D=3D SINGLE_EXIT)
-			remaining =3D p->p_numthreads;
-		else if (mode =3D=3D SINGLE_BOUNDARY)
-			remaining =3D p->p_numthreads - p->p_boundary_count;
-		else
-			remaining =3D p->p_numthreads - p->p_suspcount;
+		remaining =3D recalc_remaining(p, mode);
=20
 		/*
 		 * Maybe we suspended some threads.. was it enough?
@@ -630,12 +635,7 @@ stopme:
 		 * In the mean time we suspend as well.
 		 */
 		thread_suspend_switch(td);
-		if (mode =3D=3D SINGLE_EXIT)
-			remaining =3D p->p_numthreads;
-		else if (mode =3D=3D SINGLE_BOUNDARY)
-			remaining =3D p->p_numthreads - p->p_boundary_count;
-		else
-			remaining =3D p->p_numthreads - p->p_suspcount;
+		remaining =3D recalc_remaining(p, mode);
 	}
 	if (mode =3D=3D SINGLE_EXIT) {
 		/*

--l+goss899txtYvYf
Content-Type: application/pgp-signature
Content-Disposition: inline

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (FreeBSD)

iEYEARECAAYFAko9CywACgkQC3+MBN1Mb4gfMACg53hcePE93ReoIY5Fcaqgc2lI
M6EAoLeRDAJZeskPOIlNJ+4Hs6W9IsUo
=nuph
-----END PGP SIGNATURE-----

--l+goss899txtYvYf--



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20090620161540.GF2884>