Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 31 Jul 2001 13:48:37 +0200
From:      Sheldon Hearn <sheldonh@starjuice.net>
To:        current@FreeBSD.org
Subject:   Re: -current lockups 
Message-ID:  <2764.996580117@axl.seasidesoftware.co.za>
In-Reply-To: Your message of "Mon, 30 Jul 2001 16:52:27 %2B0200." <438.996504747@axl.seasidesoftware.co.za> 

next in thread | previous in thread | raw e-mail | index | archive | help


On Mon, 30 Jul 2001 16:52:27 +0200, Sheldon Hearn wrote:

> I'll be a lot happier when I can enabled DDB_UNATTENDED and do whatever
> it is that causes my panic of the day and actually get a crashdump
> instead of
> 
> 	panic: witness_restore: lock (sleep mutex) Giant not locked

Right!  We have interesting progress!

The following patchset fixes two problems:

1) The witness code may still panic when panicstr is not NULL.  This is
   a problem when the original panic was caused by the witness code
   itself.

   Solution: when panicstr != NULL, witness backs off on the fascism.

2) The ata code calls await/mawait inside a dump.  This results in a
   hard, uninterruptable lock in ad_reinit().  An NMI might interrupt
   it, but not all of us have NMI boards.

   Solution: as a quick fix, when panicstr != NULL, bail out of
   masleep() without spinning on mutexes.

   The real solution here is arguably for the ata code to be aware of
   the fact that sleeping inside a dump (i.e. panicstr != NULL) is bad
   and stop doing so.

With these fixes in place, I can get a crashdump with a populated
ktr_buf from a witness-related panic.  Joy!

Many thanks to jhb for providing the patch for #1 above and to peter for
the patch for #2 above.  None of this is actually my own work.  I'm just
the guy pressing the panic button on his box incessantly. :-)

Ciao,
Sheldon.

Index: kern/kern_mutex.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_mutex.c,v
retrieving revision 1.64
diff -u -d -r1.64 kern_mutex.c
--- kern/kern_mutex.c	2001/06/25 18:29:32	1.64
+++ kern/kern_mutex.c	2001/07/30 23:14:32
@@ -562,6 +562,9 @@
 void
 _mtx_assert(struct mtx *m, int what, const char *file, int line)
 {
+
+	if (panicstr != NULL)
+		return;
 	switch (what) {
 	case MA_OWNED:
 	case MA_OWNED | MA_RECURSED:
Index: kern/kern_synch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.148
diff -u -d -r1.148 kern_synch.c
--- kern/kern_synch.c	2001/07/06 01:16:42	1.148
+++ kern/kern_synch.c	2001/07/31 01:07:23
@@ -554,6 +554,18 @@
 	KASSERT(timo > 0 || mtx_owned(&Giant) || mtx != NULL,
 	    ("sleeping without a mutex"));
 	mtx_lock_spin(&sched_lock);
+	if (cold || panicstr) {
+		/*
+		 * After a panic, or during autoconfiguration,
+		 * just give interrupts a chance, then just return;
+		 * don't run any other procs or panic below,
+		 * in case this is the idle process and already asleep.
+		 */
+		if (mtx != NULL && priority & PDROP)
+			mtx_unlock_flags(mtx, MTX_NOSWITCH);
+		mtx_unlock_spin(&sched_lock);
+		return (0);
+	}
 	DROP_GIANT_NOSWITCH();
 	if (mtx != NULL) {
 		mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
Index: kern/subr_trap.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/subr_trap.c,v
retrieving revision 1.196
diff -u -d -r1.196 subr_trap.c
--- kern/subr_trap.c	2001/07/04 15:36:30	1.196
+++ kern/subr_trap.c	2001/07/30 23:14:42
@@ -72,9 +72,9 @@
 	while ((sig = CURSIG(p)) != 0)
 		postsig(sig);
 	mtx_unlock(&Giant);
+	PROC_UNLOCK(p);
 
 	mtx_lock_spin(&sched_lock);
-	PROC_UNLOCK_NOSWITCH(p);
 	p->p_pri.pri_level = p->p_pri.pri_user;
 	if (resched_wanted(p)) {
 		/*
@@ -96,24 +96,22 @@
 		while ((sig = CURSIG(p)) != 0)
 			postsig(sig);
 		mtx_unlock(&Giant);
-		mtx_lock_spin(&sched_lock);
-		PROC_UNLOCK_NOSWITCH(p);
-	}
+		PROC_UNLOCK(p);
+	} else
+		mtx_unlock_spin(&sched_lock);
 
 	/*
 	 * Charge system time if profiling.
 	 */
-	if (p->p_sflag & PS_PROFIL) {
-		mtx_unlock_spin(&sched_lock);
+	if (p->p_sflag & PS_PROFIL)
 		addupc_task(p, TRAPF_PC(frame),
 			    (u_int)(p->p_sticks - oticks) * psratio);
-	} else
-		mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Process an asynchronous software trap.
  * This is relatively easy.
+ * This function will return with interrupts disabled.
  */
 void
 ast(framep)
@@ -121,68 +119,64 @@
 {
 	struct proc *p = CURPROC;
 	u_quad_t sticks;
+	critical_t s;
+	int sflag;
 #if defined(DEV_NPX) && !defined(SMP)
 	int ucode;
 #endif
 
 	KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
-
-	/*
-	 * We check for a pending AST here rather than in the assembly as
-	 * acquiring and releasing mutexes in assembly is not fun.
-	 */
-	mtx_lock_spin(&sched_lock);
-	if (!(astpending(p) || resched_wanted(p))) {
-		mtx_unlock_spin(&sched_lock);
-		return;
-	}
-
-	sticks = p->p_sticks;
-	p->p_frame = framep;
-
-	astoff(p);
-	cnt.v_soft++;
-	mtx_intr_enable(&sched_lock);
-	if (p->p_sflag & PS_OWEUPC) {
-		p->p_sflag &= ~PS_OWEUPC;
-		mtx_unlock_spin(&sched_lock);
-		mtx_lock(&Giant);
-		addupc_task(p, p->p_stats->p_prof.pr_addr,
-			    p->p_stats->p_prof.pr_ticks);
+	s = critical_enter();
+	while (astpending(p) || resched_wanted(p)) {
+		critical_exit(s);
+		p->p_frame = framep;
+		/*
+		 * This updates the p_sflag's for the checks below in one
+		 * "atomic" operation with turning off the astpending flag.
+		 * If another AST is triggered while we are handling the
+		 * AST's saved in sflag, the astpending flag will be set and
+		 * we will loop again.
+		 */
 		mtx_lock_spin(&sched_lock);
-	}
-	if (p->p_sflag & PS_ALRMPEND) {
-		p->p_sflag &= ~PS_ALRMPEND;
+		sticks = p->p_sticks;
+		sflag = p->p_sflag;
+		p->p_sflag &= ~(PS_OWEUPC | PS_ALRMPEND | PS_PROFPEND);
+		astoff(p);
+		cnt.v_soft++;
 		mtx_unlock_spin(&sched_lock);
-		PROC_LOCK(p);
-		psignal(p, SIGVTALRM);
-		PROC_UNLOCK(p);
-		mtx_lock_spin(&sched_lock);
-	}
+		if (sflag & PS_OWEUPC)
+			addupc_task(p, p->p_stats->p_prof.pr_addr,
+				    p->p_stats->p_prof.pr_ticks);
+		if (sflag & PS_ALRMPEND) {
+			PROC_LOCK(p);
+			psignal(p, SIGVTALRM);
+			PROC_UNLOCK(p);
+		}
 #if defined(DEV_NPX) && !defined(SMP)
-	if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
-		PCPU_GET(curpcb)->pcb_flags &= ~PCB_NPXTRAP;
-		mtx_unlock_spin(&sched_lock);
-		ucode = npxtrap();
-		if (ucode != -1) {
-			if (!mtx_owned(&Giant))
-				mtx_lock(&Giant);
-			trapsignal(p, SIGFPE, ucode);
+		/*
+		 * XXX: Does this need a lock?  So long as npxtrap's only
+		 * happen due to a userland fault, we don't need to worry
+		 * about nested faults while in a kernel context.
+		 */
+		if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
+			PCPU_GET(curpcb)->pcb_flags &= ~PCB_NPXTRAP;
+			ucode = npxtrap();
+			if (ucode != -1) {
+				if (!mtx_owned(&Giant))
+					mtx_lock(&Giant);
+				trapsignal(p, SIGFPE, ucode);
+			}
 		}
-		mtx_lock_spin(&sched_lock);
-	}
 #endif
-	if (p->p_sflag & PS_PROFPEND) {
-		p->p_sflag &= ~PS_PROFPEND;
-		mtx_unlock_spin(&sched_lock);
-		PROC_LOCK(p);
-		psignal(p, SIGPROF);
-		PROC_UNLOCK(p);
-	} else
-		mtx_unlock_spin(&sched_lock);
-
-	userret(p, framep, sticks);
+		if (sflag & PS_PROFPEND) {
+			PROC_LOCK(p);
+			psignal(p, SIGPROF);
+			PROC_UNLOCK(p);
+		}
 
-	if (mtx_owned(&Giant))
-		mtx_unlock(&Giant);
+		userret(p, framep, sticks);
+		if (mtx_owned(&Giant))
+			mtx_unlock(&Giant);
+		s = critical_enter();
+	}
 }
Index: kern/subr_witness.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/subr_witness.c,v
retrieving revision 1.80
diff -u -d -r1.80 subr_witness.c
--- kern/subr_witness.c	2001/07/20 23:29:25	1.80
+++ kern/subr_witness.c	2001/07/30 23:17:52
@@ -355,7 +355,7 @@
 	if (lock_cur_cnt > lock_max_cnt)
 		lock_max_cnt = lock_cur_cnt;
 	mtx_unlock(&all_mtx);
-	if (!witness_cold && !witness_dead &&
+	if (!witness_cold && !witness_dead && panicstr == NULL &&
 	    (lock->lo_flags & LO_WITNESS) != 0)
 		lock->lo_witness = enroll(lock->lo_name, class);
 	else
@@ -469,7 +469,7 @@
 #endif /* DDB */
 
 	if (witness_cold || witness_dead || lock->lo_witness == NULL ||
-	    panicstr)
+	    panicstr != NULL)
 		return;
 	w = lock->lo_witness;
 	class = lock->lo_class;
@@ -723,7 +723,7 @@
 	int i, j;
 
 	if (witness_cold || witness_dead || lock->lo_witness == NULL ||
-	    panicstr)
+	    panicstr != NULL)
 		return;
 	p = curproc;
 	class = lock->lo_class;
@@ -821,7 +821,7 @@
 	critical_t savecrit;
 	int i, n;
 
-	if (witness_dead || panicstr)
+	if (witness_dead || panicstr != NULL)
 		return (0);
 	KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
 	n = 0;
@@ -872,7 +872,7 @@
 {
 	struct witness *w;
 
-	if (!witness_watch || witness_dead)
+	if (!witness_watch || witness_dead || panicstr != NULL)
 		return (NULL);
 
 	if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin)
@@ -1309,7 +1309,7 @@
 	struct lock_instance *instance;
 
 	KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
-	if (lock->lo_witness == NULL || witness_dead)
+	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
 		return;
 
 	KASSERT(lock->lo_class->lc_flags & LC_SLEEPLOCK,
@@ -1329,7 +1329,7 @@
 	struct lock_instance *instance;
 
 	KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
-	if (lock->lo_witness == NULL || witness_dead)
+	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
 		return;
 
 	KASSERT(lock->lo_class->lc_flags & LC_SLEEPLOCK,
@@ -1351,7 +1351,7 @@
 #ifdef INVARIANT_SUPPORT
 	struct lock_instance *instance;
 
-	if (lock->lo_witness == NULL || witness_dead)
+	if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
 		return;
 
 	if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0)

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?2764.996580117>