Date: Tue, 31 Jul 2001 13:48:37 +0200
From: Sheldon Hearn <sheldonh@starjuice.net>
To: current@FreeBSD.org
Subject: Re: -current lockups
Message-ID: <2764.996580117@axl.seasidesoftware.co.za>
In-Reply-To: Your message of "Mon, 30 Jul 2001 16:52:27 +0200." <438.996504747@axl.seasidesoftware.co.za>
On Mon, 30 Jul 2001 16:52:27 +0200, Sheldon Hearn wrote:

> I'll be a lot happier when I can enable DDB_UNATTENDED and do whatever
> it is that causes my panic of the day and actually get a crashdump
> instead of
>
> panic: witness_restore: lock (sleep mutex) Giant not locked

Right! We have interesting progress!

The following patchset fixes two problems:

1) The witness code may still panic when panicstr is not NULL.  This is
   a problem when the original panic was caused by the witness code
   itself.

   Solution: when panicstr != NULL, witness backs off on the fascism.

2) The ata code calls await/mawait inside a dump.  This results in a
   hard, uninterruptible lock in ad_reinit().  An NMI might interrupt
   it, but not all of us have NMI boards.

   Solution: as a quick fix, when panicstr != NULL, bail out of
   masleep() without spinning on mutexes.  The real solution here is
   arguably for the ata code to recognize that sleeping inside a dump
   (i.e. when panicstr != NULL) is bad, and to stop doing so.  A rough
   sketch of the idea shared by both quick fixes follows below.
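To make that common idea concrete, here is a rough userland mock-up of
the guard both fixes rely on.  It is purely illustrative and not part
of the patch; the names fake_panicstr, mock_assert_owned and
mock_msleep are invented stand-ins, not the kernel's own symbols.

/*
 * Userland mock-up of the "back off once a panic is in progress"
 * guard.  fake_panicstr stands in for the kernel's panicstr global;
 * NULL means no panic is underway.
 */
#include <stdio.h>
#include <stddef.h>

static const char *fake_panicstr = NULL;

/* Fix 1: an assertion-style check that backs off while panicking. */
static void
mock_assert_owned(int owned)
{

	if (fake_panicstr != NULL)
		return;		/* don't panic again on top of a panic */
	if (!owned)
		printf("would panic: lock not owned\n");
}

/* Fix 2: a sleep-style routine that returns at once during a dump. */
static int
mock_msleep(int timo)
{

	if (fake_panicstr != NULL)
		return (0);	/* never block while dumping */
	printf("would sleep for %d ticks\n", timo);
	return (0);
}

int
main(void)
{

	mock_assert_owned(0);		/* normal operation: complains */
	mock_msleep(100);		/* normal operation: would sleep */

	fake_panicstr = "panic: example";
	mock_assert_owned(0);		/* now a no-op */
	mock_msleep(100);		/* returns immediately */
	return (0);
}

In other words, once a panic is underway the diagnostic machinery gets
out of the way of the dump instead of asserting or sleeping.  The real
changes are in the patch below.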
With these fixes in place, I can get a crashdump with a populated
ktr_buf from a witness-related panic.  Joy!

Many thanks to jhb for providing the patch for #1 above and to peter
for the patch for #2 above.  None of this is actually my own work.
I'm just the guy pressing the panic button on his box incessantly. :-)

Ciao,
Sheldon.

Index: kern/kern_mutex.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_mutex.c,v
retrieving revision 1.64
diff -u -d -r1.64 kern_mutex.c
--- kern/kern_mutex.c 2001/06/25 18:29:32 1.64
+++ kern/kern_mutex.c 2001/07/30 23:14:32
@@ -562,6 +562,9 @@
 void
 _mtx_assert(struct mtx *m, int what, const char *file, int line)
 {
+
+        if (panicstr != NULL)
+                return;
         switch (what) {
         case MA_OWNED:
         case MA_OWNED | MA_RECURSED:
Index: kern/kern_synch.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_synch.c,v
retrieving revision 1.148
diff -u -d -r1.148 kern_synch.c
--- kern/kern_synch.c 2001/07/06 01:16:42 1.148
+++ kern/kern_synch.c 2001/07/31 01:07:23
@@ -554,6 +554,18 @@
         KASSERT(timo > 0 || mtx_owned(&Giant) || mtx != NULL,
             ("sleeping without a mutex"));
         mtx_lock_spin(&sched_lock);
+        if (cold || panicstr) {
+                /*
+                 * After a panic, or during autoconfiguration,
+                 * just give interrupts a chance, then just return;
+                 * don't run any other procs or panic below,
+                 * in case this is the idle process and already asleep.
+                 */
+                if (mtx != NULL && priority & PDROP)
+                        mtx_unlock_flags(mtx, MTX_NOSWITCH);
+                mtx_unlock_spin(&sched_lock);
+                return (0);
+        }
         DROP_GIANT_NOSWITCH();
         if (mtx != NULL) {
                 mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
Index: kern/subr_trap.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/subr_trap.c,v
retrieving revision 1.196
diff -u -d -r1.196 subr_trap.c
--- kern/subr_trap.c 2001/07/04 15:36:30 1.196
+++ kern/subr_trap.c 2001/07/30 23:14:42
@@ -72,9 +72,9 @@
         while ((sig = CURSIG(p)) != 0)
                 postsig(sig);
         mtx_unlock(&Giant);
+        PROC_UNLOCK(p);
 
         mtx_lock_spin(&sched_lock);
-        PROC_UNLOCK_NOSWITCH(p);
         p->p_pri.pri_level = p->p_pri.pri_user;
         if (resched_wanted(p)) {
                 /*
@@ -96,24 +96,22 @@
                 while ((sig = CURSIG(p)) != 0)
                         postsig(sig);
                 mtx_unlock(&Giant);
-                mtx_lock_spin(&sched_lock);
-                PROC_UNLOCK_NOSWITCH(p);
-        }
+                PROC_UNLOCK(p);
+        } else
+                mtx_unlock_spin(&sched_lock);
 
         /*
          * Charge system time if profiling.
          */
-        if (p->p_sflag & PS_PROFIL) {
-                mtx_unlock_spin(&sched_lock);
+        if (p->p_sflag & PS_PROFIL)
                 addupc_task(p, TRAPF_PC(frame),
                     (u_int)(p->p_sticks - oticks) * psratio);
-        } else
-                mtx_unlock_spin(&sched_lock);
 }
 
 /*
  * Process an asynchronous software trap.
  * This is relatively easy.
+ * This function will return with interrupts disabled.
  */
 void
 ast(framep)
@@ -121,68 +119,64 @@
 {
         struct proc *p = CURPROC;
         u_quad_t sticks;
+        critical_t s;
+        int sflag;
 #if defined(DEV_NPX) && !defined(SMP)
         int ucode;
 #endif
 
         KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode"));
-
-        /*
-         * We check for a pending AST here rather than in the assembly as
-         * acquiring and releasing mutexes in assembly is not fun.
-         */
-        mtx_lock_spin(&sched_lock);
-        if (!(astpending(p) || resched_wanted(p))) {
-                mtx_unlock_spin(&sched_lock);
-                return;
-        }
-
-        sticks = p->p_sticks;
-        p->p_frame = framep;
-
-        astoff(p);
-        cnt.v_soft++;
-        mtx_intr_enable(&sched_lock);
-        if (p->p_sflag & PS_OWEUPC) {
-                p->p_sflag &= ~PS_OWEUPC;
-                mtx_unlock_spin(&sched_lock);
-                mtx_lock(&Giant);
-                addupc_task(p, p->p_stats->p_prof.pr_addr,
-                    p->p_stats->p_prof.pr_ticks);
+        s = critical_enter();
+        while (astpending(p) || resched_wanted(p)) {
+                critical_exit(s);
+                p->p_frame = framep;
+                /*
+                 * This updates the p_sflag's for the checks below in one
+                 * "atomic" operation with turning off the astpending flag.
+                 * If another AST is triggered while we are handling the
+                 * AST's saved in sflag, the astpending flag will be set and
+                 * we will loop again.
+                 */
                 mtx_lock_spin(&sched_lock);
-        }
-        if (p->p_sflag & PS_ALRMPEND) {
-                p->p_sflag &= ~PS_ALRMPEND;
+                sticks = p->p_sticks;
+                sflag = p->p_sflag;
+                p->p_sflag &= ~(PS_OWEUPC | PS_ALRMPEND | PS_PROFPEND);
+                astoff(p);
+                cnt.v_soft++;
                 mtx_unlock_spin(&sched_lock);
-                PROC_LOCK(p);
-                psignal(p, SIGVTALRM);
-                PROC_UNLOCK(p);
-                mtx_lock_spin(&sched_lock);
-        }
+                if (sflag & PS_OWEUPC)
+                        addupc_task(p, p->p_stats->p_prof.pr_addr,
+                            p->p_stats->p_prof.pr_ticks);
+                if (sflag & PS_ALRMPEND) {
+                        PROC_LOCK(p);
+                        psignal(p, SIGVTALRM);
+                        PROC_UNLOCK(p);
+                }
 #if defined(DEV_NPX) && !defined(SMP)
-        if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
-                PCPU_GET(curpcb)->pcb_flags &= ~PCB_NPXTRAP;
-                mtx_unlock_spin(&sched_lock);
-                ucode = npxtrap();
-                if (ucode != -1) {
-                        if (!mtx_owned(&Giant))
-                                mtx_lock(&Giant);
-                        trapsignal(p, SIGFPE, ucode);
+                /*
+                 * XXX: Does this need a lock?  So long as npxtrap's only
+                 * happen due to a userland fault, we don't need to worry
+                 * about nested faults while in a kernel context.
+                 */
+                if (PCPU_GET(curpcb)->pcb_flags & PCB_NPXTRAP) {
+                        PCPU_GET(curpcb)->pcb_flags &= ~PCB_NPXTRAP;
+                        ucode = npxtrap();
+                        if (ucode != -1) {
+                                if (!mtx_owned(&Giant))
+                                        mtx_lock(&Giant);
+                                trapsignal(p, SIGFPE, ucode);
+                        }
                 }
-                mtx_lock_spin(&sched_lock);
-        }
 #endif
-        if (p->p_sflag & PS_PROFPEND) {
-                p->p_sflag &= ~PS_PROFPEND;
-                mtx_unlock_spin(&sched_lock);
-                PROC_LOCK(p);
-                psignal(p, SIGPROF);
-                PROC_UNLOCK(p);
-        } else
-                mtx_unlock_spin(&sched_lock);
-
-        userret(p, framep, sticks);
+                if (sflag & PS_PROFPEND) {
+                        PROC_LOCK(p);
+                        psignal(p, SIGPROF);
+                        PROC_UNLOCK(p);
+                }
 
-        if (mtx_owned(&Giant))
-                mtx_unlock(&Giant);
+                userret(p, framep, sticks);
+                if (mtx_owned(&Giant))
+                        mtx_unlock(&Giant);
+                s = critical_enter();
+        }
 }
Index: kern/subr_witness.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/subr_witness.c,v
retrieving revision 1.80
diff -u -d -r1.80 subr_witness.c
--- kern/subr_witness.c 2001/07/20 23:29:25 1.80
+++ kern/subr_witness.c 2001/07/30 23:17:52
@@ -355,7 +355,7 @@
         if (lock_cur_cnt > lock_max_cnt)
                 lock_max_cnt = lock_cur_cnt;
         mtx_unlock(&all_mtx);
-        if (!witness_cold && !witness_dead &&
+        if (!witness_cold && !witness_dead && panicstr == NULL &&
             (lock->lo_flags & LO_WITNESS) != 0)
                 lock->lo_witness = enroll(lock->lo_name, class);
         else
@@ -469,7 +469,7 @@
 #endif /* DDB */
 
         if (witness_cold || witness_dead || lock->lo_witness == NULL ||
-            panicstr)
+            panicstr != NULL)
                 return;
         w = lock->lo_witness;
         class = lock->lo_class;
@@ -723,7 +723,7 @@
         int i, j;
 
         if (witness_cold || witness_dead || lock->lo_witness == NULL ||
-            panicstr)
+            panicstr != NULL)
                 return;
         p = curproc;
         class = lock->lo_class;
@@ -821,7 +821,7 @@
         critical_t savecrit;
         int i, n;
 
-        if (witness_dead || panicstr)
+        if (witness_dead || panicstr != NULL)
                 return (0);
         KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
         n = 0;
@@ -872,7 +872,7 @@
 {
         struct witness *w;
 
-        if (!witness_watch || witness_dead)
+        if (!witness_watch || witness_dead || panicstr != NULL)
                 return (NULL);
 
         if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin)
@@ -1309,7 +1309,7 @@
         struct lock_instance *instance;
 
         KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
-        if (lock->lo_witness == NULL || witness_dead)
+        if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
                 return;
 
         KASSERT(lock->lo_class->lc_flags & LC_SLEEPLOCK,
@@ -1329,7 +1329,7 @@
         struct lock_instance *instance;
 
         KASSERT(!witness_cold, ("%s: witness_cold\n", __func__));
-        if (lock->lo_witness == NULL || witness_dead)
+        if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
                 return;
 
         KASSERT(lock->lo_class->lc_flags & LC_SLEEPLOCK,
@@ -1351,7 +1351,7 @@
 #ifdef INVARIANT_SUPPORT
         struct lock_instance *instance;
 
-        if (lock->lo_witness == NULL || witness_dead)
+        if (lock->lo_witness == NULL || witness_dead || panicstr != NULL)
                 return;
 
         if ((lock->lo_class->lc_flags & LC_SLEEPLOCK) != 0)

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message