Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 13 Jun 2005 10:55:33 -0400
From:      "Richard Legault" <rlegault@SANDVINE.com>
To:        <freebsd-stable@freebsd.org>
Subject:   reboot panic kernel 5.3
Message-ID:  <71837C040963F748B9B94E123A28967853029A@mailserver.sandvine.com>

next in thread | raw e-mail | index | archive | help
		Kernel is 5.3  +  Sandvine modifications

		I have uncovered a race condition during reboot: as the system is going down, the kernel panics.
		This problem is reproducible on my system; it occurs in approximately 1 out of every 20 reboots.

		Stack Trace and variables of interest.
		#0  doadump () at pcpu.h:159
		#1  0xa05924a2 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:421
		#2  0xa05928a0 in panic (fmt=0xa076ff74 "%s") at /usr/src/sys/kern/kern_shutdown.c:584
		#3  0xa073853f in trap_fatal (frame=0xcdb54c1c, eva=0) at /usr/src/sys/i386/i386/trap.c:829
		#4  0xa0738215 in trap_pfault (frame=0xcdb54c1c, usermode=0, eva=8) at /usr/src/sys/i386/i386/trap.c:746
		#5  0xa0737d9e in trap (frame=3D
		      {tf_fs =3D 24, tf_es =3D 16, tf_ds =3D -1520304112, tf_edi =3D =
15, tf_esi =3D -1511925548, tf_ebp =3D -843756432, tf_isp =3D =
-843756472, tf_ebx =3D -1516062592
		0, tf_ecx =3D -1516062592, tf_eax =3D 0, tf_trapno =3D 12, tf_err =3D =
0, tf_eip =3D -1604789009, tf_cs =3D 8, tf_eflags =3D 66118, tf_esp =3D =
-1516062592, tf_ss =3D 0}
		    at /usr/src/sys/i386/i386/trap.c:436
		#6  0xa0723d8a in calltrap () at =
/usr/src/sys/i386/i386/exception.s:202
		#7  0x00000018 in ?? ()
		#8  0x00000010 in ?? ()
		#9  0xa5620010 in ?? ()
		#10 0x0000000f in ?? ()
		#11 0xa5e1d8d4 in ?? ()
		#12 0xcdb54c70 in ?? ()
		#13 0xcdb54c48 in ?? ()
		#14 0xa5a2b880 in ?? ()
		#15 0x00000000 in ?? ()
		#16 0xa5a2b880 in ?? ()
		#17 0x00000000 in ?? ()
		#18 0x0000000c in ?? ()
		#19 0x00000000 in ?? ()
		#20 0xa058dcef in cr_cansignal (cred=0xa5a2b880, proc=0xa5e1d8d4, signum=15) at /usr/src/sys/kern/kern_prot.c:1495
		#21 0xa058dd87 in p_cansignal (td=0xa5789c80, p=0xa5e1d8d4, signum=15) at /usr/src/sys/kern/kern_prot.c:1535
		#22 0xa0595192 in killpg1 (td=0xa5789c80, sig=15, pgid=-1511925548, all=1) at /usr/src/sys/kern/kern_sig.c:1321
		#23 0xa059553e in kill (td=0xa5789c80, uap=0xcdb54d14) at /usr/src/sys/kern/kern_sig.c:1398
		#24 0xa07388db in syscall (frame=3D
		      {tf_fs =3D 47, tf_es =3D 47, tf_ds =3D 47, tf_edi =3D 0, tf_esi =
=3D 1, tf_ebp =3D -1614811884, tf_isp =3D -843756172, tf_ebx =3D =
1746232072, tf_edx =3D 2, tf_ecx
		x =3D 37, tf_trapno =3D 12, tf_err =3D 2, tf_eip =3D 1745696235, tf_cs =
=3D 31, tf_eflags =3D 642, tf_esp =3D -1614811972, tf_ss =3D 47})
		    at /usr/src/sys/i386/i386/trap.c:1021
		#25 0xa0723ddf in Xint0x80_syscall () at =
/usr/src/sys/i386/i386/exception.s:263



		The code where the crash occurred in cr_cansignal

			if (cred->cr_ruid != proc->p_ucred->cr_ruid &&
			    cred->cr_ruid != proc->p_ucred->cr_svuid &&
			    cred->cr_uid != proc->p_ucred->cr_ruid &&
			    cred->cr_uid != proc->p_ucred->cr_svuid) {
				/* Not permitted without privilege. */
				error = suser_cred(cred, SUSER_ALLOWJAIL);
				if (error)
					return (error);
			}


		(kgdb) p *cred
		$2 =3D {cr_ref =3D 2614, cr_uid =3D 0, cr_ruid =3D 0, cr_svuid =3D 0, =
cr_ngroups =3D 3, cr_groups =3D {0, 0, 5, 0 <repeats 13 times>}, cr_rgid =
=3D 0, cr_svgid =3D 0,
		  cr_uidinfo =3D 0xa5620740, cr_ruidinfo =3D 0xa5620740, cr_prison =3D =
0x0, cr_label =3D 0x0, cr_mtxp =3D 0xa560946c}
		(kgdb) p *proc
		$3 =3D {p_list =3D {le_next =3D 0xa5b0154c, le_prev =3D 0xa07edc64}, =
p_ksegrps =3D {tqh_first =3D 0xa56fa620, tqh_last =3D 0xa56fa624}, =
p_threads =3D {
		    tqh_first =3D 0xa5b51e10, tqh_last =3D 0xa5b51e18}, p_suspended =
=3D {tqh_first =3D 0x0, tqh_last =3D 0xa5e1d8ec}, p_ucred =3D 0x0, p_fd =
=3D 0x0, p_fdtol =3D 0x0,
		  p_stats =3D 0xd0019000, p_limit =3D 0x0, p_upages_obj =3D =
0xa5b1cc60, p_sigacts =3D 0x0, p_flag =3D 24576, p_sflag =3D 1, p_state =
=3D PRS_NEW, p_pid =3D 1465, p_ha
		    le_next =3D 0x0, le_prev =3D 0xa561b6e4}, p_pglist =3D {le_next =
=3D 0xa5b4cc5c, le_prev =3D 0xa5b4b054}, p_pptr =3D 0xa5b4b000, =
p_sibling =3D {le_next =3D 0x0,
		    le_prev =3D 0xa5b4b068}, p_children =3D {lh_first =3D 0x0}, p_mtx =
=3D {mtx_object =3D {lo_class =3D 0xa07c19dc, lo_name =3D 0xa0789fa5 =
"process lock",
		      lo_type =3D 0xa0789fa5 "process lock", lo_flags =3D 4390912, =
lo_list =3D {tqe_next =3D 0x0, tqe_prev =3D 0x0}, lo_witness =3D 0x0}, =
mtx_lock =3D 2776145026,
		    mtx_recurse =3D 0}, p_oppid =3D 0, p_vmspace =3D 0x0, p_swtime =3D =
9, p_realtimer =3D {it_interval =3D {tv_sec =3D 0, tv_usec =3D 0}, =
it_value =3D {tv_sec =3D 0,
		      tv_usec =3D 0}}, p_runtime =3D {sec =3D 0, frac =3D =
29866236321160512}, p_uu =3D 0, p_su =3D 1590, p_iu =3D 0, p_uticks =3D =
0, p_sticks =3D 0, p_iticks =3D 0,
		  p_profthreads =3D 0, p_maxthrwaits =3D 0, p_traceflag =3D 0, =
p_tracevp =3D 0x0, p_tracecred =3D 0x0, p_textvp =3D 0x0, p_siglist =3D =
{__bits =3D {0, 0, 0, 0}},
		  p_lock =3D 0 '\0', p_sigiolst =3D {slh_first =3D 0x0}, p_sigparent =
=3D 20, p_sig =3D 0, p_code =3D 0, p_stops =3D 0, p_stype =3D 0, p_step =
=3D 0 '\0', p_pfsflags =3D 0
		  p_nlminfo =3D 0x0, p_aioinfo =3D 0x0, p_singlethread =3D 0x0, =
p_suspcount =3D 0, p_xthread =3D 0xa5b51e10, p_boundary_count =3D 0, =
p_magic =3D 3203398350,
		  p_comm =3D "sleep\000r", '\0' <repeats 12 times>, p_pgrp =3D 0x0, =
p_sysent =3D 0xa07dae20, p_args =3D 0x0, p_cpulimit =3D =
9223372036854775807, p_nice =3D 0 '\0
		  p_xstat =3D 0, p_klist =3D {kl_lock =3D 0xa5e1d940, kl_list =3D =
{slh_first =3D 0x0}}, p_numthreads =3D 1, p_numksegrps =3D 1, p_md =3D =
{md_ldt =3D 0x0}, p_itcallout
		    c_links =3D {sle =3D {sle_next =3D 0x0}, tqe =3D {tqe_next =3D =
0x0, tqe_prev =3D 0x0}}, c_time =3D 0, c_arg =3D 0x0, c_func =3D 0, =
c_flags =3D 8}, p_uarea =3D 0xd00190
		  p_acflag =3D 0, p_ru =3D 0x0, p_peers =3D 0x0, p_leader =3D =
0xa5e1d8d4, p_emuldata =3D 0x0, p_label =3D 0x0, p_sched =3D 0xa5e1da98}
		(kgdb)


		Notice the value of proc->p_ucred:
		(kgdb) p proc->p_ucred
		$4 = (struct ucred *) 0x0
		(kgdb)

		Thus the crash.
		Somehow p_ucred has been set to NULL during this routine.
		At the time of the crash the following variables had these values:
		cred->jail=0
		see_other_uids=1
		see_other_gids=1
		Thus proc->p_ucred is not used before the point of the crash.

		Uncertain where the race condition could reside.


		I noticed in function kern_wait() in kern_exit.c
		that the assignment p->p_ucred = NULL was not protected by a lock.
		I added PROC_LOCK(p) and PROC_UNLOCK(p) around that code,
		but the panic still occurred.

		This is a critical issue for us, and I am willing to assist in any way that I can.


Richard Legault
Senior Engineer
519-880-2400 ext 2722
www.sandvine.com





Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?71837C040963F748B9B94E123A28967853029A>