Skip site navigation (1)Skip section navigation (2)
Date:      Sat, 17 Feb 2018 17:39:39 -0800
From:      Mark Millard <marklmi26-fbsd@yahoo.com>
To:        FreeBSD Current <freebsd-current@freebsd.org>, FreeBSD Hackers <freebsd-hackers@freebsd.org>
Subject:   amd64 head -r329465 (non-debug build, but with symbols): "panic: spin lock held too long" during make check-old, reported during a sys_vfork
Message-ID:  <DA76F62D-3373-47CA-AD95-DE9BA580772B@yahoo.com>

next in thread | raw e-mail | index | archive | help
This is for FreeBSD running under Hyper-V on a Windows 10 Pro machine.
The FreeBSD "disk" bindings are to SSDs, not the insides of NTFS files.
29 logical processors assigned to FreeBSD (on a 32-thread Ryzen
Threadripper 1950X). No other Hyper-V use.

This happened during:

# =
~/sys_build_scripts.amd64-host/make_powerpc64vtsc_nodebug_clang_altbinutil=
s-amd64-host.sh check-old =
DESTDIR=3D/usr/obj/DESTDIRs/clang-powerpc64-installworld_altbinutils
Script started, output file is =
/root/sys_typescripts/typescript_make_powerpc64vtsc_nodebug_clang_altbinut=
ils-amd64-host-2018-02-17:15:56:20
>>> Checking for old files


(Hand typed from a picture of a window's content
at slighly different times, expect typos:)

KDB: enter: panic
[thread pid 42170 tid 100752 ]
Stopped at kdb_enter+0x3b: movq $0,kdb_why
db> call doadump
Dumping 4825 out of 110559 MB: ... (omitted) ...
Dump complete
=3D 0


(The "pid 42170" identification as the process reporting the
issue does not seem to appear in the core.txt.0 file.)


# ls -lTdt /var/crash/*
-rw-r--r--  1 root  wheel      100792 Feb 17 16:09:18 2018 =
/var/crash/core.txt.0
lrwxr-xr-x  1 root  wheel           8 Feb 17 16:09:08 2018 =
/var/crash/vmcore.last -> vmcore.0
lrwxr-xr-x  1 root  wheel           6 Feb 17 16:09:08 2018 =
/var/crash/info.last -> info.0
-rw-------  1 root  wheel  5060296704 Feb 17 16:09:08 2018 =
/var/crash/vmcore.0
-rw-------  1 root  wheel         392 Feb 17 16:08:59 2018 =
/var/crash/info.0
-rw-r--r--  1 root  wheel           2 Feb 17 16:08:59 2018 =
/var/crash/bounds
-rw-r--r--  1 root  wheel           5 Nov 22 04:34:36 2017 =
/var/crash/minfree

=46rom /var/crash/core.txt.0 :

Unread portion of the kernel message buffer:
spin lock 0xffffffff81b2dcc0 (sched lock 5) held by 0xfffff8011d936560 =
(tid 100691) too long
panic: spin lock held too long
cpuid =3D 5
time =3D 1518911834
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2b/frame =
0xfffffe00f10094d0
vpanic() at vpanic+0x18d/frame 0xfffffe00f1009530
panic() at panic+0x43/frame 0xfffffe00f1009590
_mtx_lock_indefinite_check() at _mtx_lock_indefinite_check+0x71/frame =
0xfffffe00f10095a0
thread_lock_flags_() at thread_lock_flags_+0xdb/frame 0xfffffe00f1009610
statclock_cnt() at statclock_cnt+0xdc/frame 0xfffffe00f1009650
handleevents() at handleevents+0x113/frame 0xfffffe00f10096a0
timercb() at timercb+0xa9/frame 0xfffffe00f10096f0
lapic_handle_timer() at lapic_handle_timer+0xa7/frame 0xfffffe00f1009730
timerint_u() at timerint_u+0x96/frame 0xfffffe00f1009810
thread_lock_flags_() at thread_lock_flags_+0xc1/frame 0xfffffe00f1009880
fork1() at fork1+0x1b9f/frame 0xfffffe00f1009930
sys_vfork() at sys_vfork+0x4c/frame 0xfffffe00f1009980
amd64_syscall() at amd64_syscall+0xa48/frame 0xfffffe00f1009ab0
fast_syscall_common() at fast_syscall_common+0x101/frame 0x7fffffffcfc0
KDB: enter: panic

__curthread () at ./machine/pcpu.h:230
230             __asm("movq %%gs:%1,%0" : "=3Dr" (td)
(kgdb) #0  __curthread () at ./machine/pcpu.h:230
#1  doadump (textdump=3D-2122191464) at =
/usr/src/sys/kern/kern_shutdown.c:347
#2  0xffffffff8040b42c in db_fncall_generic (addr=3D<optimized out>,=20
   rv=3D<optimized out>, nargs=3D<optimized out>, args=3D<optimized =
out>)
   at /usr/src/sys/ddb/db_command.c:609
#3  db_fncall (dummy1=3D<optimized out>, dummy2=3D<optimized out>,=20
   dummy3=3D<optimized out>, dummy4=3D<optimized out>)
   at /usr/src/sys/ddb/db_command.c:657
#4  0xffffffff8040af79 in db_command (last_cmdp=3D<optimized out>,=20
   cmd_table=3D<optimized out>, dopager=3D<optimized out>)
   at /usr/src/sys/ddb/db_command.c:481
#5  0xffffffff8040acf4 in db_command_loop ()
   at /usr/src/sys/ddb/db_command.c:534
#6  0xffffffff8040df9f in db_trap (type=3D<optimized out>, =
code=3D<optimized out>)
   at /usr/src/sys/ddb/db_main.c:250
#7  0xffffffff80b370e3 in kdb_trap (type=3D3, code=3D-61456, =
tf=3D<optimized out>)
   at /usr/src/sys/kern/subr_kdb.c:697
#8  0xffffffff80fa2c5c in trap (frame=3D0xfffffe00f1009400)
   at /usr/src/sys/amd64/amd64/trap.c:547
#9  <signal handler called>
#10 kdb_enter (why=3D0xffffffff811f280b "panic", msg=3D<optimized out>)
   at /usr/src/sys/kern/subr_kdb.c:479
#11 0xffffffff80aef17a in vpanic (fmt=3D<optimized out>, =
ap=3D0xfffffe00f1009570)
   at /usr/src/sys/kern/kern_shutdown.c:801
#12 0xffffffff80aeefc3 in panic (fmt=3D0x0)
   at /usr/src/sys/kern/kern_shutdown.c:739
#13 0xffffffff80acfa31 in _mtx_lock_indefinite_check (m=3D<optimized =
out>,=20
   ldap=3D<optimized out>) at /usr/src/sys/kern/kern_mutex.c:1224
#14 0xffffffff80acfb9b in thread_lock_flags_ (td=3D0xfffff818719d1000,=20=

   opts=3D<optimized out>, file=3D<optimized out>, line=3D<optimized =
out>)
   at /usr/src/sys/kern/kern_mutex.c:913
#15 0xffffffff80a89d6c in statclock_cnt (cnt=3D1, usermode=3D<optimized =
out>)
   at /usr/src/sys/kern/kern_clock.c:768
#16 0xffffffff810d0003 in handleevents (now=3D43230207690178, fake=3D0)
   at /usr/src/sys/kern/kern_clocksource.c:196
#17 0xffffffff810d0709 in timercb (et=3D0xffffffff81c528e8 <lapic_et>,=20=

   arg=3D<optimized out>) at /usr/src/sys/kern/kern_clocksource.c:353
#18 0xffffffff8110dad7 in lapic_handle_timer (frame=3D0xfffffe00f1009740)
   at /usr/src/sys/x86/x86/local_apic.c:1305
#19 0xffffffff80f849d0 in timerint_u ()
   at /usr/src/sys/amd64/amd64/apic_vector.S:132
#20 0xfffffe00f1009828 in ?? ()
#21 0x000000000000b6b1 in ?? ()
#22 0x0000000000002000 in ?? ()
#23 0x00000000ffffdfff in ?? ()
#24 0x00c11c08e43e7fd5 in ?? ()
#25 0x00000000000003e8 in ?? ()
#26 0x00000000fffff1eb in ?? ()
#27 0xfffffe00f1009828 in ?? ()
#28 0xfffffe00f1009810 in ?? ()
#29 0x00000000800e6d01 in ?? ()
#30 0x0000000000000064 in ?? ()
#31 0xfffff8011d936560 in ?? ()
#32 0xfffffe00f1009828 in ?? ()
#33 0xffffffff81771014 in mtx_delay ()
#34 0x0000000000000000 in ?? ()
(kgdb)=20

. . .
UID   PID  PPID CPU PRI NI   VSZ   RSS MWCHAN   STAT TT     TIME COMMAND
. . .
  0  1110  1102   0  33  0 12024  3076 ttyin    D+    -  0:00.00 [sh]
  0  1120  1044   0  20  0 18572  7936 select   Ds    -  0:00.00 [sshd]
1001  1123  1120   0  20  0 18936  8044 select   D     -  0:00.00 [sshd]
1001  1124  1123   0  34  0 12120  3196 wait     Ds    -  0:00.00 [sh]
  0  1134  1124   0  22  0 12060  3148 wait     D     -  0:00.00 [su]
  0  1135  1134   0  20  0 12312  3244 wait     D     -  0:00.00 [sh]
  0 42072  1135   0  25  0 11464  3060 wait     D+    -  0:00.00 [sh]
  0 42072  1135   0  25  0 11464  3060 wait     D+    -  0:00.00 [sh]
  0 42075 42072   0  20  0 10928  2480 select   D+    -  0:00.00 =
[script]
  0 42076 42075   0  52  0 10160  1396 wait     Ds+   -  0:00.00 [make]
  0 42108 42076   0  52  0 12236  3224 wait     D+    -  0:00.00 [make]
  0 42168 42108   0  52  0 11496  3068 wait     D+    -  0:00.00 [sh]
  0 42169 42168   0  52  0 12608  3568 pipewr   D+    -  0:00.00 [make]
  0 42170 42168   0  72  0 10956  2284 -        R+    -  0:00.00 [xargs]
  0 42171 42168   0  35  0 11500  3064 piperd   D+    -  0:00.00 [sh]
  0 46769 42170   0  72  0 10956  2284 -        ?VL+  -  0:00.00 [xargs]
. . .




Context details:

# uname -apKU
FreeBSD FBSDFSSD 12.0-CURRENT FreeBSD 12.0-CURRENT  r329465M  amd64 =
amd64 1200058 1200058

# svnlite status /usr/src | sort
?       /usr/src/sys/amd64/conf/GENERIC-DBG
?       /usr/src/sys/amd64/conf/GENERIC-NODBG
?       /usr/src/sys/arm/conf/GENERIC-DBG
?       /usr/src/sys/arm/conf/GENERIC-NODBG
?       /usr/src/sys/arm64/conf/GENERIC-DBG
?       /usr/src/sys/arm64/conf/GENERIC-NODBG
?       /usr/src/sys/powerpc/conf/GENERIC64vtsc-DBG
?       /usr/src/sys/powerpc/conf/GENERIC64vtsc-NODBG
?       /usr/src/sys/powerpc/conf/GENERICvtsc-DBG
?       /usr/src/sys/powerpc/conf/GENERICvtsc-NODBG
M       /usr/src/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
M       /usr/src/contrib/llvm/tools/lld/ELF/Arch/PPC64.cpp
M       /usr/src/crypto/openssl/crypto/armcap.c
M       /usr/src/lib/libkvm/kvm_powerpc.c
M       /usr/src/lib/libkvm/kvm_private.c
M       /usr/src/stand/defs.mk
M       /usr/src/stand/powerpc/boot1.chrp/Makefile
M       /usr/src/stand/powerpc/kboot/Makefile
M       /usr/src/sys/arm64/arm64/identcpu.c
M       /usr/src/sys/conf/kmod.mk
M       /usr/src/sys/conf/ldscript.powerpc
M       /usr/src/sys/kern/subr_pcpu.c
M       /usr/src/sys/powerpc/aim/mmu_oea64.c
M       /usr/src/sys/powerpc/ofw/ofw_machdep.c
M       /usr/src/sys/powerpc/powerpc/interrupt.c
M       /usr/src/sys/powerpc/powerpc/mp_machdep.c
M       /usr/src/sys/powerpc/powerpc/trap.c
M       /usr/src/sys/vm/uma_core.c
M       /usr/src/usr.bin/top/machine.c

The GENERIC* files include GENERIC and then set explicit
debug status choices of mine. Most of the rest is tied to
historical powerpc and powerpc64 investigations. I also
have top report the maximum swap-in-use figure that it
sees during its run.

# svnlite diff /usr/src/sys/vm/uma_core.c
Index: /usr/src/sys/vm/uma_core.c
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
--- /usr/src/sys/vm/uma_core.c	(revision 329465)
+++ /usr/src/sys/vm/uma_core.c	(working copy)
@@ -3428,7 +3428,7 @@
void
uma_reclaim_wakeup(void)
{
-
+printf("limit %lX, total %lX, needed %d\n", uma_kmem_limit, =
uma_kmem_total, uma_reclaim_needed);
	if (atomic_fetchadd_int(&uma_reclaim_needed, 1) =3D=3D 0)
		wakeup(uma_reclaim);
}



Side note: It took the automatic fsck and 2 manual fsck
runs to get back to a clean status (before I could get to
multi-user).

=3D=3D=3D
Mark Millard
marklmi at yahoo.com
( markmi at dsl-only.net is
going away in 2018-Feb, late)




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?DA76F62D-3373-47CA-AD95-DE9BA580772B>