Date: Tue, 4 May 2021 20:38:39 +0200 From: Michael Schmiedgen <schmiedgen@gmx.net> To: Mark Johnston <markj@freebsd.org> Cc: freebsd-net@freebsd.org Subject: Re: page fault while in kernel mode - after upgrade from 12.2 to 13.0 Message-ID: <d37716a3-927d-b200-c805-b31d7b36383d@gmx.net> In-Reply-To: <YJBS8YMZFkMtWPEu@nuc> References: <d7c3bfbd-2e54-c0f4-ec23-5dab08287ea3@gmx.net> <YJBS8YMZFkMtWPEu@nuc>
next in thread | previous in thread | raw e-mail | index | archive | help
Hi Mark, sorry for the delay, I only can test after work. I triggered another 2 panics, this time with a different result (see below). Can I provide some more information? Thank you! Michael --- #1 Fatal trap 12: page fault while in kernel mode cpuid = 1; apic id = 01 fault virtual address = 0x388 fault code = supervisor read data, page not present instruction pointer = 0x20:0xffffffff80d3fa67 stack pointer = 0x28:0xfffffe0115bea9c0 frame pointer = 0x28:0xfffffe0115beaa20 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 12 (swi1: netisr 0) trap number = 12 panic: page fault cpuid = 1 time = 1620144777 KDB: stack backtrace: #0 0xffffffff80c57345 at kdb_backtrace+0x65 #1 0xffffffff80c09d21 at vpanic+0x181 #2 0xffffffff80c09b93 at panic+0x43 #3 0xffffffff8108b187 at trap_fatal+0x387 #4 0xffffffff8108b1df at trap_pfault+0x4f #5 0xffffffff8108a83d at trap+0x27d #6 0xffffffff810617a8 at calltrap+0x8 #7 0xffffffff80bcae5d at ithread_loop+0x24d #8 0xffffffff80bc7c5e at fork_exit+0x7e #9 0xffffffff8106282e at fork_trampoline+0xe Uptime: 3m51s Dumping 2617 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91% __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55 55 __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu, (kgdb) list *0xffffffff80d3fa67 0xffffffff80d3fa67 is in swi_net (/usr/src/sys/net/netisr.c:918).
913 if (local_npw.nw_head == NULL) 914 local_npw.nw_tail = NULL; 915 local_npw.nw_len--; 916 VNET_ASSERT(m->m_pkthdr.rcvif != NULL, 917 ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m)); 918 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 919 netisr_proto[proto].np_handler(m); 920 CURVNET_RESTORE(); 921 } 922 KASSERT(local_npw.nw_len == 0, (kgdb) backtrace #0 __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55 #1 doadump (textdump=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:399 #2 0xffffffff80c09916 in kern_reboot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:486 #3 0xffffffff80c09d90 in vpanic (fmt=<optimized out>, ap=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:919 #4 0xffffffff80c09b93 in panic (fmt=<unavailable>) at /usr/src/sys/kern/kern_shutdown.c:843 #5 0xffffffff8108b187 in trap_fatal (frame=0xfffffe0115bea900, eva=904) at /usr/src/sys/amd64/amd64/trap.c:915 #6 0xffffffff8108b1df in trap_pfault (frame=frame@entry=0xfffffe0115bea900, usermode=false, signo=<optimized out>, signo@entry=0x0, ucode=<optimized out>, ucode@entry=0x0) at /usr/src/sys/amd64/amd64/trap.c:732 #7 0xffffffff8108a83d in trap (frame=0xfffffe0115bea900) at /usr/src/sys/amd64/amd64/trap.c:398 #8 <signal handler called> #9 0xffffffff80d3fa67 in netisr_process_workstream_proto (nwsp=<optimized out>, proto=1) at /usr/src/sys/net/netisr.c:918 #10 swi_net (arg=<optimized out>) at /usr/src/sys/net/netisr.c:966 #11 0xffffffff80bcae5d in intr_event_execute_handlers (p=<optimized out>, ie=0xfffff80003dbb600) at /usr/src/sys/kern/kern_intr.c:1168 #12 ithread_execute_handlers (p=<optimized out>, ie=0xfffff80003dbb600) at /usr/src/sys/kern/kern_intr.c:1181 #13 ithread_loop (arg=arg@entry=0xfffff80003dced40) at /usr/src/sys/kern/kern_intr.c:1269 #14 0xffffffff80bc7c5e in fork_exit (callout=0xffffffff80bcac10 <ithread_loop>, arg=0xfffff80003dced40, frame=0xfffffe0115beab00) at
/usr/src/sys/kern/kern_fork.c:1069 #15 <signal handler called> --- #2 Fatal trap 12: page fault while in kernel mode cpuid = 1; apic id = 01 fault virtual address = 0x8 fault code = supervisor read data, page not present instruction pointer = 0x20:0xffffffff80ca599c stack pointer = 0x28:0xfffffe0115bea6c0 frame pointer = 0x28:0xfffffe0115bea700 code segment = base 0x0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 12 (swi1: netisr 0) trap number = 12 panic: page fault cpuid = 1 time = 1620152374 KDB: stack backtrace: #0 0xffffffff80c57345 at kdb_backtrace+0x65 #1 0xffffffff80c09d21 at vpanic+0x181 #2 0xffffffff80c09b93 at panic+0x43 #3 0xffffffff8108b187 at trap_fatal+0x387 #4 0xffffffff8108b1df at trap_pfault+0x4f #5 0xffffffff8108a83d at trap+0x27d #6 0xffffffff810617a8 at calltrap+0x8 #7 0xffffffff80dbf0ae at tcp_do_segment+0x10ce #8 0xffffffff80dbd21e at tcp_input+0xabe #9 0xffffffff80dafc15 at ip_input+0x125 #10 0xffffffff80d3fa7b at swi_net+0x12b #11 0xffffffff80bcae5d at ithread_loop+0x24d #12 0xffffffff80bc7c5e at fork_exit+0x7e #13 0xffffffff8106282e at fork_trampoline+0xe Uptime: 2h3m59s Dumping 2666 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91% __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55 55 __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu, (kgdb) #0 __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55 #1 doadump (textdump=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:399 #2 0xffffffff80c09916 in kern_reboot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:486 #3 0xffffffff80c09d90 in vpanic (fmt=<optimized out>, ap=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:919 #4 0xffffffff80c09b93 in panic (fmt=<unavailable>) at /usr/src/sys/kern/kern_shutdown.c:843 #5 0xffffffff8108b187 in trap_fatal (frame=0xfffffe0115bea600, eva=8) at
/usr/src/sys/amd64/amd64/trap.c:915 #6 0xffffffff8108b1df in trap_pfault (frame=frame@entry=0xfffffe0115bea600, usermode=false, signo=<optimized out>, signo@entry=0x0, ucode=<optimized out>, ucode@entry=0x0) at /usr/src/sys/amd64/amd64/trap.c:732 #7 0xffffffff8108a83d in trap (frame=0xfffffe0115bea600) at /usr/src/sys/amd64/amd64/trap.c:398 #8 <signal handler called> #9 sbcut_internal (sb=0xfffff80522aa09c0, len=203, len@entry=476) at /usr/src/sys/kern/uipc_sockbuf.c:1491 #10 0xffffffff80ca5b8a in sbcut_locked (sb=0xfffff80522aa09c0, len=-743943424, len@entry=476) at /usr/src/sys/kern/uipc_sockbuf.c:1591 #11 0xffffffff80dbf0ae in tcp_do_segment (m=0xfffff8004c2aae00, th=<optimized out>, so=<optimized out>, tp=<optimized out>, drop_hdrlen=52, tlen=<optimized out>, iptos=0 '\000') at /usr/src/sys/netinet/tcp_input.c:2918 #12 0xffffffff80dbd21e in tcp_input (mp=<optimized out>, offp=<optimized out>, proto=<optimized out>) at /usr/src/sys/netinet/tcp_input.c:1382 #13 0xffffffff80dafc15 in ip_input (m=0x0) at /usr/src/sys/netinet/ip_input.c:829 #14 0xffffffff80d3fa7b in netisr_process_workstream_proto ( nwsp=<optimized out>, proto=1) at /usr/src/sys/net/netisr.c:919 #15 swi_net (arg=<optimized out>) at /usr/src/sys/net/netisr.c:966 #16 0xffffffff80bcae5d in intr_event_execute_handlers (p=<optimized out>, ie=0xfffff80003bbe500) at /usr/src/sys/kern/kern_intr.c:1168 #17 ithread_execute_handlers (p=<optimized out>, ie=0xfffff80003bbe500) at /usr/src/sys/kern/kern_intr.c:1181 #18 ithread_loop (arg=arg@entry=0xfffff80003cb6d40) at /usr/src/sys/kern/kern_intr.c:1269 #19 0xffffffff80bc7c5e in fork_exit ( callout=0xffffffff80bcac10 <ithread_loop>, arg=0xfffff80003cb6d40, frame=0xfffffe0115beab00) at /usr/src/sys/kern/kern_fork.c:1069 #20 <signal handler called> --- On 03.05.2021 21:45, Mark Johnston wrote: > On Mon, May 03, 2021 at 08:04:30PM +0200, Michael Schmiedgen wrote: >> Hi List, >> >>
if I start a Samba jail, after a few seconds the system crashes. Very reproducible. >> >> System has ~10 jails and 3 bhyve VMs. Dell server, Xeon E3-1240, 64GB RAM, 3 way mirror ZFS. >> >> It also occurs a few seconds after I start a phone call using the SIP VM of that machine, >> very strange. >> >> I got some log messages suggesting raising somaxconn, so I did >> >> kern.ipc.somaxconn=4096 >> >> in sysctl.conf >> >> >> Below some debug information, please let me know if I should provide further information. >> >> Should I open a bug or something? >> >> Thank you very much! >> Michael >> >> >> >> Fatal trap 12: page fault while in kernel mode >> cpuid = 0; apic id = 00 >> fault virtual address = 0x0 >> fault code = supervisor read data, page not present >> instruction pointer = 0x20:0xffffffff80ca52c0 >> stack pointer = 0x28:0xfffffe019d039650 >> frame pointer = 0x28:0xfffffe019d039690 >> code segment = base 0x0, limit 0xfffff, type 0x1b >> = DPL 0, pres 1, long 1, def32 0, gran 1 >> processor eflags = interrupt enabled, resume, IOPL = 0 >> current process = 649 (devd) >> trap number = 12 >> panic: page fault >> cpuid = 0 >> time = 1620061253 >> KDB: stack backtrace: >> #0 0xffffffff80c57345 at kdb_backtrace+0x65 >> #1 0xffffffff80c09d21 at vpanic+0x181 >> #2 0xffffffff80c09b93 at panic+0x43 >> #3 0xffffffff8108b187 at trap_fatal+0x387 >> #4 0xffffffff8108b1df at trap_pfault+0x4f >> #5 0xffffffff8108a83d at trap+0x27d >> #6 0xffffffff810617a8 at calltrap+0x8 >> #7 0xffffffff80ca51c3 at sbappendaddr_locked+0x93 >> #8 0xffffffff80cb437a at uipc_send+0x73a >> #9 0xffffffff80ca9053 at sosend_generic+0x633 >> #10 0xffffffff80ca94e0 at sosend+0x50 >> #11 0xffffffff80caff2e at kern_sendit+0x20e >> #12 0xffffffff80cb032b at sendit+0x1db >> #13 0xffffffff80cb013d at sys_sendto+0x4d >> #14 0xffffffff8108ba8c at amd64_syscall+0x10c >> #15 0xffffffff810620ce at fast_syscall_common+0xf8 >> Uptime: 2m2s >> Dumping 2373 out of 65454
MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91% >> >> __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55 >> 55 __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu, >> (kgdb) list *0xffffffff80ca52c0 >> 0xffffffff80ca52c0 is in sbappendaddr_locked_internal (/usr/src/sys/kern/uipc_sockbuf.c:1169). >> 1164 if (ctrl_last) >> 1165 ctrl_last->m_next = m0; /* concatenate data to control */ >> 1166 else >> 1167 control = m0; >> 1168 m->m_next = control; >> 1169 for (n = m; n->m_next != NULL; n = n->m_next) >> 1170 sballoc(sb, n); >> 1171 sballoc(sb, n); >> 1172 nlast = n; >> 1173 SBLINKRECORD(sb, m); > > So we are crashing because "n" is somehow NULL? That seems difficult to > explain. Can you show the local variables in this frame? > > Does the panic always have the same stack trace? >
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?d37716a3-927d-b200-c805-b31d7b36383d>