Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 4 May 2021 20:38:39 +0200
From:      Michael Schmiedgen <schmiedgen@gmx.net>
To:        Mark Johnston <markj@freebsd.org>
Cc:        freebsd-net@freebsd.org
Subject:   Re: page fault while in kernel mode - after upgrade from 12.2 to 13.0
Message-ID:  <d37716a3-927d-b200-c805-b31d7b36383d@gmx.net>
In-Reply-To: <YJBS8YMZFkMtWPEu@nuc>
References:  <d7c3bfbd-2e54-c0f4-ec23-5dab08287ea3@gmx.net> <YJBS8YMZFkMtWPEu@nuc>

next in thread | previous in thread | raw e-mail | index | archive | help
Hi Mark,

Sorry for the delay, I can only test after work. I triggered another 2 panics, this time
with a different result (see below). Can I provide some more information?

Thank you!
   Michael



--- #1


Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x388
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff80d3fa67
stack pointer           = 0x28:0xfffffe0115bea9c0
frame pointer           = 0x28:0xfffffe0115beaa20
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (swi1: netisr 0)
trap number             = 12
panic: page fault
cpuid = 1
time = 1620144777
KDB: stack backtrace:
#0 0xffffffff80c57345 at kdb_backtrace+0x65
#1 0xffffffff80c09d21 at vpanic+0x181
#2 0xffffffff80c09b93 at panic+0x43
#3 0xffffffff8108b187 at trap_fatal+0x387
#4 0xffffffff8108b1df at trap_pfault+0x4f
#5 0xffffffff8108a83d at trap+0x27d
#6 0xffffffff810617a8 at calltrap+0x8
#7 0xffffffff80bcae5d at ithread_loop+0x24d
#8 0xffffffff80bc7c5e at fork_exit+0x7e
#9 0xffffffff8106282e at fork_trampoline+0xe
Uptime: 3m51s
Dumping 2617 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%

__curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
55              __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
(kgdb) list *0xffffffff80d3fa67
0xffffffff80d3fa67 is in swi_net (/usr/src/sys/net/netisr.c:918).
913                     if (local_npw.nw_head == NULL)
914                             local_npw.nw_tail = NULL;
915                     local_npw.nw_len--;
916                     VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
917                         ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
918                     CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
919                     netisr_proto[proto].np_handler(m);
920                     CURVNET_RESTORE();
921             }
922             KASSERT(local_npw.nw_len == 0,
(kgdb) backtrace
#0  __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
#1  doadump (textdump=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:399
#2  0xffffffff80c09916 in kern_reboot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:486
#3  0xffffffff80c09d90 in vpanic (fmt=<optimized out>, ap=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:919
#4  0xffffffff80c09b93 in panic (fmt=<unavailable>) at /usr/src/sys/kern/kern_shutdown.c:843
#5  0xffffffff8108b187 in trap_fatal (frame=0xfffffe0115bea900, eva=904) at /usr/src/sys/amd64/amd64/trap.c:915
#6  0xffffffff8108b1df in trap_pfault (frame=frame@entry=0xfffffe0115bea900, usermode=false, signo=<optimized out>, signo@entry=0x0, ucode=<optimized out>, ucode@entry=0x0) at /usr/src/sys/amd64/amd64/trap.c:732
#7  0xffffffff8108a83d in trap (frame=0xfffffe0115bea900) at /usr/src/sys/amd64/amd64/trap.c:398
#8  <signal handler called>
#9  0xffffffff80d3fa67 in netisr_process_workstream_proto (nwsp=<optimized out>, proto=1) at /usr/src/sys/net/netisr.c:918
#10 swi_net (arg=<optimized out>) at /usr/src/sys/net/netisr.c:966
#11 0xffffffff80bcae5d in intr_event_execute_handlers (p=<optimized out>, ie=0xfffff80003dbb600) at /usr/src/sys/kern/kern_intr.c:1168
#12 ithread_execute_handlers (p=<optimized out>, ie=0xfffff80003dbb600) at /usr/src/sys/kern/kern_intr.c:1181
#13 ithread_loop (arg=arg@entry=0xfffff80003dced40) at /usr/src/sys/kern/kern_intr.c:1269
#14 0xffffffff80bc7c5e in fork_exit (callout=0xffffffff80bcac10 <ithread_loop>, arg=0xfffff80003dced40, frame=0xfffffe0115beab00) at /usr/src/sys/kern/kern_fork.c:1069
#15 <signal handler called>


--- #2


Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x8
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff80ca599c
stack pointer           = 0x28:0xfffffe0115bea6c0
frame pointer           = 0x28:0xfffffe0115bea700
code segment            = base 0x0, limit 0xfffff, type 0x1b
                         = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (swi1: netisr 0)
trap number             = 12
panic: page fault
cpuid = 1
time = 1620152374
KDB: stack backtrace:
#0 0xffffffff80c57345 at kdb_backtrace+0x65
#1 0xffffffff80c09d21 at vpanic+0x181
#2 0xffffffff80c09b93 at panic+0x43
#3 0xffffffff8108b187 at trap_fatal+0x387
#4 0xffffffff8108b1df at trap_pfault+0x4f
#5 0xffffffff8108a83d at trap+0x27d
#6 0xffffffff810617a8 at calltrap+0x8
#7 0xffffffff80dbf0ae at tcp_do_segment+0x10ce
#8 0xffffffff80dbd21e at tcp_input+0xabe
#9 0xffffffff80dafc15 at ip_input+0x125
#10 0xffffffff80d3fa7b at swi_net+0x12b
#11 0xffffffff80bcae5d at ithread_loop+0x24d
#12 0xffffffff80bc7c5e at fork_exit+0x7e
#13 0xffffffff8106282e at fork_trampoline+0xe
Uptime: 2h3m59s
Dumping 2666 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%

__curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
55              __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
(kgdb) #0  __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
#1  doadump (textdump=<optimized out>)
     at /usr/src/sys/kern/kern_shutdown.c:399
#2  0xffffffff80c09916 in kern_reboot (howto=260)
     at /usr/src/sys/kern/kern_shutdown.c:486
#3  0xffffffff80c09d90 in vpanic (fmt=<optimized out>, ap=<optimized out>)
     at /usr/src/sys/kern/kern_shutdown.c:919
#4  0xffffffff80c09b93 in panic (fmt=<unavailable>)
     at /usr/src/sys/kern/kern_shutdown.c:843
#5  0xffffffff8108b187 in trap_fatal (frame=0xfffffe0115bea600, eva=8)
     at /usr/src/sys/amd64/amd64/trap.c:915
#6  0xffffffff8108b1df in trap_pfault (frame=frame@entry=0xfffffe0115bea600,
     usermode=false, signo=<optimized out>, signo@entry=0x0,
     ucode=<optimized out>, ucode@entry=0x0)
     at /usr/src/sys/amd64/amd64/trap.c:732
#7  0xffffffff8108a83d in trap (frame=0xfffffe0115bea600)
     at /usr/src/sys/amd64/amd64/trap.c:398
#8  <signal handler called>
#9  sbcut_internal (sb=0xfffff80522aa09c0, len=203, len@entry=476)
     at /usr/src/sys/kern/uipc_sockbuf.c:1491
#10 0xffffffff80ca5b8a in sbcut_locked (sb=0xfffff80522aa09c0,
     len=-743943424, len@entry=476) at /usr/src/sys/kern/uipc_sockbuf.c:1591
#11 0xffffffff80dbf0ae in tcp_do_segment (m=0xfffff8004c2aae00,
     th=<optimized out>, so=<optimized out>, tp=<optimized out>,
     drop_hdrlen=52, tlen=<optimized out>, iptos=0 '\000')
     at /usr/src/sys/netinet/tcp_input.c:2918
#12 0xffffffff80dbd21e in tcp_input (mp=<optimized out>,
     offp=<optimized out>, proto=<optimized out>)
     at /usr/src/sys/netinet/tcp_input.c:1382
#13 0xffffffff80dafc15 in ip_input (m=0x0)
     at /usr/src/sys/netinet/ip_input.c:829
#14 0xffffffff80d3fa7b in netisr_process_workstream_proto (
     nwsp=<optimized out>, proto=1) at /usr/src/sys/net/netisr.c:919
#15 swi_net (arg=<optimized out>) at /usr/src/sys/net/netisr.c:966
#16 0xffffffff80bcae5d in intr_event_execute_handlers (p=<optimized out>,
     ie=0xfffff80003bbe500) at /usr/src/sys/kern/kern_intr.c:1168
#17 ithread_execute_handlers (p=<optimized out>, ie=0xfffff80003bbe500)
     at /usr/src/sys/kern/kern_intr.c:1181
#18 ithread_loop (arg=arg@entry=0xfffff80003cb6d40)
     at /usr/src/sys/kern/kern_intr.c:1269
#19 0xffffffff80bc7c5e in fork_exit (
     callout=0xffffffff80bcac10 <ithread_loop>, arg=0xfffff80003cb6d40,
     frame=0xfffffe0115beab00) at /usr/src/sys/kern/kern_fork.c:1069
#20 <signal handler called>


---






On 03.05.2021 21:45, Mark Johnston wrote:
> On Mon, May 03, 2021 at 08:04:30PM +0200, Michael Schmiedgen wrote:
>> Hi List,
>>
>> if I start a Samba jail, after a few seconds the system crashes. Very reproducible.
>>
>> System has ~10 jails and 3 bhyve VMs. Dell server, Xeon E3-1240, 64GB RAM, 3 way mirror ZFS.
>>
>> It also occurs a few seconds after I start a phone call using the SIP VM of that machine,
>> very strange.
>>
>> I got some log messages suggesting raising somaxconn, so I did
>>
>> kern.ipc.somaxconn=4096
>>
>> in sysctl.conf
>>
>>
>> Below some debug information, please let me know if I should provide further information.
>>
>> Should I open a bug or something?
>>
>> Thank you very much!
>>     Michael
>>
>>
>>
>> Fatal trap 12: page fault while in kernel mode
>> cpuid = 0; apic id = 00
>> fault virtual address   = 0x0
>> fault code              = supervisor read data, page not present
>> instruction pointer     = 0x20:0xffffffff80ca52c0
>> stack pointer           = 0x28:0xfffffe019d039650
>> frame pointer           = 0x28:0xfffffe019d039690
>> code segment            = base 0x0, limit 0xfffff, type 0x1b
>>                           = DPL 0, pres 1, long 1, def32 0, gran 1
>> processor eflags        = interrupt enabled, resume, IOPL = 0
>> current process         = 649 (devd)
>> trap number             = 12
>> panic: page fault
>> cpuid = 0
>> time = 1620061253
>> KDB: stack backtrace:
>> #0 0xffffffff80c57345 at kdb_backtrace+0x65
>> #1 0xffffffff80c09d21 at vpanic+0x181
>> #2 0xffffffff80c09b93 at panic+0x43
>> #3 0xffffffff8108b187 at trap_fatal+0x387
>> #4 0xffffffff8108b1df at trap_pfault+0x4f
>> #5 0xffffffff8108a83d at trap+0x27d
>> #6 0xffffffff810617a8 at calltrap+0x8
>> #7 0xffffffff80ca51c3 at sbappendaddr_locked+0x93
>> #8 0xffffffff80cb437a at uipc_send+0x73a
>> #9 0xffffffff80ca9053 at sosend_generic+0x633
>> #10 0xffffffff80ca94e0 at sosend+0x50
>> #11 0xffffffff80caff2e at kern_sendit+0x20e
>> #12 0xffffffff80cb032b at sendit+0x1db
>> #13 0xffffffff80cb013d at sys_sendto+0x4d
>> #14 0xffffffff8108ba8c at amd64_syscall+0x10c
>> #15 0xffffffff810620ce at fast_syscall_common+0xf8
>> Uptime: 2m2s
>> Dumping 2373 out of 65454 MB:..1%..11%..21%..31%..41%..51%..61%..71%..81%..91%
>>
>> __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55
>> 55              __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu,
>> (kgdb) list *0xffffffff80ca52c0
>> 0xffffffff80ca52c0 is in sbappendaddr_locked_internal (/usr/src/sys/kern/uipc_sockbuf.c:1169).
>> 1164            if (ctrl_last)
>> 1165                    ctrl_last->m_next = m0; /* concatenate data to control */
>> 1166            else
>> 1167                    control = m0;
>> 1168            m->m_next = control;
>> 1169            for (n = m; n->m_next != NULL; n = n->m_next)
>> 1170                    sballoc(sb, n);
>> 1171            sballoc(sb, n);
>> 1172            nlast = n;
>> 1173            SBLINKRECORD(sb, m);
>> 1173            SBLINKRECORD(sb, m);
>
> So we are crashing because "n" is somehow NULL?  That seems difficult to
> explain.  Can you show the local variables in this frame?
>
> Does the panic always have the same stack trace?
>




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?d37716a3-927d-b200-c805-b31d7b36383d>