Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 22 Sep 2015 11:33:58 +0300
From:      Andriy Gapon <avg@FreeBSD.org>
To:        freebsd-net <freebsd-net@FreeBSD.org>, "George V. Neville-Neil" <gnn@FreeBSD.org>
Subject:   page fault in tcp_do_segment (r287759 suspected)
Message-ID:  <56011276.4060206@FreeBSD.org>

next in thread | raw e-mail | index | archive | help

I've just got the following panic on amd64 r288066.  I never experienced that
kind of panic with r286985.

Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x10
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff8073dd68
stack pointer           = 0x28:0xfffffe02b4cb9640
frame pointer           = 0x28:0xfffffe02b4cb9700
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (irq260: re0)
trap number             = 12
panic: page fault
cpuid = 1
KDB: stack backtrace:
db_trace_self_wrapper() at 0xffffffff8041c9fb = db_trace_self_wrapper+0x2b/frame
0xfffffe02b4cb9110
kdb_backtrace() at 0xffffffff80668239 = kdb_backtrace+0x39/frame 0xfffffe02b4cb91c0
vpanic() at 0xffffffff806334d2 = vpanic+0x152/frame 0xfffffe02b4cb9200
panic() at 0xffffffff80633213 = panic+0x43/frame 0xfffffe02b4cb9260
trap_fatal() at 0xffffffff8081fdc3 = trap_fatal+0x343/frame 0xfffffe02b4cb92c0
trap_pfault() at 0xffffffff8081fff6 = trap_pfault+0x206/frame 0xfffffe02b4cb9350
trap() at 0xffffffff8081f70a = trap+0x4ca/frame 0xfffffe02b4cb9560
trap_check() at 0xffffffff8082011a = trap_check+0x2a/frame 0xfffffe02b4cb9580
calltrap() at 0xffffffff80807db3 = calltrap+0x8/frame 0xfffffe02b4cb9580
--- trap 0xc, rip = 0xffffffff8073dd68, rsp = 0xfffffe02b4cb9650, rbp =
0xfffffe02b4cb9700 ---
tcp_do_segment() at 0xffffffff8073dd68 = tcp_do_segment+0xbc8/frame
0xfffffe02b4cb9700
tcp_input() at 0xffffffff8073c899 = tcp_input+0x999/frame 0xfffffe02b4cb9810
ip_input() at 0xffffffff80733cbe = ip_input+0xbe/frame 0xfffffe02b4cb9860
netisr_dispatch_src() at 0xffffffff807121fe = netisr_dispatch_src+0x17e/frame
0xfffffe02b4cb98d0
netisr_dispatch() at 0xffffffff80712481 = netisr_dispatch+0x11/frame
0xfffffe02b4cb98e0
ether_demux() at 0xffffffff8070984b = ether_demux+0x13b/frame 0xfffffe02b4cb9910
ether_input_internal() at 0xffffffff8070a3ec = ether_input_internal+0x32c/frame
0xfffffe02b4cb9950
ether_nh_input() at 0xffffffff8070a093 = ether_nh_input+0x23/frame
0xfffffe02b4cb9960
netisr_dispatch_src() at 0xffffffff807121fe = netisr_dispatch_src+0x17e/frame
0xfffffe02b4cb99d0
netisr_dispatch() at 0xffffffff80712481 = netisr_dispatch+0x11/frame
0xfffffe02b4cb99e0
ether_input() at 0xffffffff80709b3c = ether_input+0x2c/frame 0xfffffe02b4cb9a00
re_rxeof() at 0xffffffff8049b858 = re_rxeof+0x228/frame 0xfffffe02b4cb9a60
re_intr_msi() at 0xffffffff8049d78f = re_intr_msi+0xbf/frame 0xfffffe02b4cb9aa0
intr_event_execute_handlers() at 0xffffffff805fec1f =
intr_event_execute_handlers+0x12f/frame 0xfffffe02b4cb9b00
ithread_execute_handlers() at 0xffffffff805ff74c =
ithread_execute_handlers+0x2c/frame 0xfffffe02b4cb9b20
ithread_loop() at 0xffffffff805ff5bb = ithread_loop+0x5b/frame 0xfffffe02b4cb9b80
fork_exit() at 0xffffffff805fc23b = fork_exit+0xdb/frame 0xfffffe02b4cb9bf0
fork_trampoline() at 0xffffffff808082ee = fork_trampoline+0xe/frame
0xfffffe02b4cb9bf0
--- trap 0, rip = 0, rsp = 0, rbp = 0 ---

(kgdb) fr 9
#9  0xffffffff8073dd68 in tcp_do_segment (m=<value optimized out>,
th=0xfffff80052387022, so=0xfffff80207149000, tp=<value optimized out>,
drop_hdrlen=60, tlen=<value optimized out>, iptos=<value optimized out>,
ti_locked=Cannot access memory at address 0x1
)
    at /usr/src/sys/netinet/tcp_input.c:3103
3103                    INP_INFO_RUNLOCK(&V_tcbinfo);
(kgdb) list
3098                    tcp_dropwithreset(m, th, NULL, tlen, rstreason);
3099            return;
3100
3101    drop:
3102            if (ti_locked == TI_RLOCKED) {
3103                    INP_INFO_RUNLOCK(&V_tcbinfo);
3104                    ti_locked = TI_UNLOCKED;
3105            }
3106    #ifdef INVARIANTS
3107

But judging from the disassembly the fault happens right after calling rw_runlock():

0xffffffff8073dd50 <tcp_do_segment+2992>:       mov    $0xffffffff80ff84d0,%rdi
0xffffffff8073dd57 <tcp_do_segment+2999>:       mov    $0xffffffff809ba28f,%rsi
0xffffffff8073dd5e <tcp_do_segment+3006>:       mov    $0xc1f,%edx
0xffffffff8073dd63 <tcp_do_segment+3011>:       callq  0xffffffff806309c0
<_rw_runlock_cookie>
0xffffffff8073dd68 <tcp_do_segment+3016>:       mov    0x10(%r12),%rdx
0xffffffff8073dd6d <tcp_do_segment+3021>:       mov    %r15,%rdi
0xffffffff8073dd70 <tcp_do_segment+3024>:       mov    %r14,%rsi

That code actually looks like the following DTrace probe a few lines below:

TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));

So, it seems like 'm' could be NULL here.
I see two places in tcp_do_segment() where m gets assigned with NULL followed by
goto drop.  If I had to guess then my guess would be that one of those code
paths was taken.
Since those NULL assignments were there for more than a year, then I would guess
that the addition of the probe is to blame:
https://svnweb.freebsd.org/base?view=revision&revision=287759

-- 
Andriy Gapon



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?56011276.4060206>