From owner-freebsd-net@freebsd.org  Tue Sep 22 08:35:03 2015
Return-Path: <owner-freebsd-net@freebsd.org>
Delivered-To: freebsd-net@mailman.ysv.freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org
 [IPv6:2001:1900:2254:206a::19:1])
 by mailman.ysv.freebsd.org (Postfix) with ESMTP id 8F33CA03D13
 for <freebsd-net@mailman.ysv.freebsd.org>;
 Tue, 22 Sep 2015 08:35:03 +0000 (UTC) (envelope-from avg@FreeBSD.org)
Received: from citadel.icyb.net.ua (citadel.icyb.net.ua [212.40.38.140])
 by mx1.freebsd.org (Postfix) with ESMTP id ACF791491;
 Tue, 22 Sep 2015 08:35:02 +0000 (UTC) (envelope-from avg@FreeBSD.org)
Received: from porto.starpoint.kiev.ua (porto-e.starpoint.kiev.ua
 [212.40.38.100])
 by citadel.icyb.net.ua (8.8.8p3/ICyb-2.3exp) with ESMTP id LAA01002;
 Tue, 22 Sep 2015 11:35:00 +0300 (EEST)
 (envelope-from avg@FreeBSD.org)
Received: from localhost ([127.0.0.1])
 by porto.starpoint.kiev.ua with esmtp (Exim 4.34 (FreeBSD))
 id 1ZeJ2R-0009s2-Uo; Tue, 22 Sep 2015 11:34:59 +0300
To: freebsd-net <freebsd-net@FreeBSD.org>,
 "George V. Neville-Neil" <gnn@FreeBSD.org>
From: Andriy Gapon <avg@FreeBSD.org>
Subject: page fault in tcp_do_segment (r287759 suspected)
X-Enigmail-Draft-Status: N1110
Message-ID: <56011276.4060206@FreeBSD.org>
Date: Tue, 22 Sep 2015 11:33:58 +0300
User-Agent: Mozilla/5.0 (X11; FreeBSD amd64; rv:38.0) Gecko/20100101
 Thunderbird/38.2.0
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 7bit
X-BeenThere: freebsd-net@freebsd.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: Networking and TCP/IP with FreeBSD <freebsd-net.freebsd.org>
List-Unsubscribe: <https://lists.freebsd.org/mailman/options/freebsd-net>,
 <mailto:freebsd-net-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/freebsd-net/>
List-Post: <mailto:freebsd-net@freebsd.org>
List-Help: <mailto:freebsd-net-request@freebsd.org?subject=help>
List-Subscribe: <https://lists.freebsd.org/mailman/listinfo/freebsd-net>,
 <mailto:freebsd-net-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Tue, 22 Sep 2015 08:35:03 -0000


I've just got the following panic on amd64 r288066.  I never experienced that
kind of panic with r286985.

Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x10
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff8073dd68
stack pointer           = 0x28:0xfffffe02b4cb9640
frame pointer           = 0x28:0xfffffe02b4cb9700
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 12 (irq260: re0)
trap number             = 12
panic: page fault
cpuid = 1
KDB: stack backtrace:
db_trace_self_wrapper() at 0xffffffff8041c9fb = db_trace_self_wrapper+0x2b/frame
0xfffffe02b4cb9110
kdb_backtrace() at 0xffffffff80668239 = kdb_backtrace+0x39/frame 0xfffffe02b4cb91c0
vpanic() at 0xffffffff806334d2 = vpanic+0x152/frame 0xfffffe02b4cb9200
panic() at 0xffffffff80633213 = panic+0x43/frame 0xfffffe02b4cb9260
trap_fatal() at 0xffffffff8081fdc3 = trap_fatal+0x343/frame 0xfffffe02b4cb92c0
trap_pfault() at 0xffffffff8081fff6 = trap_pfault+0x206/frame 0xfffffe02b4cb9350
trap() at 0xffffffff8081f70a = trap+0x4ca/frame 0xfffffe02b4cb9560
trap_check() at 0xffffffff8082011a = trap_check+0x2a/frame 0xfffffe02b4cb9580
calltrap() at 0xffffffff80807db3 = calltrap+0x8/frame 0xfffffe02b4cb9580
--- trap 0xc, rip = 0xffffffff8073dd68, rsp = 0xfffffe02b4cb9650, rbp =
0xfffffe02b4cb9700 ---
tcp_do_segment() at 0xffffffff8073dd68 = tcp_do_segment+0xbc8/frame
0xfffffe02b4cb9700
tcp_input() at 0xffffffff8073c899 = tcp_input+0x999/frame 0xfffffe02b4cb9810
ip_input() at 0xffffffff80733cbe = ip_input+0xbe/frame 0xfffffe02b4cb9860
netisr_dispatch_src() at 0xffffffff807121fe = netisr_dispatch_src+0x17e/frame
0xfffffe02b4cb98d0
netisr_dispatch() at 0xffffffff80712481 = netisr_dispatch+0x11/frame
0xfffffe02b4cb98e0
ether_demux() at 0xffffffff8070984b = ether_demux+0x13b/frame 0xfffffe02b4cb9910
ether_input_internal() at 0xffffffff8070a3ec = ether_input_internal+0x32c/frame
0xfffffe02b4cb9950
ether_nh_input() at 0xffffffff8070a093 = ether_nh_input+0x23/frame
0xfffffe02b4cb9960
netisr_dispatch_src() at 0xffffffff807121fe = netisr_dispatch_src+0x17e/frame
0xfffffe02b4cb99d0
netisr_dispatch() at 0xffffffff80712481 = netisr_dispatch+0x11/frame
0xfffffe02b4cb99e0
ether_input() at 0xffffffff80709b3c = ether_input+0x2c/frame 0xfffffe02b4cb9a00
re_rxeof() at 0xffffffff8049b858 = re_rxeof+0x228/frame 0xfffffe02b4cb9a60
re_intr_msi() at 0xffffffff8049d78f = re_intr_msi+0xbf/frame 0xfffffe02b4cb9aa0
intr_event_execute_handlers() at 0xffffffff805fec1f =
intr_event_execute_handlers+0x12f/frame 0xfffffe02b4cb9b00
ithread_execute_handlers() at 0xffffffff805ff74c =
ithread_execute_handlers+0x2c/frame 0xfffffe02b4cb9b20
ithread_loop() at 0xffffffff805ff5bb = ithread_loop+0x5b/frame 0xfffffe02b4cb9b80
fork_exit() at 0xffffffff805fc23b = fork_exit+0xdb/frame 0xfffffe02b4cb9bf0
fork_trampoline() at 0xffffffff808082ee = fork_trampoline+0xe/frame
0xfffffe02b4cb9bf0
--- trap 0, rip = 0, rsp = 0, rbp = 0 ---

(kgdb) fr 9
#9  0xffffffff8073dd68 in tcp_do_segment (m=<value optimized out>,
th=0xfffff80052387022, so=0xfffff80207149000, tp=<value optimized out>,
drop_hdrlen=60, tlen=<value optimized out>, iptos=<value optimized out>,
ti_locked=Cannot access memory at address 0x1
)
    at /usr/src/sys/netinet/tcp_input.c:3103
3103                    INP_INFO_RUNLOCK(&V_tcbinfo);
(kgdb) list
3098                    tcp_dropwithreset(m, th, NULL, tlen, rstreason);
3099            return;
3100
3101    drop:
3102            if (ti_locked == TI_RLOCKED) {
3103                    INP_INFO_RUNLOCK(&V_tcbinfo);
3104                    ti_locked = TI_UNLOCKED;
3105            }
3106    #ifdef INVARIANTS
3107

But judging from the disassembly the fault happens right after calling rw_runlock():

0xffffffff8073dd50 <tcp_do_segment+2992>:       mov    $0xffffffff80ff84d0,%rdi
0xffffffff8073dd57 <tcp_do_segment+2999>:       mov    $0xffffffff809ba28f,%rsi
0xffffffff8073dd5e <tcp_do_segment+3006>:       mov    $0xc1f,%edx
0xffffffff8073dd63 <tcp_do_segment+3011>:       callq  0xffffffff806309c0
<_rw_runlock_cookie>
0xffffffff8073dd68 <tcp_do_segment+3016>:       mov    0x10(%r12),%rdx
0xffffffff8073dd6d <tcp_do_segment+3021>:       mov    %r15,%rdi
0xffffffff8073dd70 <tcp_do_segment+3024>:       mov    %r14,%rsi

That code actually looks like the following DTrace probe a few lines below:

TCP_PROBE3(debug__input, tp, th, mtod(m, const char *));

So, it seems like 'm' could be NULL here.
I see two places in tcp_do_segment() where m gets assigned with NULL followed by
goto drop.  If I had to guess then my guess would be that one of those code
paths was taken.
Since those NULL assignments were there for more than a year, then I would guess
that the addition of the probe is to blame:
https://svnweb.freebsd.org/base?view=revision&revision=287759

-- 
Andriy Gapon