Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 14 Jan 2013 20:04:04 +0000
From:      Po-Li Soong <polis@spectralogic.com>
To:        Konstantin Belousov <kostikbel@gmail.com>
Cc:        "stable@FreeBSD.org" <stable@FreeBSD.org>
Subject:   RE: zio_done panic on unadulterated FreeBSD Release 9.1
Message-ID:  <0C4D65F6A0FC9E4B95EA114508C7E0FE5F66EF0E@reactor.sldomain.com>
In-Reply-To: <20130113175527.GB2561@kib.kiev.ua>
References:  <0C4D65F6A0FC9E4B95EA114508C7E0FE5F66DDB6@reactor.sldomain.com> <20130109234924.GM2561@kib.kiev.ua> <0C4D65F6A0FC9E4B95EA114508C7E0FE5F66E4F3@reactor.sldomain.com> <20130113175527.GB2561@kib.kiev.ua>

next in thread | previous in thread | raw e-mail | index | archive | help
Konstantin,

First of all, I agree with you that it would be very strange to have crashed
at vm_page_free_toq+0x45, by which point m (held in register rbx; see the
assembly listing below) has already been dereferenced a few times. However,
there is a discrepancy between the KDB backtrace and the annotated one a few
lines below it. In the annotated backtrace, it appears that it is
vm_page_remove that runs into the panic at 0xffffffff80b50597, which is at
line 975. That source line looks a lot more likely to cause a panic than the
one in vm_page_free_toq. Listed below are the assembly listings of
vm_page_free_toq and vm_page_remove in the vicinity of the addresses in
question.

Regards,

Po-Li Soong

Dump of assembler code for function vm_page_free_toq:
0xffffffff80b506b0 <vm_page_free_toq+0>:        push   %rbp
0xffffffff80b506b1 <vm_page_free_toq+1>:        mov    %rsp,%rbp
0xffffffff80b506b4 <vm_page_free_toq+4>:        sub    $0x20,%rsp
0xffffffff80b506b8 <vm_page_free_toq+8>:        mov    %rbx,-0x18(%rbp)
0xffffffff80b506bc <vm_page_free_toq+12>:       mov    %r12,-0x10(%rbp)
0xffffffff80b506c0 <vm_page_free_toq+16>:       mov    %rdi,%rbx
0xffffffff80b506c3 <vm_page_free_toq+19>:       mov    %r13,-0x8(%rbp)
0xffffffff80b506c7 <vm_page_free_toq+23>:       incl   %gs:0xac
0xffffffff80b506cf <vm_page_free_toq+31>:       testb  $0x2,0x6d(%rdi)
0xffffffff80b506d3 <vm_page_free_toq+35>:       jne    0xffffffff80b50901 <vm_page_free_toq+593>
0xffffffff80b506d9 <vm_page_free_toq+41>:       cmpb   $0x0,0x71(%rdi)
0xffffffff80b506dd <vm_page_free_toq+45>:       jne    0xffffffff80b50912 <vm_page_free_toq+610>
0xffffffff80b506e3 <vm_page_free_toq+51>:       testb  $0x4,0x6e(%rdi)
0xffffffff80b506e7 <vm_page_free_toq+55>:       je     0xffffffff80b50863 <vm_page_free_toq+435>
0xffffffff80b506ed <vm_page_free_toq+61>:       mov    %rbx,%rdi
0xffffffff80b506f0 <vm_page_free_toq+64>:       callq  0xffffffff80b50540 <vm_page_remove>
0xffffffff80b506f5 <vm_page_free_toq+69>:       movzbl 0x6d(%rbx),%eax
0xffffffff80b506f9 <vm_page_free_toq+73>:       test   $0x4,%al
0xffffffff80b506fb <vm_page_free_toq+75>:       jne    0xffffffff80b50767 <vm_page_free_toq+183>
0xffffffff80b506fd <vm_page_free_toq+77>:       mov    0x68(%rbx),%esi
=20
=20
Dump of assembler code for function vm_page_remove:
0xffffffff80b50540 <vm_page_remove+0>:  push   %rbp
0xffffffff80b50541 <vm_page_remove+1>:  mov    %rsp,%rbp
0xffffffff80b50544 <vm_page_remove+4>:  push   %r13
0xffffffff80b50546 <vm_page_remove+6>:  push   %r12
0xffffffff80b50548 <vm_page_remove+8>:  push   %rbx
0xffffffff80b50549 <vm_page_remove+9>:  mov    %rdi,%rbx
0xffffffff80b5054c <vm_page_remove+12>: sub    $0x8,%rsp
0xffffffff80b50550 <vm_page_remove+16>: mov    0x30(%rdi),%r13
0xffffffff80b50554 <vm_page_remove+20>: movzwl 0x6e(%rdi),%eax
0xffffffff80b50558 <vm_page_remove+24>: test   %r13,%r13
0xffffffff80b5055b <vm_page_remove+27>: je     0xffffffff80b50610 <vm_page_remove+208>
0xffffffff80b50561 <vm_page_remove+33>: test   $0x1,%al
0xffffffff80b50563 <vm_page_remove+35>: jne    0xffffffff80b50635 <vm_page_remove+245>
0xffffffff80b50569 <vm_page_remove+41>: mov    0x10(%rbx),%r12
0xffffffff80b5056d <vm_page_remove+45>: test   %r12,%r12
0xffffffff80b50570 <vm_page_remove+48>: je     0xffffffff80b5057d <vm_page_remove+61>
0xffffffff80b50572 <vm_page_remove+50>: cmp    %rbx,0x20(%r12)
0xffffffff80b50577 <vm_page_remove+55>: je     0xffffffff80b50660 <vm_page_remove+288>
0xffffffff80b5057d <vm_page_remove+61>: mov    0x18(%rbx),%rcx
0xffffffff80b50581 <vm_page_remove+65>: mov    0x8(%rcx),%rsi
0xffffffff80b50585 <vm_page_remove+69>: mov    (%rsi),%rdx
0xffffffff80b50588 <vm_page_remove+72>: test   %rdx,%rdx
0xffffffff80b5058b <vm_page_remove+75>: je     0xffffffff80b50597 <vm_page_remove+87>
0xffffffff80b5058d <vm_page_remove+77>: cmp    %rbx,0x28(%rdx)
0xffffffff80b50591 <vm_page_remove+81>: je     0xffffffff80b50646 <vm_page_remove+262>
0xffffffff80b50597 <vm_page_remove+87>: mov    0x58(%r13),%rsi    ; <<-------------------
0xffffffff80b5059b <vm_page_remove+91>: cmp    %rbx,%rsi
0xffffffff80b5059e <vm_page_remove+94>: je     0xffffffff80b505a9 <vm_page_remove+105>
0xffffffff80b505a0 <vm_page_remove+96>: mov    0x38(%rbx),%rdi
0xffffffff80b505a4 <vm_page_remove+100>:        callq  0xffffffff80b4fa90 <vm_page_splay>
0xffffffff80b505a9 <vm_page_remove+105>:        mov    0x20(%rbx),%rax
0xffffffff80b505ad <vm_page_remove+109>:        test   %rax,%rax
0xffffffff80b505b0 <vm_page_remove+112>:        mov    %rax,%rdx
0xffffffff80b505b3 <vm_page_remove+115>:        je     0xffffffff80b50672 <vm_page_remove+306>
0xffffffff80b505b9 <vm_page_remove+121>:        mov    0x28(%rbx),%rsi
0xffffffff80b505bd <vm_page_remove+125>:        test   %rsi,%rsi

-----Original Message-----
From: Konstantin Belousov [mailto:kostikbel@gmail.com]=20
Sent: Sunday, January 13, 2013 10:55 AM
To: Po-Li Soong
Cc: stable@FreeBSD.org
Subject: Re: zio_done panic on unadulterated FreeBSD Release 9.1

On Fri, Jan 11, 2013 at 03:09:58PM +0000, Po-Li Soong wrote:
> (kgdb) p/x *(struct vm_object *)0xffffffff81281580
> $1 = {mtx = {lock_object = {lo_name = 0xffffffff80e54bbd,
>       lo_flags = 0x1430000, lo_data = 0x0, lo_witness = 0x0},
>     mtx_lock = 0xfffffe0006f44000}, object_list = {
>     tqe_next = 0xffffffff81281240, tqe_prev = 0xffffffff812814a0},
>   shadow_head = {lh_first = 0x0}, shadow_list = {le_next = 0x0,
>     le_prev = 0x0}, memq = {tqh_first = 0xfffffe00cfd3f880,
>     tqh_last = 0xfffffe00c9cac398}, root = 0xfffffe00cd733ab0,
>   size = 0x7ffffff, generation = 0x1, ref_count = 0x3f8, shadow_count = 0x0,
>   memattr = 0x6, type = 0x4, flags = 0x1000, pg_color = 0x0, pad1 = 0x0,
>   resident_page_count = 0x9b729, backing_object = 0x0,
>   backing_object_offset = 0x0, pager_object_list = {tqe_next = 0x0,
>     tqe_prev = 0x0}, rvq = {lh_first = 0xfffffe00c7dd2140}, cache = 0x0,
>   handle = 0x0, un_pager = {vnp = {vnp_size = 0x0, writemappings = 0x0},
>     devp = {devp_pglist = {tqh_first = 0x0, tqh_last = 0x0}, ops = 0x0},
>     sgp = {sgp_pglist = {tqh_first = 0x0, tqh_last = 0x0}}, swp = {
>       swp_bcount = 0x0}}, cred = 0x0, charge = 0x0, paging_in_progress = 0x1}
>=20
> (kgdb)  p/x *(struct vm_page *)0xfffffe00cd733ab0
> $2 = {pageq = {tqe_next = 0x0, tqe_prev = 0xfffffe00c7e7d678}, listq = {
>     tqe_next = 0xfffffe00cd733b28, tqe_prev = 0xfffffe00cd7331d8},
>   left = 0xfffffe00c9b31c38, right = 0xfffffe00cd735c70,
>   object = 0xfffffffb81281580, pindex = 0x7495a, phys_addr = 0xbe95a000, md = {
>     pv_list = {tqh_first = 0x0, tqh_last = 0xfffffe00cd733af8},
>     pat_mode = 0x6}, queue = 0xff, segind = 0x2, hold_count = 0x0,
>   order = 0xd, pool = 0x0, cow = 0x0, wire_count = 0x0, aflags = 0x0,
>   flags = 0x0, oflags = 0x4, act_count = 0x0, busy = 0x0, valid = 0xff,
>   dirty = 0x0}
>=20
> (kgdb) list *vm_page_free_toq+0x45
> 0xffffffff80b506f5 is in vm_page_free_toq (/usr/src/sys/vm/vm_page.c:1878).
> warning: Source file is more recent than executable.
>
> 1873
> 1874            /*
> 1875             * If fictitious remove object association and
> 1876             * return, otherwise delay object association removal.
> 1877             */
> 1878            if ((m->flags & PG_FICTITIOUS) != 0) {
> 1879                    return;
> 1880            }
> 1881
> 1882            m->valid = 0;
> (kgdb)
This is strange. Can you disassemble your instance of vm_page_free_toq()
and show me the assembler listing? The line you show has nothing that could
cause a page fault if the m pointer itself is valid.

>=20
>=20
> -----Original Message-----
> From: Konstantin Belousov [mailto:kostikbel@gmail.com]
> Sent: Wednesday, January 09, 2013 4:49 PM
> To: Po-Li Soong
> Cc: stable@FreeBSD.org
> Subject: Re: zio_done panic on unadulterated FreeBSD Release 9.1
>=20
> On Wed, Jan 09, 2013 at 08:03:38PM +0000, Po-Li Soong wrote:
> > Hi,
> >=20
> > My name is Po-Li Soong. I ran into a crash not long after installing the 9.1 release on my home machine. I was performing a test run of file transfers with a Samba server running on the FreeBSD installation. The transfer rate was about 70-80 MB/sec. The core.txt is attached. If any other crash dumps are needed, please let me know.
> >=20
> > I first discussed this panic with Justin Gibbs, a coworker of mine at Spectra Logic. He referred me to this email address, suggesting that the information should be relevant to you. Thanks for the help.
> >=20
> > Regards,
> >=20
> > Po-Li Soong
> >=20
>=20
> > maestoso dumped core - see /var/crash/vmcore.0
> >=20
> > Sat Jan  5 19:53:24 MST 2013
> >=20
> > FreeBSD maestoso 9.1-RELEASE FreeBSD 9.1-RELEASE #0 r243825: Tue Dec  4=
 09:23:10 UTC 2012     root@farrell.cse.buffalo.edu:/usr/obj/usr/src/sys/GE=
NERIC  amd64
> >=20
> > panic: page fault
> >=20
> > GNU gdb 6.1.1 [FreeBSD]
> > Copyright 2004 Free Software Foundation, Inc.
> > GDB is free software, covered by the GNU General Public License, and=20
> > you are welcome to change it and/or distribute copies of it under certa=
in conditions.
> > Type "show copying" to see the conditions.
> > There is absolutely no warranty for GDB.  Type "show warranty" for deta=
ils.
> > This GDB was configured as "amd64-marcel-freebsd"...
> >=20
> > Unread portion of the kernel message buffer:
> >=20
> >=20
> > Fatal trap 12: page fault while in kernel mode
> > cpuid = 1; apic id = 01
> > fault virtual address	= 0xfffffffb812815d8
> > fault code		= supervisor read data, page not present
> > instruction pointer	= 0x20:0xffffffff80b50597
> > stack pointer	        = 0x28:0xffffff80fa3bc8d0
> > frame pointer	        = 0x28:0xffffff80fa3bc900
> > code segment		= base 0x0, limit 0xfffff, type 0x1b
> > 			= DPL 0, pres 1, long 1, def32 0, gran 1
> > processor eflags	= interrupt enabled, resume, IOPL = 0
> > current process		= 0 (zio_write_intr_5)
> > trap number		= 12
> > panic: page fault
> > cpuid = 3
> > KDB: stack backtrace:
> > #0 0xffffffff809208a6 at kdb_backtrace+0x66
> > #1 0xffffffff808ea8be at panic+0x1ce
> > #2 0xffffffff80bd8240 at trap_fatal+0x290
> > #3 0xffffffff80bd857d at trap_pfault+0x1ed
> > #4 0xffffffff80bd8b9e at trap+0x3ce
> > #5 0xffffffff80bc315f at calltrap+0x8
> > #6 0xffffffff80b506f5 at vm_page_free_toq+0x45
> > #7 0xffffffff80b4f276 at vm_object_page_remove+0x196
> > #8 0xffffffff80b46b06 at vm_map_delete+0x316
> > #9 0xffffffff80b46c11 at vm_map_remove+0x51
> > #10 0xffffffff80b3a70a at uma_large_free+0x3a
> > #11 0xffffffff808d589a at free+0x5a
> > #12 0xffffffff8169b4ce at zio_done+0x2ee
> > #13 0xffffffff81699063 at zio_execute+0xc3
> > #14 0xffffffff8092cf55 at taskqueue_run_locked+0x85
> > #15 0xffffffff8092ded6 at taskqueue_thread_loop+0x46
> > #16 0xffffffff808bb9ef at fork_exit+0x11f
> > #17 0xffffffff80bc368e at fork_trampoline+0xe
> > Uptime: 3h19m34s
> > Dumping 571 out of 3561
> > MB:..3%..12%..23%..31%..42%..51%..62%..73%..82%..93%
> >=20
> > Reading symbols from /boot/kernel/zfs.ko...Reading symbols from /boot/k=
ernel/zfs.ko.symbols...done.
> > done.
> > Loaded symbols for /boot/kernel/zfs.ko Reading symbols from=20
> > /boot/kernel/opensolaris.ko...Reading symbols from /boot/kernel/opensol=
aris.ko.symbols...done.
> > done.
> > Loaded symbols for /boot/kernel/opensolaris.ko
> > #0  doadump (textdump=3DVariable "textdump" is not available.
> > ) at pcpu.h:224
> > 224	pcpu.h: No such file or directory.
> > 	in pcpu.h
> > (kgdb) #0  doadump (textdump=3DVariable "textdump" is not available.
> > ) at pcpu.h:224
> > #1  0xffffffff808ea3a1 in kern_reboot (howto=3D260)
> >     at /usr/src/sys/kern/kern_shutdown.c:448
> > #2  0xffffffff808ea897 in panic (fmt=3D0x1 <Address 0x1 out of bounds>)
> >     at /usr/src/sys/kern/kern_shutdown.c:636
> > #3  0xffffffff80bd8240 in trap_fatal (frame=3D0xc, eva=3DVariable "eva"=
 is not available.
> > )
> >     at /usr/src/sys/amd64/amd64/trap.c:857
> > #4  0xffffffff80bd857d in trap_pfault (frame=3D0xffffff80fa3bc820, user=
mode=3D0)
> >     at /usr/src/sys/amd64/amd64/trap.c:773
> > #5  0xffffffff80bd8b9e in trap (frame=3D0xffffff80fa3bc820)
> >     at /usr/src/sys/amd64/amd64/trap.c:456
> P
> > #6  0xffffffff80bc315f in calltrap ()
> >     at /usr/src/sys/amd64/amd64/exception.S:228
> > #7  0xffffffff80b50597 in vm_page_remove (m=3D0xfffffe00cd733ab0)
> >     at /usr/src/sys/vm/vm_page.c:975
> > #8  0xffffffff80b506f5 in vm_page_free_toq (m=3D0xfffffe00cd733ab0)
> >     at /usr/src/sys/vm/vm_page.c:1872
> > #9  0xffffffff80b4f276 in vm_object_page_remove (object=3D0xffffffff812=
81580,=20
> >     start=3D477512, end=3D477539, options=3DVariable "options" is not a=
vailable.
> > ) at /usr/src/sys/vm/vm_object.c:1899
> > #10 0xffffffff80b46b06 in vm_map_delete (map=3D0xfffffe00020000e8, star=
t=3DVariable "start" is not available.
> > )
> >     at /usr/src/sys/vm/vm_map.c:2739
> > #11 0xffffffff80b46c11 in vm_map_remove (map=3D0xfffffe00020000e8,=20
> >     start=3D18446743525909626880, end=3D18446743525909737472)
> >     at /usr/src/sys/vm/vm_map.c:2871
> > #12 0xffffffff80b3a70a in uma_large_free (slab=3D0xfffffe00aceff8e0)
> >     at /usr/src/sys/vm/uma_core.c:3085
> > #13 0xffffffff808d589a in free (addr=3D0xffffff8074948000,=20
> >     mtp=3D0xffffffff81747c20) at /usr/src/sys/kern/kern_malloc.c:572
> > #14 0xffffffff8169b4ce in zio_done (zio=3D0xfffffe007a9906e0)
> >     at
> > /usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/f
> > s/
> > zfs/zio.c:2960
> > #15 0xffffffff81699063 in zio_execute (zio=3D0xfffffe007a9906e0)
> >     at
> > /usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/f
> > s/
> > zfs/zio.c:1196
> > #16 0xffffffff8092cf55 in taskqueue_run_locked (queue=3D0xfffffe0006ed9=
a00)
> >     at /usr/src/sys/kern/subr_taskqueue.c:308
> > #17 0xffffffff8092ded6 in taskqueue_thread_loop (arg=3DVariable "arg" i=
s not available.
> > )
> >     at /usr/src/sys/kern/subr_taskqueue.c:497
> > #18 0xffffffff808bb9ef in fork_exit (
> >     callout=3D0xffffffff8092de90 <taskqueue_thread_loop>,=20
> >     arg=3D0xfffffe0006c072e0, frame=3D0xffffff80fa3bcc40)
> >     at /usr/src/sys/kern/kern_fork.c:992
> > #19 0xffffffff80bc368e in fork_trampoline ()
> >     at /usr/src/sys/amd64/amd64/exception.S:602
> > #20 0x0000000000000000 in ?? ()
> > #21 0x0000000000000000 in ?? ()
> > #22 0x0000000000000000 in ?? ()
> > #23 0x0000000000000000 in ?? ()
> > #24 0x0000000000000000 in ?? ()
> > #25 0x0000000000000000 in ?? ()
> > #26 0x0000000000000000 in ?? ()
> > #27 0x0000000000000000 in ?? ()
> > #28 0x0000000000000000 in ?? ()
> > #29 0x0000000000000000 in ?? ()
> > #30 0x0000000000000000 in ?? ()
> > #31 0x0000000000000000 in ?? ()
> > #32 0x0000000000000000 in ?? ()
> > #33 0x0000000000000000 in ?? ()
> > #34 0x0000000000000000 in ?? ()
> > #35 0x0000000000000000 in ?? ()
> > #36 0x0000000000000000 in ?? ()
> > #37 0x0000000000000000 in ?? ()
> > #38 0x0000000000000000 in ?? ()
> > #39 0x0000000000000000 in ?? ()
> > #40 0x0000000000000000 in ?? ()
> > #41 0x0000000000000000 in ?? ()
> > #42 0x0000000000000000 in ?? ()
> > #43 0x0000000000000000 in ?? ()
> > #44 0xffffffff81242880 in tdq_cpu ()
> > #45 0xffffffff81242880 in tdq_cpu ()
> > #46 0xfffffe0006f44000 in ?? ()
> > #47 0x0000000000000000 in ?? ()
> > #48 0xffffff80fa3bc290 in ?? ()
> > #49 0xffffff80fa3bc238 in ?? ()
> > #50 0xfffffe00049a88e0 in ?? ()
> > #51 0xffffffff8091352e in sched_switch (td=3D0xffffffff812228a0,=20
> >     newtd=3D0xfffffe0006c072e0, flags=3DVariable "flags" is not availab=
le.
> > ) at /usr/src/sys/kern/sched_ule.c:1921
> > Previous frame inner to this frame (corrupt stack?)
> > (kgdb)
>=20
> Please, at the kgdb prompt, do
> p/x *(struct vm_object *)0xffffffff81281580
> p/x *(struct vm_page *)0xfffffe00cd733ab0
> list *vm_page_free_toq+0x45



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?0C4D65F6A0FC9E4B95EA114508C7E0FE5F66EF0E>