Date: Fri, 1 Feb 2008 14:13:18 +0100 (CET) From: Wojciech Puchar <wojtek@wojtek.tensor.gdynia.pl> To: Andrea Venturoli <ml@netfence.it> Cc: freebsd-questions@freebsd.org Subject: Re: Panics with GEOM Message-ID: <20080201141231.I2159@wojtek.tensor.gdynia.pl> In-Reply-To: <47A31051.7020908@netfence.it> References: <47A31051.7020908@netfence.it>
next in thread | previous in thread | raw e-mail | index | archive | help
> advise where to post if this is not appropriate. > > An amd64/SMP server of mine worked quite reliably for some times. > Now I added two more disks and created a gmirror with them; from that point > on it experienced locks, crash and panics. I use amd64/SMP+gmirror+gstripe+geli and it has worked stably for a long time. The problem is probably somewhere else. > Since it was a 6.2 at the time, I immediately upgraded to 6.3: this did not > solve, but, at least, the box would reboot and get me a crash dump. > > So here it is: looks like the i/o subsystem has problems, so the two disk > might really be related, but still, I'm not sure. > > > >> # kgdb kernel.debug /var/crash/vmcore.5 >> [GDB will not be able to debug user-mode threads: /usr/lib/libthread_db.so: >> Undefined symbol "ps_pglobal_lookup"] >> GNU gdb 6.1.1 [FreeBSD] >> Copyright 2004 Free Software Foundation, Inc. >> GDB is free software, covered by the GNU General Public License, and you >> are >> welcome to change it and/or distribute copies of it under certain >> conditions. >> Type "show copying" to see the conditions. >> There is absolutely no warranty for GDB. Type "show warranty" for details. >> This GDB was configured as "amd64-marcel-freebsd". >> >> Unread portion of the kernel message buffer: >> >> >> Fatal trap 12: page fault while in kernel mode >> cpuid = 1; apic id = 01 >> fault virtual address = 0x50006 >> fault code = supervisor read data, page not present >> instruction pointer = 0x8:0xffffffff8020e076 >> stack pointer = 0x10:0xffffffffa831c7a0 >> frame pointer = 0x10:0xffffffffa831c7e0 >> code segment = base 0x0, limit 0xfffff, type 0x1b >> = DPL 0, pres 1, long 1, def32 0, gran 1 >> processor eflags = interrupt enabled, resume, IOPL = 0 >> current process = 88587 (clamscan) >> trap number = 12 >> panic: page fault >> cpuid = 1 >> Uptime: 3d8h32m39s >> Dumping 1023 MB (2 chunks) >> chunk 0: 1MB (151 pages) ... 
ok >> chunk 1: 1023MB (261744 pages) 1007 991 975 959 943 >> <110>ipfw: 65534 Deny TCP 192.168.101.1:58319 192.168.101.4:54663 in via >> fxp0 >> <110>ipfw: 65534 Deny TCP 192.168.101.1:58319 192.168.101.4:54663 in via >> fxp0 >> 927 911 895 879 863 847 831 815 799 783 767 751 735 719 703 687 671 655 >> 639 623 607 591 575 559 543 527 511 495 479 463 447 431 415 399 383 367 351 >> 335 319 303 287 271 255 239 223 207 191 175panic: ahd_run_qoutfifo >> recursion >> cpuid = 1 >> 159 143 127 111 95 79 63 47 31 15 >> >> #0 doadump () at pcpu.h:172 >> 172 __asm __volatile("movq %%gs:0,%0" : "=r" (td)); >> (kgdb) bt >> #0 doadump () at pcpu.h:172 >> #1 0xffffffff80257115 in boot (howto=260) at >> /usr/src/sys/kern/kern_shutdown.c:409 >> #2 0xffffffff80257825 in panic (fmt=0xffffff00110e9980 "X\023\021\021") at >> /usr/src/sys/kern/kern_shutdown.c:565 >> #3 0xffffffff803b50c6 in trap_fatal (frame=0xc, eva=18446742974484093312) >> at /usr/src/sys/amd64/amd64/trap.c:669 >> #4 0xffffffff803b546d in trap_pfault (frame=0xffffffffa831c6f0, >> usermode=0) at /usr/src/sys/amd64/amd64/trap.c:580 >> #5 0xffffffff803b56cd in trap (frame= >> {tf_rdi = -1098891843040, tf_rsi = -1098516260992, tf_rdx = >> -1098692440992, tf_rcx = 1, tf_r8 = 0, tf_r9 = 327686, tf_rax = 2048, >> tf_rbx = -1098891843040, tf_rbp = -1473132576, tf_r10 = -1098978658048, >> tf_r11 = -1098938410752, tf_r12 = -1098516260992, tf_r13 = 327686, tf_r14 = >> -1098891842864, tf_r15 = -1705935624, tf_trapno = 12, tf_addr = 327686, >> tf_flags = 1108101564416, tf_err = 0, tf_rip = -2145329034, tf_cs = 8, >> tf_rflags = 66178, tf_rsp = -1473132624, tf_ss = 16}) at >> /usr/src/sys/amd64/amd64/trap.c:353 >> #6 0xffffffff8039c49b in calltrap () at >> /usr/src/sys/amd64/amd64/exception.S:168 >> #7 0xffffffff8020e076 in g_io_request (bp=0xffffff0024f12a20, >> cp=0xffffff003b541780) at /usr/src/sys/geom/geom_io.c:275 >> #8 0xffffffff803709ad in ufs_strategy (ap=0xffffff0024f12a20) at >> /usr/src/sys/ufs/ufs/ufs_vnops.c:1973 >> 
#9 0xffffffff803e5549 in VOP_STRATEGY_APV (vop=0xffffffff805702c0, >> a=0xffffffffa831c840) at vnode_if.c:1796 >> #10 0xffffffff802b510c in bufstrategy (bo=0xffffff0024f12a20, >> bp=0xffffff003b541780) at vnode_if.h:928 >> #11 0xffffffff802b4575 in breadn (vp=0xffffff000a55eba0, >> blkno=-1098516260992, size=819186784, rablkno=0x0, rabsize=0x0, cnt=0, >> cred=0x0, bpp=0x800) at buf.h:426 >> #12 0xffffffff802b48fe in bread (vp=0xffffff0024f12a20, >> blkno=-1098516260992, size=819186784, cred=0x1, bpp=0x0) at >> /usr/src/sys/kern/vfs_bio.c:723 >> #13 0xffffffff80363886 in ffs_read (ap=0xffffff0024f12a20) at >> /usr/src/sys/ufs/ffs/ffs_vnops.c:523 >> #14 0xffffffff803e3efa in VOP_READ_APV (vop=0x800, a=0xffffff003b541780) at >> vnode_if.c:643 >> #15 0xffffffff80370649 in ufs_readdir (ap=0xffffffffa831cad0) at >> vnode_if.h:343 >> #16 0xffffffff803e419d in VOP_READDIR_APV (vop=0x800, a=0xffffff003b541780) >> at vnode_if.c:1427 >> #17 0xffffffff802d0657 in getdirentries (td=0xffffff00110e9980, >> uap=0xffffffffa831cbc0) at vnode_if.h:746 >> #18 0xffffffff803b6052 in syscall (frame= >> {tf_rdi = 4, tf_rsi = 58564608, tf_rdx = 4096, tf_rcx = 58550056, >> tf_r8 = 0, tf_r9 = 140737488347784, tf_rax = 196, tf_rbx = 58550016, tf_rbp >> = 58550016, tf_r10 = 34367908128, tf_r11 = 58626048, tf_r12 = 5320784, >> tf_r13 = 58550016, tf_r14 = 58540768, tf_r15 = 3, tf_trapno = 12, tf_addr = >> 34365898752, tf_flags = 31845, tf_err = 2, tf_rip = 34377717596, tf_cs = >> 43, tf_rflags = 582, tf_rsp = 140737488348456, tf_ss = 35}) at >> /usr/src/sys/amd64/amd64/trap.c:807 >> #19 0xffffffff8039c698 in Xfast_syscall () at >> /usr/src/sys/amd64/amd64/exception.S:287 >> #20 0x000000080112575c in ?? () >> Previous frame inner to this frame (corrupt stack?) 
> > > I guess what I should look into is this (see the arrow): > >> (kgdb) list >> 270 KASSERT(bp->bio_length % cp->provider->sectorsize > == 0, >> 271 ("wrong length %jd for sectorsize %u", >> 272 bp->bio_length, cp->provider->sectorsize)); >> 273 } >> 274 >> 275 ------->>> g_trace(G_T_BIO, "bio_request(%p) from %p(%s) to %p(%s) cmd > %d", >> 276 bp, cp, cp->geom->name, pp, pp->name, bp->bio_cmd); >> 277 >> 278 bp->bio_from = cp; >> 279 bp->bio_to = pp; > >> (kgdb) p bp >> $9 = (struct bio *) 0xffffff0024f12a20 >> (kgdb) p cp >> $10 = (struct g_consumer *) 0xffffff003b541780 >> (kgdb) p pp >> $11 = (struct g_provider *) 0x50006 >> (kgdb) p *bp >> $12 = {bio_cmd = 1 '\001', bio_flags = 0 '\0', bio_cflags = 0 '\0', >> bio_pflags = 0 '\0', bio_dev = 0x0, bio_disk = 0x0, bio_offset = 6160384, >> bio_bcount = 0, >> bio_data = 0xffffffff9dfdc000 "MANT.shx\220@?\024\"\200~@", bio_error = >> 0, bio_resid = 0, bio_done = 0xffffffff802110c0 <g_vfs_done>, bio_driver1 = >> 0x0, bio_driver2 = 0x0, bio_caller1 = 0x0, >> bio_caller2 = 0xffffffff9a517cf8, bio_queue = {tqe_next = 0x0, tqe_prev = >> 0x0}, bio_attribute = 0x0, bio_from = 0x0, bio_to = 0x0, bio_length = 2048, >> bio_completed = 0, bio_children = 0, >> bio_inbed = 0, bio_parent = 0x0, bio_t0 = {sec = 0, frac = 0}, bio_task = >> 0, bio_task_arg = 0x0, bio_pblkno = 0} >> (kgdb) p *cp >> $13 = {geom = 0xffffff0030d3cc60, consumer = {le_next = 0x0, le_prev = >> 0xffffffff9a6718f8}, provider = 0xd0006, consumers = {le_next = >> 0xffffff003b541380, le_prev = 0xffffff002e4072a0}, >> acr = 680, acw = 0, ace = 6531640, spoiled = 0, stat = 0x0, nstart = 0, >> nend = 0, private = 0xffffff0002f8b3c0, index = 995365760} >> (kgdb) p *pp >> Cannot access memory at address 0x50006 >> (kgdb) > > > Can anyone provide some insight? > > > > bye & Thanks > av. 
> > _______________________________________________ > freebsd-questions@freebsd.org mailing list > http://lists.freebsd.org/mailman/listinfo/freebsd-questions > To unsubscribe, send any mail to "freebsd-questions-unsubscribe@freebsd.org" > >
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20080201141231.I2159>