Date: Mon, 27 Jun 2005 15:41:20 -0500 From: dave baukus <dbaukus@chiaro.com> To: freebsd-scsi@freebsd.org Subject: iur crash ? -- long Message-ID: <42C06470.9080700@chiaro.com>
next in thread | raw e-mail | index | archive | help
I have a crash on BSD4.10 w/ a heavily modified network stack, but the disk/scsi subsystem is ostensively unmodified. I'm reasonably certain that the crash is caused by iir_intr(void *arg) passing 0xcOdedead to bcopy() as a length. We have INVARIANTS enabled. The quick question is: has this been seen ? The long question goes like this: ---------------------- Here's the stack trace: #0 dumpsys () at ../../kern/kern_shutdown.c:519 #1 0xc01e7253 in boot (howto=0x100) at ../../kern/kern_shutdown.c:331 #2 0xc01e76cb in panic (fmt=0xc04cb120 "vm_fault: fault on nofault entry, addr: %lx") at ../../kern/kern_shutdown.c:635 #3 0xc03c810c in vm_fault (map=0xc059fa78, vaddr=0xf5315000, fault_type=0x1, fault_flags=0x0) at ../../vm/vm_fault.c:240 #4 0xc041ca5e in trap_pfault (frame=0xf8225d90, usermode=0x0, eva=0xf5315000) at ../../i386/i386/trap.c:921 #5 0xc041c5e0 in trap (frame={tf_fs = 0xc01e0010, tf_es = 0xffff0010, tf_ds = 0xcb3c0010, tf_edi = 0xcbaeb52e, tf_esi = 0xf5315000, tf_ebp = 0xf8225e0c, tf_isp = 0xf8225dbc, tf_ebx = 0xc0dedead, tf_edx = 0xf5222b20, tf_ecx = 0x3033ee73, tf_eax = 0xd67d652e, tf_trapno = 0xc, tf_err = 0x0, tf_eip = 0xc041b502, tf_cs = 0x8, tf_eflags = 0x10202, tf_esp = 0xcadf9000, tf_ss = 0xcb9f9000}) at ../../i386/i386/trap.c:500 #6 0xc041b502 in generic_bcopy () #7 0xc04252fd in intr_mux (arg=x) at ../../i386/isa/intr_machdep.c:609 #8 0xc040e74e in vec9 () #9 0xc01df10b in exit1 (p=0xf80c88a0, rv=0xf) at ../../kern/kern_exit.c:225 #10 0xc01e9262 in sigexit (p=0xf80c88a0, sig=0xf) at ../../kern/kern_sig.c:1519 #11 0xc01e8fa4 in postsig (sig=0xf) at ../../kern/kern_sig.c:1422 #12 0xc041d254 in syscall2 (frame={tf_fs = 0xbfbf002f, tf_es = 0x8fe002f, tf_ds = 0xbfbf002f, tf_edi = 0xbfbff800, tf_esi = 0x8fe5540, tf_ebp = 0xbfbff800, tf_isp = 0xf8225fd4, tf_ebx = 0x64, tf_edx = 0xbfbff780, tf_ecx = 0xbfbff700, tf_eax = 0x4, tf_trapno = 0x7, tf_err = 0x2, tf_eip = 0x88fe6ef4, tf_cs = 0x1f, tf_eflags = 0x203, tf_esp = 0xbfbff604, tf_ss = 0x2f}) at ../../i386/i386/trap.c:177 -------------------------------- Since intr_mux() does not call generic_bcopy()/bcopy(), the stack frames must be a mangled. The frames 4 to 7 look like: 0xf8225d10: 0xf8225d40 0xc01eabd3 0xffffffff frame 4 0xf8225d44 trap_pfault 0xf8225d20: 0xc041ca5e 0xc059fa78 0xf5315000 0x00000001 0xf8225d30: 0x00000000 0x0000000c 0xf80c88a0 0xf5315000 frame 5 trap 0xf8225d40: 0xcb3cdf01 0xf8225d88 0xc041c5e0 0xf8225d90 0xf8225d50: 0x00000000 0xf5315000 0x006c0200 0xf5315000 0xf8225d60: 0xcbaeb52e 0x00000000 0xc0dedead 0x00000000 0xf8225d70: 0xcbae4e5a 0xf8225d80 0xc0421783 0xc04217e9 calltrap 0xf8225d80: 0xf8225e0c 0xc040dcb4 0xf8225e0c 0xc040d0c0 0xf8225d90: 0xc01e0010 0xffff0010 0xcb3c0010 0xcbaeb52e 0xf8225da0: 0xf5315000 0xf8225e0c 0xf8225dbc 0xc0dedead 0xf8225db0: 0xf5222b20 0x3033ee73 0xd67d652e 0x0000000c 0xf8225dc0: 0x00000000 0xc041b502 0x00000008 0x00010202 0xf8225dd0: 0xcadf9000 0xcb9f9000 0xc01b7e11 0xf5222b20 0xf8225de0: 0xcb9f904e 0xc0dedead 0xc6048460 0x00400200 0xf8225df0: 0x00000000 0xf5222b20 0x006c0200 0x00000000 0xf8225e00: 0x00000000 0x00090001 0x006fc67b frame 7 0xf8225e24 intr_mux 0xf8225e10: 0xc04252fd 0xcadf9000 0x006c0200 0x00000000 0xf8225e20: 0xc2ee216c 0xf8225e90 0xc040e1d7 0xc6048460 ------------------------------------ It's between frame 7 (intr_mux()) and frame 5 (trap()), that I begin guessing at the sequence of events. Based on the 0xcadf9000 at 0xf8225e14 I speculate that iir_intr was the last interrupt routine called. Here is the intrec * list passed to intr_mux() set $P=(intrec *)0xc6048460 (kgdb) intrecwalk $P $186 = {mask = 0x6c0200, handler = 0xc01b7b90 <iir_intr>, argument = 0xcadf9000, next = 0xc60483e0, name = 0xcadf3b80 "iir0", intr = 0x9, maskptr = 0xc0552b54, flags = 0x0} $187 = {mask = 0x660200, handler = 0xc03ef350 <em_intr>, argument = 0xcadfc000, next = 0xc6048260, name = 0xcadf3a60 "em0", intr = 0x9, maskptr = 0xc0552b50, flags = 0x0} $188 = {mask = 0x660200, handler = 0xc03ef350 <em_intr>, argument = 0xcadfd000, next = 0xcae01e60, name = 0xcadf3950 "em1", intr = 0x9, maskptr = 0xc0552b50, flags = 0x0} $189 = {mask = 0x660200, handler = 0xc03ef350 <em_intr>, argument = 0xcadff000, next = 0xcae01ce0, name = 0xcadf3810 "em2", intr = 0x9, maskptr = 0xc0552b50, flags = 0x0} $190 = {mask = 0x660200, handler = 0xc03ef350 <em_intr>, argument = 0xcae03000, next = 0xcae01ba0, name = 0xcadf3700 "em3", intr = 0x9, maskptr = 0xc0552b50, flags = 0x0} $191 = {mask = 0x630212, handler = 0xc03b6570 <ics_intr>, argument = 0x0, next = 0xcae01b00, name = 0xcadf3620 "ics0", intr = 0x9, maskptr = 0xc0552b48, flags = 0x0} $192 = {mask = 0x660200, handler = 0xc03ef350 <em_intr>, argument = 0xcae04000, next = 0xcae01a00, name = 0xcadf3520 "em4", intr = 0x9, maskptr = 0xc0552b50, flags = 0x0} $193 = {mask = 0x660200, handler = 0xc03ef350 <em_intr>, argument = 0xcae06000, next = 0xcae01880, name = 0xcadf3410 "em5", intr = 0x9, maskptr = 0xc0552b50, flags = 0x0} $194 = {mask = 0x660200, handler = 0xc03ef350 <em_intr>, argument = 0xcae07000, next = 0xcae01760, name = 0xcadf3300 "em6", intr = 0x9, maskptr = 0xc0552b50, flags = 0x0} $195 = {mask = 0x68c640, handler = 0xc03d74b0 <uhci_intr>, argument = 0xcae09000, next = 0x0, name = 0xcadf32c0 "uhci0", intr = 0x9, maskptr = 0xc0552b4c, flags = 0x0} ----------------------------------------------- Since I think its iir_intr that was called, I poke around in the stack frames between frame 7 and 5. At 0xf8225dd8 I see the value 0xc01b7e11 (kgdb) x 0xc01b7e11 0xc01b7e11 <iir_intr+641>: 0x830cc483 (kgdb) disass <iir_intr+641>: ... ... ... 0xc01b7dff <iir_intr+623>: test %ebx,%ebx 0xc01b7e01 <iir_intr+625>: je 0xc01b7e14 <iir_intr+644> 0xc01b7e03 <iir_intr+627>: push %ebx 0xc01b7e04 <iir_intr+628>: lea 0x4e(%esi),%eax 0xc01b7e07 <iir_intr+631>: push %eax 0xc01b7e08 <iir_intr+632>: mov 0xffffffe8(%ebp),%edx 0xc01b7e0b <iir_intr+635>: push %edx 0xc01b7e0c <iir_intr+636>: call 0xc041b4d8 <bcopy> 0xc01b7e11 <iir_intr+641>: add $0xc,%esp 0xc01b7e14 <iir_intr+644>: cmpl $0x0,0x42(%esi) ----------------------------------------------------- Therefore, 0xcadf9000 is the struct gdt_softc * argument to iir_intr() (kgdb) set $SC=(struct gdt_softc *)0xcadf9000 (kgdb) p *$SC $217 = {sc_hanum = 0x0, sc_class = 0x5, sc_bus = 0x4, sc_slot = 0x8, sc_device = 0x600, sc_subdevice = 0x1af, sc_fw_vers = 0x22a, sc_init_level = 0x6, sc_state = 0x0, sc_dev = 0xcadf4000, sc_dpmemt = 0x1, sc_dpmemh = 0xf31c7000, sc_dpmembase = 0xf8000000, sc_parent_dmat = 0xcadfad00, sc_buffer_dmat = 0xcadfacc0, sc_gccb_dmat = 0xcadfac80, sc_gccb_dmamap = 0x0, sc_gccb_busbase = 0x1d000, sc_gccbs = 0xf51c7000, sc_free_gccb = { slh_first = 0xf51e1860}, sc_pending_gccb = {slh_first = 0xf5211440}, sc_ccb_queue = {tqh_first = 0x0, tqh_last = 0xcadf9050}, sc_ucmd_queue = {tqh_first = 0x0, tqh_last = 0xcadf9058}, sc_ic_all_size = 0x2fc0, sc_cmd_len = 0x24, sc_cmd_off = 0x24, sc_cmd_cnt = 0x1, sc_cmd = "\000\000\000\000d\000\000\000\002\000\000\000¿#\235\000\200\000\000\000ÿÿÿÿ\001\000\000\000\000\000\006â\000\000\001", '\000' <repeats 288 times>, sc_info = 0x0, sc_info2 = 0x0, sc_status = 0x1000, sc_service = 0x0, sc_bus_cnt = 0x3, sc_virt_bus = 0x2, sc_bus_id = "\a\a\000\000\000", sc_more_proc = 0x0, sc_hdr = {{ hd_present = 0x1, hd_is_logdrv = 0x0, hd_is_arraydrv = 0x0, hd_is_master = 0x0, hd_is_parity = 0x0, hd_is_hotfix = 0x0, hd_master_no = 0x0, hd_lock = 0x0, hd_heads = 0xff, hd_secs = 0x3f, hd_devtype = 0x0, hd_size = 0x88efe6a, hd_ldr_no = 0x0, hd_rw_attribs = 0x0, hd_start_sec = 0x0}, {hd_present = 0x0, hd_is_logdrv = 0x0, hd_is_arraydrv = 0x0, hd_is_master = 0x0, hd_is_parity = 0x0, hd_is_hotfix = 0x0, hd_master_no = 0x0, hd_lock = 0x0, hd_heads = 0x0, hd_secs = 0x0, hd_devtype = 0x0, hd_size = 0x0, hd_ldr_no = 0x0, hd_rw_attribs = 0x0, hd_start_sec = 0x0} <repeats 99 times>}, sc_raw_feat = 0x1, sc_cache_feat = 0x101, sc_dvr = {size = 0x0, eu = {stream = '\000' <repeats 15 times>, driver = {ionode = 0x0, service = 0x0, index = 0x0}, async = {ionode = 0x0, service = 0x0, status = 0x0, info = 0x0, scsi_coord = "\000\000"}, sync = {ionode = 0x0, service = 0x0, status = 0x0, info = 0x0, hostdrive = 0x0, scsi_coord = "\000\000", sense_key = 0x0}, test = {l1 = 0x0, l2 = 0x0, l3 = 0x0, l4 = 0x0}}, severity = 0x0, event_string = '\000' <repeats 255 times>}, sims = {0xcadfac00, 0xcadfab40, 0xcadfaa80, 0x0, 0x0, 0x0}, paths = { 0xcadf3c20, 0xcadf3bf0, 0xcadf3bc0, 0x0, 0x0, 0x0}, sc_copy_cmd = 0xc01b90d4 <gdt_mpr_copy_cmd>, sc_get_status = 0xc01b9190 <gdt_mpr_get_status>, sc_intr = 0xc01b91b4 <gdt_mpr_intr>, sc_release_event = 0xc01b92d0 <gdt_mpr_release_event>, sc_set_sema0 = 0xc01b92f0 <gdt_mpr_set_sema0>, sc_test_busy = 0xc01b9310 <gdt_mpr_test_busy>, links = {tqe_next = 0x0, tqe_prev = 0xc04ffe80}} ----------------------------------------------------- Now I try to figure which iir_intr() code path was executed. Only the case GDT_GCF_IOCTL: code path leads to a bcopy(). I walked all the struct gdt_ccb * in gdt->sc_gccbs[], Only 1 has a non-zero gccb->gc_flags value; its value is 4 (GDT_GCF_IOCTL) (kgdb) set $SCBS=(struct gdt_ccb *)&$SC->sc_gccbs[121] (kgdb) p *$SCBS $218 = {gc_scratch = "\001\000\0013", '\000' <repeats 3067 times>, gc_ccb = 0xcb16c400, gc_ucmd = 0xcb9f9000, gc_dmamap = 0x0, gc_map_flag = 0x1, gc_timeout = 0x0, gc_state = 0x0, gc_service = 0x9, gc_cmd_index = 0x7b, gc_flags = 0x4, sle = {sle_next = 0x0}} --------------------------------------- Down to the bcopy(): the bcopy() decission is made off of values in gc_ucmd, and nothing good can come from using most of these values: (kgdb) set $UCMD=(gdt_ucmd_t *)$SCBS->gc_ucmd (kgdb) p *$UCMD $219 = {io_node = 0xc0de, service = 0xdead, timeout = 0xc05076a0, status = 0x1, info = 0x0, BoardNode = 0xc0ded8b2, CommandIndex = 0xc0dedead, OpCode = 0xdead, u = {cache = {DeviceNo = 0xc0de, BlockNo = 0xc0dedead, BlockCnt = 0xc0dedead, DestAddr = 0xc0dedead}, ioctl = {param_size = 0xc0de, subfunc = 0xc0dedead, channel = 0xc0dedead, p_param = 0xc0dedead}, raw = {reserved = 0xc0de, direction = 0xc0dedead, mdisc_time = 0xc0dedead, mcon_time = 0xc0dedead, sdata = 0xc0dedead, sdlen = 0xc0dedead, clen = 0xc0dedead, cmd = "ÞÞÀÞÞÀÞÞÀ", target = 0xad, lun = 0xde, bus = 0x1, priority = 0x0, sense_len = 0x0, sense_data = 0x0, link_p = 0x10}}, data = "\001\000\0013", '\000' <repeats 3067 times>, complete_flag = 0xcb16c400, links = { tqe_next = 0xcb9f9000, tqe_prev = 0x0}} ---------------------------- Not knowing anything about iir/scsi, it appears to me that gdt->sc_gccbs[121]->gc_ucmd has been freed and yet is still referenced and in use. How is this ddt_ucmd_t * gc_ucmd data managed ? Is it actively malloc()ed and free()d ? Any clues or pointers will be appreciated. -- Dave Baukus dbaukus@chiaro.com Chiaro Networks Ltd. Richardson, Texas USA
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?42C06470.9080700>