Date: Sun, 23 Aug 1998 16:46:30 -0700 (PDT) From: Matthew Dillon <dillon@backplane.com> To: current@FreeBSD.ORG Subject: kern/7557, More on inode deadlock (was Re: Bizarre deadlock) Message-ID: <199808232346.QAA09537@apollo.backplane.com>
next in thread | raw e-mail | index | archive | help
I have submitted an update to my kern/7557 PR. Except it doesn't seem to have made it into the system. Oh well... here's the gist: I managed to get two full debug crash dumps from the inode lockup problem I reported in kern/7557. A pattern has emerged. Specifically, in all crashes a ps shows that two processes are stuck on a busy page and busy bp, as shown below. The key appears to be a deadlock somewhere, and although it isn't these two processes specifically that are deadlocking, I believe they are involved somehow. Together they (I think) hold a shared lock (lockcnt = 2) on the associated inode which locks up the remainder of the system when the system tries to get a lock on that inode. I do not know what has actually busied the bp and vm_page_t involved in these two processes' sleep, but I'm guessing that whatever it is is in a deadlock situation waiting for the inode while these processes have a shared lock on the inode and are waiting on the page and bp. If anyone knows the bp/vm system better, perhaps they can figure the deadlock out from here. My test SMP box running -current gets deadlocked once every few days or so from this. -Matt original report: 9896 0xfa87c1c0 0xfa8fc000 8 202 202 000105 3 pgtblk 0xf0f6f78c diablo 9890 0xfa796f00 0xfa84c000 8 202 202 000105 3 getblk 0xf6d16868 diablo new information: nntp3:/var/crash# ps -M vmcore.7 -N kernel.7 -axl | egrep 'pgtblk|getblk' 8 280 198 1 -18 0 44312 0 pgtblk D ?? 0:00.00 (diablo) 8 319 198 1 -2 0 44312 0 getblk D ?? 0:00.00 (diablo) nntp3:/var/crash# ps -M vmcore.6 -N kernel.6 -axl | egrep 'pgtblk|getblk' 8 10400 198 0 -2 0 43780 0 getblk D ?? 0:00.00 (diablo) 8 10419 198 0 -18 0 43788 0 pgtblk D ?? 
0:00.00 (diablo) (kgdb) proc 319 (kgdb) back #0 mi_switch () at ../../kern/kern_synch.c:661 #1 0xf0119fb1 in tsleep (ident=0xf6e400b0, priority=0x14, wmesg=0xf013432f "getblk", timo=0x0) at ../../kern/kern_synch.c:435 #2 0xf01343dd in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0, slptimeo=0x0) at ../../kern/vfs_bio.c:1437 #3 0xf01366fb in cluster_read (vp=0xfc24d180, filesize=0x4034b2, lblkno=0xc, size=0x2000, cred=0x0, totread=0x10000, seqcount=0x8, bpp=0xfc10fd60) at ../../kern/vfs_cluster.c:114 #4 0xf01ac721 in ffs_read (ap=0xfc10fe18) at ../../ufs/ufs/ufs_readwrite.c:168 #5 0xf01ad2bd in ffs_getpages (ap=0xfc10fe70) at vnode_if.h:303 #6 0xf01c386a in vnode_pager_getpages (object=0xfc3f0220, m=0xfc10ff1c, count=0x2, reqpage=0x0) at vnode_if.h:1067 #7 0xf01c2587 in vm_pager_get_pages (object=0xfc3f0220, m=0xfc10ff1c, count=0x2, reqpage=0x0) at ../../vm/vm_pager.c:256 #8 0xf01b6f34 in vm_fault (map=0xfc073380, vaddr=0x22897000, fault_type=0x1, fault_flags=0x0) at ../../vm/vm_fault.c:424 #9 0xf01daca2 in trap_pfault (frame=0xfc10ffbc, usermode=0x1) at ../../i386/i386/trap.c:753 #10 0xf01da7e3 in trap (frame={tf_es = 0xefbf0027, tf_ds = 0xfc100027, tf_edi = 0x1, tf_esi = 0x17fea, tf_ebp = 0xefbfd58c, tf_isp = 0xfc10ffe4, tf_ebx = 0x18000, tf_edx = 0x2287f000, tf_ecx = 0x0, tf_eax = 0x9cf7f, tf_trapno = 0xc, tf_err = 0x4, tf_eip = 0x414c, tf_cs = 0x1f, tf_eflags = 0x10297, tf_esp = 0xefbfd520, tf_ss = 0x27}) at ../../i386/i386/trap.c:317 #11 0x414c in ?? () #12 0x276e in ?? () #13 0x1ee1 in ?? () #14 0x1809 in ?? () #15 0x107e in ?? 
() (kgdb) frame 2 #2 0xf01343dd in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0, slptimeo=0x0) at ../../kern/vfs_bio.c:1437 1437 if (!tsleep(bp, (kgdb) print bp $11 = (struct buf *) 0xf6e400b0 (kgdb) print *bp $12 = { b_hash = { le_next = 0x0, le_prev = 0xf6e2f0f8 }, b_vnbufs = { le_next = 0xf6e4f258, le_prev = 0xfc24d1b0 }, b_freelist = { tqe_next = 0xf6d77f08, tqe_prev = 0xf0202158 }, b_act = { tqe_next = 0x0, tqe_prev = 0xf1ca0e14 }, b_proc = 0x0, b_flags = 0x20800030, b_qindex = 0x0, b_usecount = 0x6, b_error = 0x0, b_bufsize = 0x0, b_bufsize = 0x0, b_bcount = 0x0, b_resid = 0x0, b_dev = 0xffffffff, b_data = 0xf95ae000 <Address 0xf95ae000 out of bounds>, b_kvabase = 0xf95ae000 <Address 0xf95ae000 out of bounds>, b_kvasize = 0x2000, b_lblkno = 0xc, b_blkno = 0xc, b_offset = 0x0000000000018000, b_iodone = 0, b_iodone_chain = 0x0, b_vp = 0xfc24d180, b_dirtyoff = 0x0, b_dirtyend = 0x0, b_rcred = 0x0, b_wcred = 0x0, b_validoff = 0x0, b_validend = 0x0, b_pblkno = 0x9804d0, b_saveaddr = 0x0, b_savekva = 0x0, b_driver1 = 0x0, b_driver2 = 0x0, b_spc = 0x0, b_cluster = { cluster_head = { tqh_first = 0xf6d77f08, tqh_last = 0xf6d850e8 }, cluster_entry = { tqe_next = 0xf6d77f08, tqe_prev = 0xf6d850e8 } }, b_pages = {0x0 <repeats 32 times>}, b_npages = 0x0, b_dep = { lh_first = 0x0 } } (kgdb) proc 280 (kgdb) back #0 mi_switch () at ../../kern/kern_synch.c:661 #1 0xf0119fb1 in tsleep (ident=0xf0e19ba0, priority=0x4, wmesg=0xf01346d2 "pgtblk", timo=0x0) at ../../kern/kern_synch.c:435 #2 0xf0134afa in allocbuf (bp=0xf6e400b0, size=0x2000) at ../../kern/vfs_bio.c:1799 #3 0xf0134612 in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0, slptimeo=0x0) at ../../kern/vfs_bio.c:1557 #4 0xf0136a5e in cluster_read (vp=0xfc24d180, filesize=0x4034b2, lblkno=0xc, size=0x2000, cred=0x0, totread=0xc000, seqcount=0x8, bpp=0xfc0f6d60) at ../../kern/vfs_cluster.c:235 #5 0xf01ac721 in ffs_read (ap=0xfc0f6e18) at ../../ufs/ufs/ufs_readwrite.c:168 #6 0xf01ad2bd in 
ffs_getpages (ap=0xfc0f6e70) at vnode_if.h:303 #7 0xf01c386a in vnode_pager_getpages (object=0xfc3f0220, m=0xfc0f6f1c, count=0x2, reqpage=0x0) at vnode_if.h:1067 #8 0xf01c2587 in vm_pager_get_pages (object=0xfc3f0220, m=0xfc0f6f1c, count=0x2, reqpage=0x0) at ../../vm/vm_pager.c:256 #9 0xf01b6f34 in vm_fault (map=0xfc0738c0, vaddr=0x22891000, fault_type=0x1, fault_flags=0x0) at ../../vm/vm_fault.c:424 #10 0xf01daca2 in trap_pfault (frame=0xfc0f6fbc, usermode=0x1) at ../../i386/i386/trap.c:753 #11 0xf01da7e3 in trap (frame={tf_es = 0xefbf0027, tf_ds = 0xfc0f0027, tf_edi = 0x1, tf_esi = 0x11fd1, tf_ebp = 0xefbfd58c, tf_isp = 0xfc0f6fe4, tf_ebx = 0x12000, tf_edx = 0x2287f000, tf_ecx = 0x0, tf_eax = 0x9cf7f, tf_trapno = 0xc, tf_err = 0x4, tf_eip = 0x414c, tf_cs = 0x1f, tf_eflags = 0x10297, tf_esp = 0xefbfd520, tf_ss = 0x27}) at ../../i386/i386/trap.c:317 #12 0x414c in ?? () #13 0x276e in ?? () #14 0x1ee1 in ?? () #15 0x1809 in ?? () #16 0x107e in ?? () (kgdb) (kgdb) print bp $14 = (struct buf *) 0xf6e400b0 (this is the same bp) Matthew Dillon Engineering, HiWay Technologies, Inc. & BEST Internet Communications <dillon@backplane.com> (Please include original email in any response) To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-current" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199808232346.QAA09537>