Date: Sun, 23 Aug 1998 16:46:30 -0700 (PDT) From: Matthew Dillon <dillon@backplane.com> To: current@FreeBSD.ORG Subject: kern/7557, More on inode deadlock (was Re: Bizarre deadlock) Message-ID: <199808232346.QAA09537@apollo.backplane.com>
next in thread | raw e-mail | index | archive | help
I have submitted an update to my kern/7557 PR, but it doesn't
seem to have made it into the system. Oh well... here's the
gist:
I managed to get two full debug crash dumps from the inode
lockup problem I reported in kern/7557. A pattern has
emerged. Specifically, in all crashes a ps shows that
two processes are stuck on a busy page and busy bp, as shown
below.
The key appears to be a deadlock somewhere, and although it isn't
these two processes specifically that are deadlocking, I believe
they are involved somehow. Together they (I think) hold a shared
lock (lockcnt = 2) on the associated inode which locks up the
remainder of the system when the system tries to get a lock on
that inode. I do not know what has actually busied the bp and
vm_page_t involved in these two processes' sleep, but I'm guessing
that whatever it is is in a deadlock situation waiting for the
inode while these processes have a shared lock on the inode and
are waiting on the page and bp.
If anyone knows the bp/vm system better, perhaps they can figure
the deadlock out from here. My test SMP box running -current
gets deadlocked once every few days or so from this.
-Matt
original report:
9896 0xfa87c1c0 0xfa8fc000 8 202 202 000105 3 pgtblk 0xf0f6f78c diablo
9890 0xfa796f00 0xfa84c000 8 202 202 000105 3 getblk 0xf6d16868 diablo
new information:
nntp3:/var/crash# ps -M vmcore.7 -N kernel.7 -axl | egrep 'pgtblk|getblk'
8 280 198 1 -18 0 44312 0 pgtblk D ?? 0:00.00 (diablo)
8 319 198 1 -2 0 44312 0 getblk D ?? 0:00.00 (diablo)
nntp3:/var/crash# ps -M vmcore.6 -N kernel.6 -axl | egrep 'pgtblk|getblk'
8 10400 198 0 -2 0 43780 0 getblk D ?? 0:00.00 (diablo)
8 10419 198 0 -18 0 43788 0 pgtblk D ?? 0:00.00 (diablo)
(kgdb) proc 319
(kgdb) back
#0 mi_switch () at ../../kern/kern_synch.c:661
#1 0xf0119fb1 in tsleep (ident=0xf6e400b0, priority=0x14,
wmesg=0xf013432f "getblk", timo=0x0) at ../../kern/kern_synch.c:435
#2 0xf01343dd in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0,
slptimeo=0x0) at ../../kern/vfs_bio.c:1437
#3 0xf01366fb in cluster_read (vp=0xfc24d180, filesize=0x4034b2, lblkno=0xc,
size=0x2000, cred=0x0, totread=0x10000, seqcount=0x8, bpp=0xfc10fd60)
at ../../kern/vfs_cluster.c:114
#4 0xf01ac721 in ffs_read (ap=0xfc10fe18) at ../../ufs/ufs/ufs_readwrite.c:168
#5 0xf01ad2bd in ffs_getpages (ap=0xfc10fe70) at vnode_if.h:303
#6 0xf01c386a in vnode_pager_getpages (object=0xfc3f0220, m=0xfc10ff1c,
count=0x2, reqpage=0x0) at vnode_if.h:1067
#7 0xf01c2587 in vm_pager_get_pages (object=0xfc3f0220, m=0xfc10ff1c,
count=0x2, reqpage=0x0) at ../../vm/vm_pager.c:256
#8 0xf01b6f34 in vm_fault (map=0xfc073380, vaddr=0x22897000, fault_type=0x1,
fault_flags=0x0) at ../../vm/vm_fault.c:424
#9 0xf01daca2 in trap_pfault (frame=0xfc10ffbc, usermode=0x1)
at ../../i386/i386/trap.c:753
#10 0xf01da7e3 in trap (frame={tf_es = 0xefbf0027, tf_ds = 0xfc100027,
tf_edi = 0x1, tf_esi = 0x17fea, tf_ebp = 0xefbfd58c,
tf_isp = 0xfc10ffe4, tf_ebx = 0x18000, tf_edx = 0x2287f000,
tf_ecx = 0x0, tf_eax = 0x9cf7f, tf_trapno = 0xc, tf_err = 0x4,
tf_eip = 0x414c, tf_cs = 0x1f, tf_eflags = 0x10297, tf_esp = 0xefbfd520,
tf_ss = 0x27}) at ../../i386/i386/trap.c:317
#11 0x414c in ?? ()
#12 0x276e in ?? ()
#13 0x1ee1 in ?? ()
#14 0x1809 in ?? ()
#15 0x107e in ?? ()
(kgdb) frame 2
#2 0xf01343dd in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0,
slptimeo=0x0) at ../../kern/vfs_bio.c:1437
1437 if (!tsleep(bp,
(kgdb) print bp
$11 = (struct buf *) 0xf6e400b0
(kgdb) print *bp
$12 = {
b_hash = {
le_next = 0x0,
le_prev = 0xf6e2f0f8
},
b_vnbufs = {
le_next = 0xf6e4f258,
le_prev = 0xfc24d1b0
},
b_freelist = {
tqe_next = 0xf6d77f08,
tqe_prev = 0xf0202158
},
b_act = {
tqe_next = 0x0,
tqe_prev = 0xf1ca0e14
},
b_proc = 0x0,
b_flags = 0x20800030,
b_qindex = 0x0,
b_usecount = 0x6,
b_error = 0x0,
b_bufsize = 0x0,
b_bufsize = 0x0,
b_bcount = 0x0,
b_resid = 0x0,
b_dev = 0xffffffff,
b_data = 0xf95ae000 <Address 0xf95ae000 out of bounds>,
b_kvabase = 0xf95ae000 <Address 0xf95ae000 out of bounds>,
b_kvasize = 0x2000,
b_lblkno = 0xc,
b_blkno = 0xc,
b_offset = 0x0000000000018000,
b_iodone = 0,
b_iodone_chain = 0x0,
b_vp = 0xfc24d180,
b_dirtyoff = 0x0,
b_dirtyend = 0x0,
b_rcred = 0x0,
b_wcred = 0x0,
b_validoff = 0x0,
b_validend = 0x0,
b_pblkno = 0x9804d0,
b_saveaddr = 0x0,
b_savekva = 0x0,
b_driver1 = 0x0,
b_driver2 = 0x0,
b_spc = 0x0,
b_cluster = {
cluster_head = {
tqh_first = 0xf6d77f08,
tqh_last = 0xf6d850e8
},
cluster_entry = {
tqe_next = 0xf6d77f08,
tqe_prev = 0xf6d850e8
}
},
b_pages = {0x0 <repeats 32 times>},
b_npages = 0x0,
b_dep = {
lh_first = 0x0
}
}
(kgdb) proc 280
(kgdb) back
#0 mi_switch () at ../../kern/kern_synch.c:661
#1 0xf0119fb1 in tsleep (ident=0xf0e19ba0, priority=0x4,
wmesg=0xf01346d2 "pgtblk", timo=0x0) at ../../kern/kern_synch.c:435
#2 0xf0134afa in allocbuf (bp=0xf6e400b0, size=0x2000)
at ../../kern/vfs_bio.c:1799
#3 0xf0134612 in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0,
slptimeo=0x0) at ../../kern/vfs_bio.c:1557
#4 0xf0136a5e in cluster_read (vp=0xfc24d180, filesize=0x4034b2, lblkno=0xc,
size=0x2000, cred=0x0, totread=0xc000, seqcount=0x8, bpp=0xfc0f6d60)
at ../../kern/vfs_cluster.c:235
#5 0xf01ac721 in ffs_read (ap=0xfc0f6e18) at ../../ufs/ufs/ufs_readwrite.c:168
#6 0xf01ad2bd in ffs_getpages (ap=0xfc0f6e70) at vnode_if.h:303
#7 0xf01c386a in vnode_pager_getpages (object=0xfc3f0220, m=0xfc0f6f1c,
count=0x2, reqpage=0x0) at vnode_if.h:1067
#8 0xf01c2587 in vm_pager_get_pages (object=0xfc3f0220, m=0xfc0f6f1c,
count=0x2, reqpage=0x0) at ../../vm/vm_pager.c:256
#9 0xf01b6f34 in vm_fault (map=0xfc0738c0, vaddr=0x22891000, fault_type=0x1,
fault_flags=0x0) at ../../vm/vm_fault.c:424
#10 0xf01daca2 in trap_pfault (frame=0xfc0f6fbc, usermode=0x1)
at ../../i386/i386/trap.c:753
#11 0xf01da7e3 in trap (frame={tf_es = 0xefbf0027, tf_ds = 0xfc0f0027,
tf_edi = 0x1, tf_esi = 0x11fd1, tf_ebp = 0xefbfd58c,
tf_isp = 0xfc0f6fe4, tf_ebx = 0x12000, tf_edx = 0x2287f000,
tf_ecx = 0x0, tf_eax = 0x9cf7f, tf_trapno = 0xc, tf_err = 0x4,
tf_eip = 0x414c, tf_cs = 0x1f, tf_eflags = 0x10297, tf_esp = 0xefbfd520,
tf_ss = 0x27}) at ../../i386/i386/trap.c:317
#12 0x414c in ?? ()
#13 0x276e in ?? ()
#14 0x1ee1 in ?? ()
#15 0x1809 in ?? ()
#16 0x107e in ?? ()
(kgdb)
(kgdb) print bp
$14 = (struct buf *) 0xf6e400b0 (this is the same bp)
Matthew Dillon Engineering, HiWay Technologies, Inc. & BEST Internet
Communications
<dillon@backplane.com> (Please include original email in any response)
To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199808232346.QAA09537>
