Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 23 Aug 1998 16:46:30 -0700 (PDT)
From:      Matthew Dillon <dillon@backplane.com>
To:        current@FreeBSD.ORG
Subject:   kern/7557,  More on inode deadlock (was Re: Bizarre deadlock)
Message-ID:  <199808232346.QAA09537@apollo.backplane.com>

next in thread | raw e-mail | index | archive | help
    I have submitted an update to my kern/7557 PR.  Except it doesn't
    seem to have made it into the system.  Oh well.. here's the
    gist:

    I managed to get two full debug crash dumps from the inode
    lockup problem I reported in kern/7557.   A pattern has
    emerged.  Specifically, in all crashes a ps shows that
    two processes are stuck on a busy page and busy bp, as shown
    below.  

    The key appears to be a deadlock somewhere, and although it isn't
    these two processes specifically that are deadlocking, I believe
    they are involved somehow.  Together they (I think) hold a shared
    lock (lockcnt = 2) on the associated inode which locks up the
    remainder of the system when the system tries to get a lock on
    that inode.  I do not know what has actually busied the bp and
    vm_page_t involved in these two processes' sleep, but I'm guessing
    that whatever it is is in a deadlock situation waiting for the 
    inode while these processes have a shared lock on the inode and
    are waiting on the page and bp. 

    If anyone knows the bp/vm system better, perhaps they can figure
    the deadlock out from here.  My test SMP box running -current
    gets deadlocked once every few days or so from this.

					-Matt

original report:
    9896 0xfa87c1c0 0xfa8fc000    8   202   202 000105  3  pgtblk 0xf0f6f78c diablo
    9890 0xfa796f00 0xfa84c000    8   202   202 000105  3  getblk 0xf6d16868 diablo

new information:

nntp3:/var/crash# ps -M vmcore.7 -N kernel.7 -axl | egrep 'pgtblk|getblk'
    8   280   198   1 -18  0 44312    0 pgtblk D     ??    0:00.00  (diablo)
    8   319   198   1  -2  0 44312    0 getblk D     ??    0:00.00  (diablo)

nntp3:/var/crash# ps -M vmcore.6 -N kernel.6 -axl | egrep 'pgtblk|getblk'
    8 10400   198   0  -2  0 43780    0 getblk D     ??    0:00.00  (diablo)
    8 10419   198   0 -18  0 43788    0 pgtblk D     ??    0:00.00  (diablo)

(kgdb) proc 319
(kgdb) back
#0  mi_switch () at ../../kern/kern_synch.c:661
#1  0xf0119fb1 in tsleep (ident=0xf6e400b0, priority=0x14, 
    wmesg=0xf013432f "getblk", timo=0x0) at ../../kern/kern_synch.c:435
#2  0xf01343dd in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0, 
    slptimeo=0x0) at ../../kern/vfs_bio.c:1437
#3  0xf01366fb in cluster_read (vp=0xfc24d180, filesize=0x4034b2, lblkno=0xc, 
    size=0x2000, cred=0x0, totread=0x10000, seqcount=0x8, bpp=0xfc10fd60)
    at ../../kern/vfs_cluster.c:114
#4  0xf01ac721 in ffs_read (ap=0xfc10fe18) at ../../ufs/ufs/ufs_readwrite.c:168
#5  0xf01ad2bd in ffs_getpages (ap=0xfc10fe70) at vnode_if.h:303
#6  0xf01c386a in vnode_pager_getpages (object=0xfc3f0220, m=0xfc10ff1c, 
    count=0x2, reqpage=0x0) at vnode_if.h:1067
#7  0xf01c2587 in vm_pager_get_pages (object=0xfc3f0220, m=0xfc10ff1c, 
    count=0x2, reqpage=0x0) at ../../vm/vm_pager.c:256
#8  0xf01b6f34 in vm_fault (map=0xfc073380, vaddr=0x22897000, fault_type=0x1, 
    fault_flags=0x0) at ../../vm/vm_fault.c:424
#9  0xf01daca2 in trap_pfault (frame=0xfc10ffbc, usermode=0x1)
    at ../../i386/i386/trap.c:753
#10 0xf01da7e3 in trap (frame={tf_es = 0xefbf0027, tf_ds = 0xfc100027, 
      tf_edi = 0x1, tf_esi = 0x17fea, tf_ebp = 0xefbfd58c, 
      tf_isp = 0xfc10ffe4, tf_ebx = 0x18000, tf_edx = 0x2287f000, 
      tf_ecx = 0x0, tf_eax = 0x9cf7f, tf_trapno = 0xc, tf_err = 0x4, 
      tf_eip = 0x414c, tf_cs = 0x1f, tf_eflags = 0x10297, tf_esp = 0xefbfd520, 
      tf_ss = 0x27}) at ../../i386/i386/trap.c:317
#11 0x414c in ?? ()
#12 0x276e in ?? ()
#13 0x1ee1 in ?? ()
#14 0x1809 in ?? ()
#15 0x107e in ?? ()

(kgdb) frame 2
#2  0xf01343dd in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0, 
    slptimeo=0x0) at ../../kern/vfs_bio.c:1437
1437                            if (!tsleep(bp,
(kgdb) print bp
$11 = (struct buf *) 0xf6e400b0
(kgdb) print *bp
$12 = {
  b_hash = {
    le_next = 0x0, 
    le_prev = 0xf6e2f0f8
  }, 
  b_vnbufs = {
    le_next = 0xf6e4f258, 
    le_prev = 0xfc24d1b0
  }, 
  b_freelist = {
    tqe_next = 0xf6d77f08, 
    tqe_prev = 0xf0202158
  }, 
  b_act = {
    tqe_next = 0x0, 
    tqe_prev = 0xf1ca0e14
  }, 
  b_proc = 0x0, 
  b_flags = 0x20800030, 
  b_qindex = 0x0, 
  b_usecount = 0x6, 
  b_error = 0x0, 
  b_bufsize = 0x0, 
  b_bufsize = 0x0, 
  b_bcount = 0x0, 
  b_resid = 0x0, 
  b_dev = 0xffffffff, 
  b_data = 0xf95ae000 <Address 0xf95ae000 out of bounds>, 
  b_kvabase = 0xf95ae000 <Address 0xf95ae000 out of bounds>, 
  b_kvasize = 0x2000, 
  b_lblkno = 0xc, 
  b_blkno = 0xc, 
  b_offset = 0x0000000000018000, 
  b_iodone = 0, 
  b_iodone_chain = 0x0, 
  b_vp = 0xfc24d180, 
  b_dirtyoff = 0x0, 
  b_dirtyend = 0x0, 
  b_rcred = 0x0, 
  b_wcred = 0x0, 
  b_validoff = 0x0, 
  b_validend = 0x0, 
  b_pblkno = 0x9804d0, 
  b_saveaddr = 0x0, 
  b_savekva = 0x0, 
  b_driver1 = 0x0, 
  b_driver2 = 0x0, 
  b_spc = 0x0, 
  b_cluster = {
    cluster_head = {
      tqh_first = 0xf6d77f08, 
      tqh_last = 0xf6d850e8
    }, 
    cluster_entry = {
      tqe_next = 0xf6d77f08, 
      tqe_prev = 0xf6d850e8
    }
  }, 
  b_pages = {0x0 <repeats 32 times>}, 
  b_npages = 0x0, 
  b_dep = {
    lh_first = 0x0
  }
}
(kgdb) proc 280
(kgdb) back
#0  mi_switch () at ../../kern/kern_synch.c:661
#1  0xf0119fb1 in tsleep (ident=0xf0e19ba0, priority=0x4, 
    wmesg=0xf01346d2 "pgtblk", timo=0x0) at ../../kern/kern_synch.c:435
#2  0xf0134afa in allocbuf (bp=0xf6e400b0, size=0x2000)
    at ../../kern/vfs_bio.c:1799
#3  0xf0134612 in getblk (vp=0xfc24d180, blkno=0xc, size=0x2000, slpflag=0x0, 
    slptimeo=0x0) at ../../kern/vfs_bio.c:1557
#4  0xf0136a5e in cluster_read (vp=0xfc24d180, filesize=0x4034b2, lblkno=0xc, 
    size=0x2000, cred=0x0, totread=0xc000, seqcount=0x8, bpp=0xfc0f6d60)
    at ../../kern/vfs_cluster.c:235
#5  0xf01ac721 in ffs_read (ap=0xfc0f6e18) at ../../ufs/ufs/ufs_readwrite.c:168
#6  0xf01ad2bd in ffs_getpages (ap=0xfc0f6e70) at vnode_if.h:303
#7  0xf01c386a in vnode_pager_getpages (object=0xfc3f0220, m=0xfc0f6f1c, 
    count=0x2, reqpage=0x0) at vnode_if.h:1067
#8  0xf01c2587 in vm_pager_get_pages (object=0xfc3f0220, m=0xfc0f6f1c, 
    count=0x2, reqpage=0x0) at ../../vm/vm_pager.c:256
#9  0xf01b6f34 in vm_fault (map=0xfc0738c0, vaddr=0x22891000, fault_type=0x1, 
    fault_flags=0x0) at ../../vm/vm_fault.c:424
#10 0xf01daca2 in trap_pfault (frame=0xfc0f6fbc, usermode=0x1)
    at ../../i386/i386/trap.c:753
#11 0xf01da7e3 in trap (frame={tf_es = 0xefbf0027, tf_ds = 0xfc0f0027, 
      tf_edi = 0x1, tf_esi = 0x11fd1, tf_ebp = 0xefbfd58c, 
      tf_isp = 0xfc0f6fe4, tf_ebx = 0x12000, tf_edx = 0x2287f000, 
      tf_ecx = 0x0, tf_eax = 0x9cf7f, tf_trapno = 0xc, tf_err = 0x4, 
      tf_eip = 0x414c, tf_cs = 0x1f, tf_eflags = 0x10297, tf_esp = 0xefbfd520, 
      tf_ss = 0x27}) at ../../i386/i386/trap.c:317
#12 0x414c in ?? ()
#13 0x276e in ?? ()
#14 0x1ee1 in ?? ()
#15 0x1809 in ?? ()
#16 0x107e in ?? ()
(kgdb) 
(kgdb) print bp
$14 = (struct buf *) 0xf6e400b0         (this is the same bp)

    Matthew Dillon  Engineering, HiWay Technologies, Inc. & BEST Internet 
                    Communications
    <dillon@backplane.com> (Please include original email in any response)    

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?199808232346.QAA09537>