Skip site navigation (1) Skip section navigation (2)
Date:      Mon, 25 Nov 2002 17:50:17 -0800 (PST)
From:      Nate Lawson <nate@root.org>
To:        current@freebsd.org
Cc:        rwatson@freebsd.org
Subject:   More info on blocked procs with make -j3 buildworld
Message-ID:  <Pine.BSF.4.21.0211251701010.84019-100000@root.org>

next in thread | raw e-mail | index | archive | help
More info.  Hopefully someone will bite one of these times.  So far I've
got no takers, so if you're interested, please help.

I've got a box that was operational but can no longer start any new procs
(can't log in).  It looks like the nightly 'make -j3 buildworld' hung it.
I left it in ddb in the hope that someone will ask the right questions.

There are about 100 cron processes sleeping in "ppwait" and "thrd_sleep".
Their traces are all the same:

cron 1
----
mi_switch(c24d31a0,44,c037b496,cc,c03fc118) at mi_switch+0x1e1
msleep(c0b740b4,c03f7928,44,c038c5d9,0) at msleep+0x453
acquire(c0b740b4,1000000,600,e3,10595) at acquire+0xa7
lockmgr(c0b740b4,2,0,c24d31a0,246) at lockmgr+0x378
_vm_map_lock(c0b74078,c038c47c,1d0,c02207e0,c24d1ca8) at _vm_map_lock+0x5b
kmem_alloc_wait(c0b74078,11000,c03784b8,cb,0) at kmem_alloc_wait+0x38
kern_execve(c24d31a0,80590c6,bfbff6ec,8052160,0) at kern_execve+0x1de
execve(c24d31a0,cdc67d10,c039575a,409,3) at execve+0x30
syscall(2f,2f,2f,805213a,bfbff6ec) at syscall+0x28e

cron 2
----
mi_switch(c24d3270,5c,c037b496,cc,c24d4a20) at mi_switch+0x1e1
msleep(c24d1a80,c24d1928,5c,c0378708,0) at msleep+0x453
fork1(c24d3270,80000034,0,cdc6acd4,c24d1ae8) at fork1+0x11bb
vfork(c24d3270,cdc6ad10,c039575a,409,0) at vfork+0x52
syscall(2f,2f,2f,0,8052040) at syscall+0x28e

There are a few procs that look interesting that are also blocked.  I've
shortened their pids for easy reference.

Interesting Procs
----
1 cc	wait
2 cc	wait
3 cc	wait
4 cc1	ufs
5 cc	thrd_sleep
6 cc1	thrd_sleep
7 as	ufs
8 cc	ufs
9 as	thrd_sleep

4, 7 (8 very similar)
mi_switch(c1a2d410,50,c037b496,cc,1) at mi_switch+0x1e1
msleep(c226168c,c03f8010,50,c038b03e,0) at msleep+0x453
acquire(c226168c,1000040,700,101,104d2) at acquire+0xa7
lockmgr(c226168c,1030041,c22615c8,c1a2d410,cd2cab34) at lockmgr+0x16d
vop_stdlock(cd2cab50,cd2cab70,c028902b,cd2cab50,c0382bd2) at vop_stdlock+0x2c
### 281         return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
### 282             ap->a_td, "vop_stdlock", vp->filename, vp->line));
ufs_vnoperate(cd2cab50,c0382bd2,815,cd2cab70,c0220731) at ufs_vnoperate+0x18
vn_lock(c22615c8,1030041,c1a2d410,815,18b) at vn_lock+0xeb
vget(c22615c8,1020041,c1a2d410,432,18b) at vget+0xd6
vnode_pager_lock(c1f5e5b0,8230000,1,cd2cac60,cd2cac50) at vnode_pager_lock+0x95
vm_fault(c19c4b28,8230000,1,0,c1a2d410) at vm_fault+0x2cb
trap_pfault(cd2cad48,1,8230ab0,299,8230ab0) at trap_pfault+0xef
trap(2f,2f,2f,bfbff664,2d) at trap+0x287
calltrap() at calltrap+0x5

5
mi_switch(c1926a90,44,c037b496,cc,251) at mi_switch+0x1e1
msleep(c0b740b4,c03f7928,44,c038c5d9,0) at msleep+0x453
acquire(c0b740b4,1000000,600,e3,104d3) at acquire+0xa7
lockmgr(c0b740b4,2,0,c1926a90,c1926a90) at lockmgr+0x378
_vm_map_lock_read(c0b74078,c038c63c,a72,1380116,0) at _vm_map_lock_read+0x5b
### 366     error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
vm_map_lookup(cce35880,c7f50000,1,cce35884,cce35874) at vm_map_lookup+0x38
vm_fault(c0832000,c7f50000,1,0,c1926a90) at vm_fault+0xa5
trap_pfault(cce3596c,0,c7f50000,1000000,c7f50000) at trap_pfault+0x161
trap(18,c1920010,10,80c7000,c7f50000) at trap+0x41d
calltrap() at calltrap+0x5
--- trap 0xc, eip = 0xc033c5ee, esp = 0xcce359ac, ebp = 0xcce359fc ---
generic_copyout(c1a2ca80,c1e6572c,c220d128,c1f5ef08,7d2e0) at generic_copyout+0x36
exec_elf32_imgact(cce35b88,0,c03784b8,101,ffffffff) at exec_elf32_imgact+0x297
kern_execve(c1926a90,806b340,80734b8,8070380,0) at kern_execve+0x3f2
execve(c1926a90,cce35d10,c039575a,409,3) at execve+0x30
syscall(2f,2f,2f,0,1) at syscall+0x28e
Xint0x80_syscall() at Xint0x80_syscall+0x1d

6, 9
mi_switch(c1926750,44,c037b496,cc,c03fc208) at mi_switch+0x1e1
msleep(c0b740b4,c03f7928,44,c038c5d9,0) at msleep+0x453
acquire(c0b740b4,1000000,600,e3,104d4) at acquire+0xa7
lockmgr(c0b740b4,2,0,c1926750,c1926750) at lockmgr+0x378
_vm_map_lock(c0b74078,c038c47c,1ed,c1b95800,c1926750) at _vm_map_lock+0x5b
### 366     error = lockmgr(&map->lock, LK_EXCLUSIVE, NULL, curthread);
kmem_free_wakeup(c0b74078,c7f0a000,11000,0,0) at kmem_free_wakeup+0x2a
kern_execve(c1926750,806b300,8073400,8070300,0) at kern_execve+0xd57
execve(c1926750,cce29d10,c039575a,409,3) at execve+0x30
syscall(2f,2f,2f,0,0) at syscall+0x28e
Xint0x80_syscall() at Xint0x80_syscall+0x1d

8 (frames above the copyout trap are the same as for 4, 7)
mi_switch(c1b5f340,50,c037b496,cc,c04026a0) at mi_switch+0x1e1
msleep(c226168c,c03f8010,50,c038b03e,0) at msleep+0x453
acquire(c226168c,1000040,700,101,104d6) at acquire+0xa7
lockmgr(c226168c,1030041,c22615c8,c1b5f340,cd38c758) at lockmgr+0x16d
vop_stdlock(cd38c774,cd38c794,c028902b,cd38c774,c0382bd2) at vop_stdlock+0x2c
### 281         return (debuglockmgr(vp->v_vnlock, ap->a_flags, VI_MTX(vp),
### 282             ap->a_td, "vop_stdlock", vp->filename, vp->line));
ufs_vnoperate(cd38c774,c0382bd2,815,cd38c794,c0220731) at ufs_vnoperate+0x18
vn_lock(c22615c8,1030041,c1b5f340,815,18b) at vn_lock+0xeb
vget(c22615c8,1020041,c1b5f340,432,18b) at vget+0xd6
vnode_pager_lock(c1f5e5b0,c7f3e000,1,cd38c884,cd38c874) at vnode_pager_lock+0x95
vm_fault(c0832000,c7f3e000,1,0,c1b5f340) at vm_fault+0x2cb
trap_pfault(cd38c96c,0,c7f3e000,1000000,c7f3e000) at trap_pfault+0x161
trap(18,c1b50010,10,82de000,c7f3e000) at trap+0x41d
calltrap() at calltrap+0x5
--- trap 0xc, eip = 0xc033c5ee, esp = 0xcd38c9ac, ebp = 0xcd38c9fc ---
generic_copyout(c1a21a80,c1b98bf4,c22615c8,c1f5e5b0,294000) at generic_copyout+0x36
exec_elf32_imgact(cd38cb88,0,c03784b8,101,ffffffff) at exec_elf32_imgact+0x297
kern_execve(c1b5f340,806b300,8073400,8070300,0) at kern_execve+0x3f2
execve(c1b5f340,cd38cd10,c039575a,409,3) at execve+0x30
syscall(2f,2f,2f,0,1) at syscall+0x28e
Xint0x80_syscall() at Xint0x80_syscall+0x1d

----

I'm guessing that 5 is blocking 6 and 9.  It seems that 4, 7, and 8 are all
fine and waiting for the vnode correctly.  But I don't know this area of the
code or the theory behind it, so someone please reply.

Thanks,
Nate


To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-current" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?Pine.BSF.4.21.0211251701010.84019-100000>