Date: Fri, 12 May 2017 18:59:39 +0000 From: bugzilla-noreply@freebsd.org To: freebsd-bugs@FreeBSD.org Subject: [Bug 219224] sync hanging in _cv_wait zil_commit zfs_sync sys_sync Message-ID: <bug-219224-8-lg5EP8aqnz@https.bugs.freebsd.org/bugzilla/> In-Reply-To: <bug-219224-8@https.bugs.freebsd.org/bugzilla/> References: <bug-219224-8@https.bugs.freebsd.org/bugzilla/>
next in thread | previous in thread | raw e-mail | index | archive | help
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=3D219224 --- Comment #2 from Tino Reinhardt <t@sdf.org> --- Created attachment 182550 --> https://bugs.freebsd.org/bugzilla/attachment.cgi?id=3D182550&action= =3Dedit "procstat -kk -a" output I'm now on tools-2-b# uname -a FreeBSD tools-2-b 11.0-RELEASE-p10 FreeBSD 11.0-RELEASE-p10 #0 r318236: Fri= May 12 13:52:37 CEST 2017 root@tools-2-b:/usr/obj/usr/src/sys/DEBUG amd64 This is GENERIC plus: options INVARIANTS options INVARIANT_SUPPORT options WITNESS options WITNESS_SKIPSPIN options DEBUG_LOCKS options DEBUG_VFS_LOCKS options DIAGNOSTIC While running sync(2) in a loop, cf-agent hangs quickly: tools-2-b# ktrace cf-agent load: 20.62 cmd: cf-agent 5360 [zilog->zl_cv_batch[0]] 162.96r 0.01u 0.06s= 0% 8740k tools-2-b# kdump -f ktrace.out | tail 5360 cf-agent CALL close(0x5) 5360 cf-agent RET close 0 5360 cf-agent CALL munmap(0x8006b3000,0x10) 5360 cf-agent RET munmap 0 5360 cf-agent CALL munmap(0x8006b4000,0x10) 5360 cf-agent RET munmap 0 5360 cf-agent CALL fcntl(0x4,F_SETLK,0xffffd450) 5360 cf-agent RET fcntl 0 5360 cf-agent CALL unlink(0x7fffffffd080) 5360 cf-agent NAMI "/tmp/SEMDMDBrvXw+F@)aP1" tools-2-b# procstat -kk 5360 PID TID COMM TDNAME KSTACK 5360 102508 cf-agent - mi_switch+0x167 sleepq_switch+0xe7 sleepq_wait+0x43 _cv_wait+0x1e4 zil_commit+0x85 zfs_freebsd_putpages+0x7d6 VOP_PUTPAGES_APV+0x104 vnode_pager_putpages+0x89 vm_pageout_flush+0x12d vm_object_page_collect_flush+0x23a vm_object_page_clean+0x1be vm_object_terminate+0xa9 vnode_destroy_vobject+0= x79 zfs_freebsd_reclaim+0x63 VOP_RECLAIM_APV+0x104 vgonel+0x2bd vrecycle+0x4a zfs_freebsd_inactive+0xd tools-2-b# ps ax | grep sync 27 - DL 0:00.09 [syncer] 15560 0 S+ 0:00.00 grep sync 5177 1 RN 7:16.42 ./syncloop tools-2-b# kill 5177 tools-2-b# kill -9 5177 tools-2-b# procstat -kk 5177 PID TID COMM TDNAME KSTACK=20=20=20=20=20=20=20= =20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20 5177 102515 syncloop - zfs_zget+0x20f zfs_get_data+= 0x7c zil_commit+0x926 zfs_sync+0xad sys_sync+0xe6 amd64_syscall+0x2f6 Xfast_syscall+0xfb=20 tools-2-b# procstat -kk 5177 PID TID COMM TDNAME KSTACK=20=20=20=20=20=20=20= =20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20 5177 102515 syncloop - zfs_zget+0x1b4 zfs_get_data+= 0x7c zil_commit+0x926 zfs_sync+0xad sys_sync+0xe6 amd64_syscall+0x2f6 Xfast_syscall+0xfb=20 tools-2-b# procstat -kk 5177 PID TID COMM TDNAME KSTACK=20=20=20=20=20=20=20= =20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20 5177 102515 syncloop - _sx_slock+0x73 dnode_verify+= 0xe2 dnode_hold_impl+0x53e dmu_bonus_hold+0x25 zfs_zget+0xcf zfs_get_data+0x7c zil_commit+0x926 zfs_sync+0xad sys_sync+0xe6 amd64_syscall+0x2f6 Xfast_syscall+0xfb=20 tools-2-b# procstat -kk 5177 PID TID COMM TDNAME KSTACK=20=20=20=20=20=20=20= =20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20 5177 102515 syncloop - dnode_hold_impl+0x53e dmu_bonus_hold+0x25 zfs_zget+0xcf zfs_get_data+0x7c zil_commit+0x926 zfs_sync+0xad sys_sync+0xe6 amd64_syscall+0x2f6 Xfast_syscall+0xfb=20 tools-2-b# procstat -kk -a > procstat-kk-a.txt tools-2-b# kgdb -q /usr/lib/debug/boot/kernel/kernel.debug /dev/mem Reading symbols from /usr/lib/debug/boot/kernel/zfs.ko.debug...done. Loaded symbols for /usr/lib/debug/boot/kernel/zfs.ko.debug Reading symbols from /usr/lib/debug/boot/kernel/opensolaris.ko.debug...done. Loaded symbols for /usr/lib/debug/boot/kernel/opensolaris.ko.debug Reading symbols from /usr/lib/debug/boot/kernel/carp.ko.debug...done. Loaded symbols for /usr/lib/debug/boot/kernel/carp.ko.debug Reading symbols from /usr/lib/debug/boot/kernel/if_lagg.ko.debug...done. Loaded symbols for /usr/lib/debug/boot/kernel/if_lagg.ko.debug Reading symbols from /usr/lib/debug/boot/kernel/ums.ko.debug...done. Loaded symbols for /usr/lib/debug/boot/kernel/ums.ko.debug Reading symbols from /usr/lib/debug/boot/kernel/pf.ko.debug...done. Loaded symbols for /usr/lib/debug/boot/kernel/pf.ko.debug #0 sched_switch (td=3D0xfffff801b1245500, newtd=3D0xfffff80115531000,=20 flags=3D<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1973 1973 cpuid =3D PCPU_GET(cpuid); (kgdb) tid 102508 [Switching to thread 1672 (Thread 102508)]#0 sched_switch ( td=3D0xfffff8055a385000, newtd=3D0xfffff80115533000,=20 flags=3D<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1973 1973 cpuid =3D PCPU_GET(cpuid); Current language: auto; currently minimal #0 sched_switch (td=3D0xfffff8055a385000, newtd=3D0xfffff80115533000,=20 flags=3D<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1973 #1 0xffffffff80a497d7 in mi_switch (flags=3D260, newtd=3D0x0) at /usr/src/sys/kern/kern_synch.c:455 #2 0xffffffff80a8dac7 in sleepq_switch (wchan=3D<value optimized out>, pri= =3D0) at /usr/src/sys/kern/subr_sleepqueue.c:566 #3 0xffffffff80a8d993 in sleepq_wait (wchan=3D0xfffff801b112f8f8, pri=3D0) at /usr/src/sys/kern/subr_sleepqueue.c:646 #4 0xffffffff809e11b4 in _cv_wait (cvp=3D<value optimized out>,=20 lock=3D<value optimized out>) at /usr/src/sys/kern/kern_condvar.c:144 #5 0xffffffff82238545 in zil_commit (zilog=3D<value optimized out>,=20 foid=3D<value optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c:1574 #6 0xffffffff8226c746 in zfs_freebsd_putpages (ap=3D<value optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:= 4767 #7 0xffffffff81005e74 in VOP_PUTPAGES_APV (vop=3D<value optimized out>,=20 a=3D<value optimized out>) at vnode_if.c:2930 #8 0xffffffff80d3a349 in vnode_pager_putpages (object=3D<value optimized o= ut>,=20 m=3D0xfffffe201e33a3f0, count=3D<value optimized out>, flags=3D0,=20 rtvals=3D0xfffffe201e33a360) at vnode_if.h:1224 #9 0xffffffff80d2f67d in vm_pageout_flush (mc=3D<value optimized out>,=20 count=3D<value optimized out>, flags=3D1, mreq=3D0, prunlen=3D0xfffffe2= 01e33a4ac,=20 eio=3D0xfffffe201e33a530) at vm_pager.h:126 #10 0xffffffff80d26fca in vm_object_page_collect_flush ( object=3D<value optimized out>, p=3D<value optimized out>, pagerflags= =3D1,=20 flags=3D0, clearobjflags=3D<value optimized out>, eio=3D0xfffffe201e33a= 530) at /usr/src/sys/vm/vm_object.c:979 #11 0xffffffff80d26cbe in vm_object_page_clean (object=3D<value optimized o= ut>,=20 start=3D<value optimized out>, end=3D<value optimized out>,=20 flags=3D<value optimized out>) at /usr/src/sys/vm/vm_object.c:905 #12 0xffffffff80d26829 in vm_object_terminate (object=3D<value optimized ou= t>) at /usr/src/sys/vm/vm_object.c:739 #13 0xffffffff80d3a869 in vnode_destroy_vobject (vp=3D0xfffff804e7292000) at /usr/src/sys/vm/vnode_pager.c:173 #14 0xffffffff822761e3 in zfs_freebsd_reclaim (ap=3D<value optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:= 5367 #15 0xffffffff810047a4 in VOP_RECLAIM_APV (vop=3D<value optimized out>,=20 a=3D<value optimized out>) at vnode_if.c:2021 #16 0xffffffff80afdf0d in vgonel (vp=3D<value optimized out>) at vnode_if.h= :830 #17 0xffffffff80afe45a in vrecycle (vp=3D0xfffff804e7292000) at /usr/src/sys/kern/vfs_subr.c:3087 #18 0xffffffff8227616d in zfs_freebsd_inactive (ap=3D<value optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:= 5349 #19 0xffffffff810045f4 in VOP_INACTIVE_APV (vop=3D<value optimized out>,=20 a=3D<value optimized out>) at vnode_if.c:1955 #20 0xffffffff80afca9b in vinactive (vp=3D0xfffff804e7292000,=20 td=3D0xfffff8055a385000) at vnode_if.h:807 #21 0xffffffff80afd2d8 in vputx (vp=3D<value optimized out>,=20 func=3D<value optimized out>) at /usr/src/sys/kern/vfs_subr.c:2682 #22 0xffffffff80b05ae2 in kern_unlinkat (td=3D<value optimized out>, fd=3D-= 100,=20 path=3D0x7fffffffd080 <Error reading address 0x7fffffffd080: Bad addres= s>,=20 pathseg=3DUIO_USERSPACE, oldinum=3D<value optimized out>) at /usr/src/sys/kern/vfs_syscalls.c:1774 #23 0xffffffff80ea4426 in amd64_syscall (td=3D0xfffff8055a385000,=20 traced=3D<value optimized out>) at subr_syscall.c:135 #24 0xffffffff80e8497b in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:396 #25 0x0000000802a72f7a in ?? () Previous frame inner to this frame (corrupt stack?) (kgdb) frame 4 #4 0xffffffff809e11b4 in _cv_wait (cvp=3D<value optimized out>,=20 lock=3D<value optimized out>) at /usr/src/sys/kern/kern_condvar.c:144 144 sleepq_wait(cvp, 0); (kgdb) p *cvp $1 =3D {cv_description =3D 0xffffffff81c81848 "I.=3D\201????",=20 cv_waiters =3D -411880152} (kgdb) frame 5 #5 0xffffffff82238545 in zil_commit (zilog=3D<value optimized out>,=20 foid=3D<value optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c:1574 1574 cv_wait(&zilog->zl_cv_batch[mybatch & 1], &zilog->zl_lock); (kgdb) p *zilog $2 =3D {zl_lock =3D {lock_object =3D {lo_name =3D 0xffffffff81c81848 "I.=3D= \201????",=20 lo_flags =3D 3883087144, lo_data =3D 4294965252, lo_witness =3D 0x0},= =20 sx_lock =3D 18446735298679485752}, zl_dmu_pool =3D 0x0,=20 zl_spa =3D 0xffffffff81bb7cc8, zl_header =3D 0x0, zl_os =3D 0xfffff804e76= 65e80,=20 zl_get_data =3D 0, zl_root_zio =3D 0x0, zl_lr_seq =3D 0,=20 zl_commit_lr_seq =3D 18446741874700784480,=20 zl_destroy_txg =3D 18446735282269118464, zl_replayed_seq =3D 0xfffff8055a= 385068,=20 zl_replaying_seq =3D 18446735284892366080, zl_suspend =3D 0, zl_cv_writer= =3D { cv_description =3D 0x1906cffffffff <Error reading address 0x1906cffffff= ff: Bad address>, cv_waiters =3D 0}, zl_cv_suspend =3D {cv_description =3D 0x0,= =20 cv_waiters =3D 0}, zl_suspending =3D 0 '\0', zl_keep_first =3D 0 '\0',= =20 zl_replay =3D 0 '\0', zl_stop_sync =3D 0 '\0', zl_writer =3D 0 '\0',=20 zl_logbias =3D 0 '\0', zl_sync =3D 0 '\0', zl_parse_error =3D 0,=20 zl_parse_blk_seq =3D 18446735300605006016,=20 zl_parse_lr_seq =3D 18446735298679485736, zl_parse_blk_count =3D 1,=20 zl_parse_lr_count =3D 17179869439, zl_next_batch =3D 2, zl_com_batch =3D = 0,=20 zl_cv_batch =3D 0xfffff8055a3850f8, zl_itxg =3D 0xfffff8055a385118,=20 zl_itx_commit_list =3D {list_size =3D 206226633, list_offset =3D 2, list_= head =3D { list_next =3D 0x8, list_prev =3D 0x0}}, zl_itx_list_sz =3D 15863,=20 zl_cur_used =3D 63452, zl_lwb_list =3D {list_size =3D 79316, list_offset = =3D 0,=20 list_head =3D {list_next =3D 0xc4ac4c9, list_prev =3D 0x0}}, zl_vdev_lo= ck =3D { lock_object =3D {lo_name =3D 0x0, lo_flags =3D 0, lo_data =3D 0,=20 lo_witness =3D 0x0}, sx_lock =3D 270582939648}, zl_vdev_tree =3D { avl_root =3D 0x0, avl_compar =3D 0, avl_offset =3D 4, avl_numnodes =3D = 0,=20 avl_size =3D 0}, zl_clean_taskq =3D 0x612d666300000000, zl_bp_tree =3D { avl_root =3D 0x746e6567, avl_compar =3D 0, avl_offset =3D 0, avl_numnod= es =3D 0,=20 avl_size =3D 0}, zl_replay_time =3D 0, zl_replay_blks =3D 0, zl_old_hea= der =3D { zh_claim_txg =3D 0, zh_replay_seq =3D 0, zh_log =3D { blk_dva =3D 0xfffff8055a3852e0, blk_prop =3D 0,=20 blk_pad =3D 0xfffff8055a385318, blk_phys_birth =3D 0, blk_birth =3D 2= ,=20 blk_fill =3D 18446735284884707568, blk_cksum =3D { zc_word =3D 0xfffff8055a385340}}, zh_claim_blk_seq =3D 0, zh_flags = =3D 0,=20 zh_claim_lr_seq =3D 0, zh_pad =3D 0xfffff8055a385378},=20 zl_prev_blks =3D 0xfffff8055a385390, zl_prev_rotor =3D 0, zl_dirty_link = =3D { tn_next =3D 0xfffff8055a3853d8,=20 tn_member =3D 0xfffff8055a3853f8 "@=D6=A8\200????"}} (kgdb) --=20 You are receiving this mail because: You are the assignee for the bug.=
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bug-219224-8-lg5EP8aqnz>