Date: Fri, 26 Feb 2016 12:52:25 +0000 From: bugzilla-noreply@freebsd.org To: freebsd-fs@FreeBSD.org Subject: [Bug 204764] Filesystem deadlock, process in vodead state Message-ID: <bug-204764-3630-3BuPvHr9ke@https.bugs.freebsd.org/bugzilla/> In-Reply-To: <bug-204764-3630@https.bugs.freebsd.org/bugzilla/> References: <bug-204764-3630@https.bugs.freebsd.org/bugzilla/>
next in thread | previous in thread | raw e-mail | index | archive | help
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=3D204764 johans <johan@300.nl> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |johan@300.nl --- Comment #15 from johans <johan@300.nl> --- We've just ran into the same bug on one of our machines. # procstat -kk 52827 PID TID COMM TDNAME KSTACK=20=20=20=20=20=20=20= =20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20 52827 100187 ruby21 - mi_switch+0xe1 thread_suspend_switch+0x170 thread_single+0x4e5 exit1+0xbe sigexit+0x925 postsig+0x286 ast+0x427 doreti_ast+0x1f=20 52827 100389 ruby21 - mi_switch+0xe1 sleepq_wait+0= x3a _sleep+0x287 vnode_create_vobject+0x100 zfs_freebsd_open+0xf5 VOP_OPEN_APV+= 0xa1 vn_open_vnode+0x234 vn_open_cred+0x33e kern_openat+0x26f amd64_syscall+0x357 Xfast_syscall+0xfb=20 (kgdb) print *object $10 =3D { lock =3D { lock_object =3D { lo_name =3D 0xffffffff80ff8196 "vm object",=20 lo_flags =3D 90374144,=20 lo_data =3D 0,=20 lo_witness =3D 0x0 },=20 rw_lock =3D 1 },=20 object_list =3D { tqe_next =3D 0xfffff80135aaa600,=20 tqe_prev =3D 0xfffff80135aaa420 },=20 shadow_head =3D { lh_first =3D 0x0 },=20 shadow_list =3D { le_next =3D 0x0,=20 le_prev =3D 0xfffff8019f85f030 },=20 memq =3D { tqh_first =3D 0x0,=20 tqh_last =3D 0xfffff80135aaa548 },=20 rtree =3D { rt_root =3D 0,=20 rt_flags =3D 0 '\0' },=20 size =3D 0,=20 generation =3D 1,=20 ref_count =3D 0,=20 shadow_count =3D 0,=20 memattr =3D 6 '\006',=20 type =3D 2 '\002',=20 flags =3D 24584,=20 pg_color =3D 1569,=20 paging_in_progress =3D 0,=20 resident_page_count =3D 0,=20 backing_object =3D 0x0,=20 backing_object_offset =3D 0,=20 pager_object_list =3D { tqe_next =3D 0x0,=20 tqe_prev =3D 0x0 },=20 rvq =3D { lh_first =3D 0x0 },=20 cache =3D { rt_root =3D 0,=20 rt_flags =3D 0 '\0' },=20 handle =3D 0xfffff8010bc08760,=20 un_pager =3D { vnp =3D { vnp_size =3D 0,=20 writemappings =3D 0 },=20 devp =3D { devp_pglist =3D { tqh_first =3D 0x0,=20 tqh_last =3D 0x0 },=20 ops =3D 0x0,=20 dev =3D 0x0 },=20 sgp =3D { sgp_pglist =3D { tqh_first =3D 0x0,=20 tqh_last =3D 0x0 } },=20 swp =3D { swp_tmpfs =3D 0x0,=20 swp_bcount =3D 0 } },=20 cred =3D 0x0,=20 charge =3D 0 } (kgdb) p *vp $11 =3D { v_tag =3D 0xffffffff81acefb6 "zfs",=20 v_op =3D 0xffffffff81ae12f0,=20 v_data =3D 0xfffff80031109450,=20 v_mount =3D 0xfffff8000801a330,=20 v_nmntvnodes =3D { tqe_next =3D 0xfffff801c912a1d8,=20 tqe_prev =3D 0xfffff8022bfa0780 },=20 v_un =3D { vu_mount =3D 0x0,=20 vu_socket =3D 0x0,=20 vu_cdev =3D 0x0,=20 vu_fifoinfo =3D 0x0 },=20 v_hashlist =3D { le_next =3D 0x0,=20 le_prev =3D 0x0 },=20 v_cache_src =3D { lh_first =3D 0x0 },=20 v_cache_dst =3D { tqh_first =3D 0xfffff801a260f4d0,=20 tqh_last =3D 0xfffff801a260f4f0 },=20 v_cache_dd =3D 0x0,=20 v_lock =3D { lock_object =3D { lo_name =3D 0xffffffff81acefb6 "zfs",=20 lo_flags =3D 117112832,=20 lo_data =3D 0,=20 lo_witness =3D 0x0 },=20 lk_lock =3D 1,=20 lk_exslpfail =3D 0,=20 lk_timo =3D 51,=20 lk_pri =3D 96 },=20 v_interlock =3D { lock_object =3D { lo_name =3D 0xffffffff80fd2f2d "vnode interlock",=20 lo_flags =3D 16973824,=20 lo_data =3D 0,=20 lo_witness =3D 0x0 },=20 mtx_lock =3D 4 },=20 v_vnlock =3D 0xfffff8010bc087c8,=20 v_actfreelist =3D { tqe_next =3D 0xfffff80079317000,=20 tqe_prev =3D 0xfffff80092a9e820 },=20 v_bufobj =3D { bo_lock =3D { lock_object =3D { lo_name =3D 0xffffffff80fd2f3d "bufobj interlock",=20 lo_flags =3D 86179840,=20 lo_data =3D 0,=20 lo_witness =3D 0x0 },=20 rw_lock =3D 1 },=20 bo_ops =3D 0xffffffff8149bff0,=20 bo_object =3D 0xfffff80135aaa500,=20 bo_synclist =3D { le_next =3D 0x0,=20 le_prev =3D 0x0 },=20 bo_private =3D 0xfffff8010bc08760,=20 __bo_vnode =3D 0xfffff8010bc08760,=20 bo_clean =3D { bv_hd =3D { tqh_first =3D 0x0,=20 tqh_last =3D 0xfffff8010bc08880 },=20 bv_root =3D { pt_root =3D 0 },=20 bv_cnt =3D 0 },=20 bo_dirty =3D { bv_hd =3D { tqh_first =3D 0x0,=20 tqh_last =3D 0xfffff8010bc088a0 },=20 bv_root =3D { pt_root =3D 0 },=20 bv_cnt =3D 0 },=20 bo_numoutput =3D 0,=20 bo_flag =3D 0,=20 bo_bsize =3D 131072 },=20 v_pollinfo =3D 0x0,=20 v_label =3D 0x0,=20 v_lockf =3D 0x0,=20 v_rl =3D { rl_waiters =3D { tqh_first =3D 0x0,=20 tqh_last =3D 0xfffff8010bc088e8 },=20 rl_currdep =3D 0x0 },=20 v_cstart =3D 0,=20 v_lasta =3D 0,=20 v_lastw =3D 0,=20 v_clen =3D 0,=20 v_holdcnt =3D 2,=20 v_usecount =3D 2,=20 v_iflag =3D 512,=20 v_vflag =3D 0,=20 v_writecount =3D 0,=20 v_hash =3D 17547399,=20 v_type =3D VREG } (kgdb) print vp->v_cache_dst->tqh_first->nc_name $14 =3D 0xfffff801a260f512 "puppet20160226-52827-x6ea8y" The file where things go wrong for us is: /tmp/puppet20160226-52827-x6ea8y Performing a cat on this file also hangs on vodead. Any relevant waiting channels: # ps -o lwp -laxwwwSH | awk '{ if ($10 !~ "^(-|ttyin|lockf|wait|select|kqread|tx\->)") print; }'=20 LWP UID PID PPID CPU PRI NI VSZ RSS MWCHAN STAT TT=20=20= =20=20=20=20=20=20 TIME COMMAND 100000 0 0 0 0 -16 0 0 285088 swapin DLs -=20=20= =20=20=20 0:41.09 [kernel/swapper] 100048 0 0 0 0 -92 0 0 285088 vtbslp DLs -=20=20= =20=20=20 0:00.00 [kernel/virtio_ballo] 100018 0 2 0 0 -16 0 0 16 crypto_w DL -=20=20= =20=20=20 0:00.00 [crypto] 100019 0 3 0 0 -16 0 0 16 crypto_r DL -=20=20= =20=20=20 0:00.00 [crypto returns] 100057 0 5 0 0 20 0 0 6176 arc_recl DL -=20=20= =20=20 13:07.13 [zfskern/arc_reclaim] 100058 0 5 0 0 -8 0 0 6176 l2arc_fe DL -=20=20= =20=20=20 0:40.44 [zfskern/l2arc_feed_] 102486 0 5 0 0 -8 0 0 6176 zio->io_ DL -=20=20= =20=20=20 8:24.66 [zfskern/txg_thread_] 100062 0 7 0 0 -16 0 0 32 psleep DL -=20=20= =20=20 17:00.73 [pagedaemon/pagedaem] 100068 0 7 0 0 -16 0 0 32 umarcl DL -=20=20= =20=20=20 0:00.00 [pagedaemon/uma] 100063 0 8 0 0 -16 0 0 16 psleep DL -=20=20= =20=20=20 0:00.00 [vmdaemon] 100064 0 9 0 0 155 0 0 16 pgzero DL -=20=20= =20=20=20 0:00.17 [pagezero] 100001 0 10 0 0 -16 0 0 16 audit_wo DL -=20=20= =20=20 14:36.35 [audit] 100065 0 16 0 0 -16 0 0 16 psleep DL -=20=20= =20=20=20 0:51.50 [bufdaemon] 100066 0 17 0 0 -16 0 0 16 vlruwt DL -=20=20= =20=20=20 1:27.70 [vnlru] 100067 0 18 0 0 16 0 0 16 syncer DL -=20=20= =20=20 63:11.82 [syncer] 100406 65534 921 1 0 52 0 71104 33604 uwait Is -=20=20= =20=20=20 0:00.00 /usr/local/bin/memcached -l 127.0.0.1 -m 1024 -p 11211 -C -d -u nob= ody -P /var/run/memcached/memcached.pid 100387 0 958 1 0 23 0 16612 2224 nanslp Is -=20=20= =20=20 97:24.14 /usr/sbin/cron -s 100394 0 1194 0 0 -16 0 0 16 pftm DL -=20=20= =20=20 30:48.46 [pf purge] 100388 0 1201 1 0 52 0 14700 2256 sbwait Is -=20=20= =20=20=20 0:00.00 pflogd: [priv] (pflogd) 100375 64 1202 1201 0 20 0 14700 2312 bpf S -=20=20= =20=20=20 5:36.59 pflogd: [running] -s 116 -i pflog0 -f /dev/null (pflogd) 100389 0 52827 1 0 52 0 213452 112968 vodead T -=20=20= =20=20=20 0:08.69 ruby21: puppet agent: applying configuration (ruby21) 100400 0 55604 0 0 -16 0 0 16 ftcl DL -=20=20= =20=20=20 0:00.00 [ftcleanup] 101076 0 58064 1 0 20 0 16588 2168 auditd Is -=20=20= =20=20=20 0:07.02 /usr/sbin/auditd 101349 0 37651 37601 0 20 0 12356 2132 vodead D+ 0-=20= =20=20=20 0:00.00 cat puppet20160226-52827-x6ea8y This is the first time we're seeing this. --=20 You are receiving this mail because: You are the assignee for the bug.=
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bug-204764-3630-3BuPvHr9ke>