Date: Fri, 26 Feb 2016 12:52:25 +0000 From: bugzilla-noreply@freebsd.org To: freebsd-fs@FreeBSD.org Subject: [Bug 204764] Filesystem deadlock, process in vodead state Message-ID: <bug-204764-3630-3BuPvHr9ke@https.bugs.freebsd.org/bugzilla/> In-Reply-To: <bug-204764-3630@https.bugs.freebsd.org/bugzilla/> References: <bug-204764-3630@https.bugs.freebsd.org/bugzilla/>
index | next in thread | previous in thread | raw e-mail
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=204764 johans <johan@300.nl> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |johan@300.nl --- Comment #15 from johans <johan@300.nl> --- We've just run into the same bug on one of our machines. # procstat -kk 52827 PID TID COMM TDNAME KSTACK 52827 100187 ruby21 - mi_switch+0xe1 thread_suspend_switch+0x170 thread_single+0x4e5 exit1+0xbe sigexit+0x925 postsig+0x286 ast+0x427 doreti_ast+0x1f 52827 100389 ruby21 - mi_switch+0xe1 sleepq_wait+0x3a _sleep+0x287 vnode_create_vobject+0x100 zfs_freebsd_open+0xf5 VOP_OPEN_APV+0xa1 vn_open_vnode+0x234 vn_open_cred+0x33e kern_openat+0x26f amd64_syscall+0x357 Xfast_syscall+0xfb (kgdb) print *object $10 = { lock = { lock_object = { lo_name = 0xffffffff80ff8196 "vm object", lo_flags = 90374144, lo_data = 0, lo_witness = 0x0 }, rw_lock = 1 }, object_list = { tqe_next = 0xfffff80135aaa600, tqe_prev = 0xfffff80135aaa420 }, shadow_head = { lh_first = 0x0 }, shadow_list = { le_next = 0x0, le_prev = 0xfffff8019f85f030 }, memq = { tqh_first = 0x0, tqh_last = 0xfffff80135aaa548 }, rtree = { rt_root = 0, rt_flags = 0 '\0' }, size = 0, generation = 1, ref_count = 0, shadow_count = 0, memattr = 6 '\006', type = 2 '\002', flags = 24584, pg_color = 1569, paging_in_progress = 0, resident_page_count = 0, backing_object = 0x0, backing_object_offset = 0, pager_object_list = { tqe_next = 0x0, tqe_prev = 0x0 }, rvq = { lh_first = 0x0 }, cache = { rt_root = 0, rt_flags = 0 '\0' }, handle = 0xfffff8010bc08760, un_pager = { vnp = { vnp_size = 0, writemappings = 0 }, devp = { devp_pglist = { tqh_first = 0x0, tqh_last = 0x0 }, ops = 0x0, dev = 0x0 }, sgp = { sgp_pglist = { tqh_first = 0x0, tqh_last = 0x0 } }, swp = { swp_tmpfs = 0x0, swp_bcount = 0 } }, cred = 0x0, charge = 0 } (kgdb) p *vp $11 = { v_tag = 0xffffffff81acefb6 "zfs", v_op = 0xffffffff81ae12f0, v_data = 0xfffff80031109450, v_mount = 0xfffff8000801a330, v_nmntvnodes = { tqe_next 
= 0xfffff801c912a1d8, tqe_prev = 0xfffff8022bfa0780 }, v_un = { vu_mount = 0x0, vu_socket = 0x0, vu_cdev = 0x0, vu_fifoinfo = 0x0 }, v_hashlist = { le_next = 0x0, le_prev = 0x0 }, v_cache_src = { lh_first = 0x0 }, v_cache_dst = { tqh_first = 0xfffff801a260f4d0, tqh_last = 0xfffff801a260f4f0 }, v_cache_dd = 0x0, v_lock = { lock_object = { lo_name = 0xffffffff81acefb6 "zfs", lo_flags = 117112832, lo_data = 0, lo_witness = 0x0 }, lk_lock = 1, lk_exslpfail = 0, lk_timo = 51, lk_pri = 96 }, v_interlock = { lock_object = { lo_name = 0xffffffff80fd2f2d "vnode interlock", lo_flags = 16973824, lo_data = 0, lo_witness = 0x0 }, mtx_lock = 4 }, v_vnlock = 0xfffff8010bc087c8, v_actfreelist = { tqe_next = 0xfffff80079317000, tqe_prev = 0xfffff80092a9e820 }, v_bufobj = { bo_lock = { lock_object = { lo_name = 0xffffffff80fd2f3d "bufobj interlock", lo_flags = 86179840, lo_data = 0, lo_witness = 0x0 }, rw_lock = 1 }, bo_ops = 0xffffffff8149bff0, bo_object = 0xfffff80135aaa500, bo_synclist = { le_next = 0x0, le_prev = 0x0 }, bo_private = 0xfffff8010bc08760, __bo_vnode = 0xfffff8010bc08760, bo_clean = { bv_hd = { tqh_first = 0x0, tqh_last = 0xfffff8010bc08880 }, bv_root = { pt_root = 0 }, bv_cnt = 0 }, bo_dirty = { bv_hd = { tqh_first = 0x0, tqh_last = 0xfffff8010bc088a0 }, bv_root = { pt_root = 0 }, bv_cnt = 0 }, bo_numoutput = 0, bo_flag = 0, bo_bsize = 131072 }, v_pollinfo = 0x0, v_label = 0x0, v_lockf = 0x0, v_rl = { rl_waiters = { tqh_first = 0x0, tqh_last = 0xfffff8010bc088e8 }, rl_currdep = 0x0 }, v_cstart = 0, v_lasta = 0, v_lastw = 0, v_clen = 0, v_holdcnt = 2, v_usecount = 2, v_iflag = 512, v_vflag = 0, v_writecount = 0, v_hash = 17547399, v_type = VREG } (kgdb) print vp->v_cache_dst->tqh_first->nc_name $14 = 0xfffff801a260f512 "puppet20160226-52827-x6ea8y" The file where things go wrong for us is: /tmp/puppet20160226-52827-x6ea8y Performing a cat on this file also hangs on vodead. 
Any relevant waiting channels: # ps -o lwp -laxwwwSH | awk '{ if ($10 !~ "^(-|ttyin|lockf|wait|select|kqread|tx\->)") print; }' LWP UID PID PPID CPU PRI NI VSZ RSS MWCHAN STAT TT TIME COMMAND 100000 0 0 0 0 -16 0 0 285088 swapin DLs - 0:41.09 [kernel/swapper] 100048 0 0 0 0 -92 0 0 285088 vtbslp DLs - 0:00.00 [kernel/virtio_ballo] 100018 0 2 0 0 -16 0 0 16 crypto_w DL - 0:00.00 [crypto] 100019 0 3 0 0 -16 0 0 16 crypto_r DL - 0:00.00 [crypto returns] 100057 0 5 0 0 20 0 0 6176 arc_recl DL - 13:07.13 [zfskern/arc_reclaim] 100058 0 5 0 0 -8 0 0 6176 l2arc_fe DL - 0:40.44 [zfskern/l2arc_feed_] 102486 0 5 0 0 -8 0 0 6176 zio->io_ DL - 8:24.66 [zfskern/txg_thread_] 100062 0 7 0 0 -16 0 0 32 psleep DL - 17:00.73 [pagedaemon/pagedaem] 100068 0 7 0 0 -16 0 0 32 umarcl DL - 0:00.00 [pagedaemon/uma] 100063 0 8 0 0 -16 0 0 16 psleep DL - 0:00.00 [vmdaemon] 100064 0 9 0 0 155 0 0 16 pgzero DL - 0:00.17 [pagezero] 100001 0 10 0 0 -16 0 0 16 audit_wo DL - 14:36.35 [audit] 100065 0 16 0 0 -16 0 0 16 psleep DL - 0:51.50 [bufdaemon] 100066 0 17 0 0 -16 0 0 16 vlruwt DL - 1:27.70 [vnlru] 100067 0 18 0 0 16 0 0 16 syncer DL - 63:11.82 [syncer] 100406 65534 921 1 0 52 0 71104 33604 uwait Is - 0:00.00 /usr/local/bin/memcached -l 127.0.0.1 -m 1024 -p 11211 -C -d -u nobody -P /var/run/memcached/memcached.pid 100387 0 958 1 0 23 0 16612 2224 nanslp Is - 97:24.14 /usr/sbin/cron -s 100394 0 1194 0 0 -16 0 0 16 pftm DL - 30:48.46 [pf purge] 100388 0 1201 1 0 52 0 14700 2256 sbwait Is - 0:00.00 pflogd: [priv] (pflogd) 100375 64 1202 1201 0 20 0 14700 2312 bpf S - 5:36.59 pflogd: [running] -s 116 -i pflog0 -f /dev/null (pflogd) 100389 0 52827 1 0 52 0 213452 112968 vodead T - 0:08.69 ruby21: puppet agent: applying configuration (ruby21) 100400 0 55604 0 0 -16 0 0 16 ftcl DL - 0:00.00 [ftcleanup] 101076 0 58064 1 0 20 0 16588 2168 auditd Is - 0:07.02 /usr/sbin/auditd 101349 0 37651 37601 0 20 0 12356 2132 vodead D+ 0- 0:00.00 cat puppet20160226-52827-x6ea8y This is the first time we're seeing 
this. -- You are receiving this mail because: You are the assignee for the bug. help
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bug-204764-3630-3BuPvHr9ke>
