Skip site navigation (1)Skip section navigation (2)
Date:      Fri, 26 Feb 2016 12:52:25 +0000
From:      bugzilla-noreply@freebsd.org
To:        freebsd-fs@FreeBSD.org
Subject:   [Bug 204764] Filesystem deadlock, process in vodead state
Message-ID:  <bug-204764-3630-3BuPvHr9ke@https.bugs.freebsd.org/bugzilla/>
In-Reply-To: <bug-204764-3630@https.bugs.freebsd.org/bugzilla/>
References:  <bug-204764-3630@https.bugs.freebsd.org/bugzilla/>

next in thread | previous in thread | raw e-mail | index | archive | help
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=3D204764

johans <johan@300.nl> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |johan@300.nl

--- Comment #15 from johans <johan@300.nl> ---
We've just ran into the same bug on one of our machines.

# procstat -kk 52827
  PID    TID COMM             TDNAME           KSTACK=20=20=20=20=20=20=20=
=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20=20
52827 100187 ruby21           -                mi_switch+0xe1
thread_suspend_switch+0x170 thread_single+0x4e5 exit1+0xbe sigexit+0x925
postsig+0x286 ast+0x427 doreti_ast+0x1f=20
52827 100389 ruby21           -                mi_switch+0xe1 sleepq_wait+0=
x3a
_sleep+0x287 vnode_create_vobject+0x100 zfs_freebsd_open+0xf5 VOP_OPEN_APV+=
0xa1
vn_open_vnode+0x234 vn_open_cred+0x33e kern_openat+0x26f amd64_syscall+0x357
Xfast_syscall+0xfb=20


(kgdb) print *object
$10 =3D {
  lock =3D {
    lock_object =3D {
      lo_name =3D 0xffffffff80ff8196 "vm object",=20
      lo_flags =3D 90374144,=20
      lo_data =3D 0,=20
      lo_witness =3D 0x0
    },=20
    rw_lock =3D 1
  },=20
  object_list =3D {
    tqe_next =3D 0xfffff80135aaa600,=20
    tqe_prev =3D 0xfffff80135aaa420
  },=20
  shadow_head =3D {
    lh_first =3D 0x0
  },=20
  shadow_list =3D {
    le_next =3D 0x0,=20
    le_prev =3D 0xfffff8019f85f030
  },=20
  memq =3D {
    tqh_first =3D 0x0,=20
    tqh_last =3D 0xfffff80135aaa548
  },=20
  rtree =3D {
    rt_root =3D 0,=20
    rt_flags =3D 0 '\0'
  },=20
  size =3D 0,=20
  generation =3D 1,=20
  ref_count =3D 0,=20
  shadow_count =3D 0,=20
  memattr =3D 6 '\006',=20
  type =3D 2 '\002',=20
  flags =3D 24584,=20
  pg_color =3D 1569,=20
  paging_in_progress =3D 0,=20
  resident_page_count =3D 0,=20
  backing_object =3D 0x0,=20
  backing_object_offset =3D 0,=20
  pager_object_list =3D {
    tqe_next =3D 0x0,=20
    tqe_prev =3D 0x0
  },=20
  rvq =3D {
    lh_first =3D 0x0
  },=20
  cache =3D {
    rt_root =3D 0,=20
    rt_flags =3D 0 '\0'
  },=20
  handle =3D 0xfffff8010bc08760,=20
  un_pager =3D {
    vnp =3D {
      vnp_size =3D 0,=20
      writemappings =3D 0
    },=20
    devp =3D {
      devp_pglist =3D {
        tqh_first =3D 0x0,=20
        tqh_last =3D 0x0
      },=20
      ops =3D 0x0,=20
      dev =3D 0x0
    },=20
    sgp =3D {
      sgp_pglist =3D {
        tqh_first =3D 0x0,=20
        tqh_last =3D 0x0
      }
    },=20
    swp =3D {
      swp_tmpfs =3D 0x0,=20
      swp_bcount =3D 0
    }
  },=20
  cred =3D 0x0,=20
  charge =3D 0
}

(kgdb) p *vp
$11 =3D {
  v_tag =3D 0xffffffff81acefb6 "zfs",=20
  v_op =3D 0xffffffff81ae12f0,=20
  v_data =3D 0xfffff80031109450,=20
  v_mount =3D 0xfffff8000801a330,=20
  v_nmntvnodes =3D {
    tqe_next =3D 0xfffff801c912a1d8,=20
    tqe_prev =3D 0xfffff8022bfa0780
  },=20
  v_un =3D {
    vu_mount =3D 0x0,=20
    vu_socket =3D 0x0,=20
    vu_cdev =3D 0x0,=20
    vu_fifoinfo =3D 0x0
  },=20
  v_hashlist =3D {
    le_next =3D 0x0,=20
    le_prev =3D 0x0
  },=20
  v_cache_src =3D {
    lh_first =3D 0x0
  },=20
  v_cache_dst =3D {
    tqh_first =3D 0xfffff801a260f4d0,=20
    tqh_last =3D 0xfffff801a260f4f0
  },=20
  v_cache_dd =3D 0x0,=20
  v_lock =3D {
    lock_object =3D {
      lo_name =3D 0xffffffff81acefb6 "zfs",=20
      lo_flags =3D 117112832,=20
      lo_data =3D 0,=20
      lo_witness =3D 0x0
    },=20
    lk_lock =3D 1,=20
    lk_exslpfail =3D 0,=20
    lk_timo =3D 51,=20
    lk_pri =3D 96
  },=20
  v_interlock =3D {
    lock_object =3D {
      lo_name =3D 0xffffffff80fd2f2d "vnode interlock",=20
      lo_flags =3D 16973824,=20
      lo_data =3D 0,=20
      lo_witness =3D 0x0
    },=20
    mtx_lock =3D 4
  },=20
  v_vnlock =3D 0xfffff8010bc087c8,=20
  v_actfreelist =3D {
    tqe_next =3D 0xfffff80079317000,=20
    tqe_prev =3D 0xfffff80092a9e820
  },=20
  v_bufobj =3D {
    bo_lock =3D {
      lock_object =3D {
        lo_name =3D 0xffffffff80fd2f3d "bufobj interlock",=20
        lo_flags =3D 86179840,=20
        lo_data =3D 0,=20
        lo_witness =3D 0x0
      },=20
      rw_lock =3D 1
    },=20
    bo_ops =3D 0xffffffff8149bff0,=20
    bo_object =3D 0xfffff80135aaa500,=20
    bo_synclist =3D {
      le_next =3D 0x0,=20
      le_prev =3D 0x0
    },=20
    bo_private =3D 0xfffff8010bc08760,=20
    __bo_vnode =3D 0xfffff8010bc08760,=20
    bo_clean =3D {
      bv_hd =3D {
        tqh_first =3D 0x0,=20
        tqh_last =3D 0xfffff8010bc08880
      },=20
      bv_root =3D {
        pt_root =3D 0
      },=20
      bv_cnt =3D 0
    },=20
    bo_dirty =3D {
      bv_hd =3D {
        tqh_first =3D 0x0,=20
        tqh_last =3D 0xfffff8010bc088a0
      },=20
      bv_root =3D {
        pt_root =3D 0
      },=20
      bv_cnt =3D 0
    },=20
    bo_numoutput =3D 0,=20
    bo_flag =3D 0,=20
    bo_bsize =3D 131072
  },=20
  v_pollinfo =3D 0x0,=20
  v_label =3D 0x0,=20
  v_lockf =3D 0x0,=20
  v_rl =3D {
    rl_waiters =3D {
      tqh_first =3D 0x0,=20
      tqh_last =3D 0xfffff8010bc088e8
    },=20
    rl_currdep =3D 0x0
  },=20
  v_cstart =3D 0,=20
  v_lasta =3D 0,=20
  v_lastw =3D 0,=20
  v_clen =3D 0,=20
  v_holdcnt =3D 2,=20
  v_usecount =3D 2,=20
  v_iflag =3D 512,=20
  v_vflag =3D 0,=20
  v_writecount =3D 0,=20
  v_hash =3D 17547399,=20
  v_type =3D VREG
}

(kgdb) print vp->v_cache_dst->tqh_first->nc_name
$14 =3D 0xfffff801a260f512 "puppet20160226-52827-x6ea8y"

The file where things go wrong for us is: /tmp/puppet20160226-52827-x6ea8y

Performing a cat on this file also hangs on vodead.

Any relevant waiting channels:

# ps -o lwp -laxwwwSH | awk '{ if ($10 !~
"^(-|ttyin|lockf|wait|select|kqread|tx\->)") print; }'=20
   LWP   UID   PID  PPID CPU  PRI  NI     VSZ    RSS MWCHAN   STAT TT=20=20=
=20=20=20=20=20=20
TIME COMMAND
100000     0     0     0   0  -16   0       0 285088 swapin   DLs   -=20=20=
=20=20=20
0:41.09 [kernel/swapper]
100048     0     0     0   0  -92   0       0 285088 vtbslp   DLs   -=20=20=
=20=20=20
0:00.00 [kernel/virtio_ballo]
100018     0     2     0   0  -16   0       0     16 crypto_w DL    -=20=20=
=20=20=20
0:00.00 [crypto]
100019     0     3     0   0  -16   0       0     16 crypto_r DL    -=20=20=
=20=20=20
0:00.00 [crypto returns]
100057     0     5     0   0   20   0       0   6176 arc_recl DL    -=20=20=
=20=20
13:07.13 [zfskern/arc_reclaim]
100058     0     5     0   0   -8   0       0   6176 l2arc_fe DL    -=20=20=
=20=20=20
0:40.44 [zfskern/l2arc_feed_]
102486     0     5     0   0   -8   0       0   6176 zio->io_ DL    -=20=20=
=20=20=20
8:24.66 [zfskern/txg_thread_]
100062     0     7     0   0  -16   0       0     32 psleep   DL    -=20=20=
=20=20
17:00.73 [pagedaemon/pagedaem]
100068     0     7     0   0  -16   0       0     32 umarcl   DL    -=20=20=
=20=20=20
0:00.00 [pagedaemon/uma]
100063     0     8     0   0  -16   0       0     16 psleep   DL    -=20=20=
=20=20=20
0:00.00 [vmdaemon]
100064     0     9     0   0  155   0       0     16 pgzero   DL    -=20=20=
=20=20=20
0:00.17 [pagezero]
100001     0    10     0   0  -16   0       0     16 audit_wo DL    -=20=20=
=20=20
14:36.35 [audit]
100065     0    16     0   0  -16   0       0     16 psleep   DL    -=20=20=
=20=20=20
0:51.50 [bufdaemon]
100066     0    17     0   0  -16   0       0     16 vlruwt   DL    -=20=20=
=20=20=20
1:27.70 [vnlru]
100067     0    18     0   0   16   0       0     16 syncer   DL    -=20=20=
=20=20
63:11.82 [syncer]
100406 65534   921     1   0   52   0   71104  33604 uwait    Is    -=20=20=
=20=20=20
0:00.00 /usr/local/bin/memcached -l 127.0.0.1 -m 1024 -p 11211 -C -d -u nob=
ody
-P /var/run/memcached/memcached.pid
100387     0   958     1   0   23   0   16612   2224 nanslp   Is    -=20=20=
=20=20
97:24.14 /usr/sbin/cron -s
100394     0  1194     0   0  -16   0       0     16 pftm     DL    -=20=20=
=20=20
30:48.46 [pf purge]
100388     0  1201     1   0   52   0   14700   2256 sbwait   Is    -=20=20=
=20=20=20
0:00.00 pflogd: [priv] (pflogd)
100375    64  1202  1201   0   20   0   14700   2312 bpf      S     -=20=20=
=20=20=20
5:36.59 pflogd: [running] -s 116 -i pflog0 -f /dev/null (pflogd)
100389     0 52827     1   0   52   0  213452 112968 vodead   T     -=20=20=
=20=20=20
0:08.69 ruby21: puppet agent: applying configuration (ruby21)
100400     0 55604     0   0  -16   0       0     16 ftcl     DL    -=20=20=
=20=20=20
0:00.00 [ftcleanup]
101076     0 58064     1   0   20   0   16588   2168 auditd   Is    -=20=20=
=20=20=20
0:07.02 /usr/sbin/auditd
101349     0 37651 37601   0   20   0   12356   2132 vodead   D+    0-=20=
=20=20=20
0:00.00 cat puppet20160226-52827-x6ea8y

This is the first time we're seeing this.

--=20
You are receiving this mail because:
You are the assignee for the bug.=



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bug-204764-3630-3BuPvHr9ke>