Skip site navigation (1)Skip section navigation (2)
Date:      Tue, 22 Jul 2014 14:15:01 -0700
From:      Bryan Drewery <bdrewery@FreeBSD.org>
To:        fs@FreeBSD.org
Subject:   NFS DEADLKRES panic with getfsstat() on unresponsive mount
Message-ID:  <53CED455.1030701@FreeBSD.org>

next in thread | raw e-mail | index | archive | help
I had an NFS server go offline for about an hour and had a client panic 
in deadlkres after a 'zfs list -o mounted' ran which called 
getfsstat(2). It seems reasonable that this could be a false-positive. 
Is deadlkres known to have acceptable false-positives? If so, is there a 
way to mark this one to not cause a panic?

This was r268621.

Some debugging is below from me figuring out it was not ZFS, but my ruby 
zfs snapshot script causing the issue.

> panic: deadlkres: possible deadlock detected for 0xfffff8029ff37490, blocked for 1800185 ticks
> (kgdb) p *(struct thread*)0xfffff8029ff37490
> $1 = {td_lock = 0xffffffff815fc7d0, td_proc = 0xfffff80f93f29000, td_plist = {tqe_next = 0x0, tqe_prev = 0xfffff80f93f29010}, td_runq = {tqe_next = 0x0, tqe_prev = 0xffffffff815d23a0},
>   td_slpq = {tqe_next = 0xfffff8039d61a000, tqe_prev = 0xfffff8015e99dc80}, td_lockq = {tqe_next = 0x0, tqe_prev = 0xfffffe1247e7c6e8}, td_hash = {le_next = 0x0,
>     le_prev = 0xfffffe00008d8ae0}, td_cpuset = 0xfffff8000e3813a8, td_sel = 0xfffff8000e6c2580, td_sleepqueue = 0x0, td_turnstile = 0xfffff800431fccc0, td_rlqe = 0xfffff80038b68398,
>   td_umtxq = 0xfffff8000e718880, td_tid = 101724, td_sigqueue = {sq_signals = {__bits = {0, 0, 0, 0}}, sq_kill = {__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0x0,
>       tqh_last = 0xfffff8029ff37548}, sq_proc = 0xfffff80f93f29000, sq_flags = 1}, td_lend_user_pri = 255 '�', td_flags = 8196, td_inhibitors = 2, td_pflags = 0, td_dupfd = 0,
>   td_sqqueue = 0, td_wchan = 0xfffff800aa101d50, td_wmesg = 0xffffffff80f8e15e "newnfs", td_lastcpu = 14 '\016', td_oncpu = 255 '�', td_owepreempt = 0 '\0', td_tsqueue = 0 '\0',
>   td_locks = 0, td_rw_rlocks = 0, td_lk_slocks = 0, td_stopsched = 0, td_blocked = 0x0, td_lockname = 0x0, td_contested = {lh_first = 0x0}, td_sleeplocks = 0xffffffff8187b9e8,
>   td_intr_nesting_level = 0, td_pinned = 0, td_ucred = 0xfffff8004306e200, td_estcpu = 0, td_slptick = 171026084, td_blktick = 0, td_swvoltick = 171026084, td_cow = 69, td_ru = {
>     ru_utime = {tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0, tv_usec = 0}, ru_maxrss = 3936, ru_ixrss = 21896, ru_idrss = 1904, ru_isrss = 30464, ru_minflt = 339, ru_majflt = 0,
>     ru_nswap = 0, ru_inblock = 25393, ru_oublock = 0, ru_msgsnd = 0, ru_msgrcv = 0, ru_nsignals = 0, ru_nvcsw = 3, ru_nivcsw = 23}, td_rux = {rux_runtime = 4239501908, rux_uticks = 7,
>     rux_sticks = 231, rux_iticks = 0, rux_uu = 0, rux_su = 0, rux_tu = 0}, td_incruntime = 0, td_runtime = 4239501908, td_pticks = 0, td_sticks = 0, td_iticks = 0, td_uticks = 0,
>   td_intrval = 0, td_oldsigmask = {__bits = {0, 0, 0, 0}}, td_generation = 26, td_sigstk = {ss_sp = 0x0, ss_size = 0, ss_flags = 4}, td_xsig = 0, td_profil_addr = 0, td_profil_ticks = 0,
>   td_name = "zfs", '\0' <repeats 16 times>, td_fpop = 0x0, td_dbgflags = 0, td_dbgksi = {ksi_link = {tqe_next = 0x0, tqe_prev = 0x0}, ksi_info = {si_signo = 0, si_errno = 0, si_code = 0,
>       si_pid = 0, si_uid = 0, si_status = 0, si_addr = 0x0, si_value = {sival_int = 0, sival_ptr = 0x0, sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno = 0}, _timer = {
>           _timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll = {_band = 0}, __spare__ = {__spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0, 0}}}}, ksi_flags = 0, ksi_sigq = 0x0},
>   td_ng_outbound = 0, td_osd = {osd_nslots = 2, osd_slots = 0xfffff80b2cd28070, osd_next = {le_next = 0xfffff80038623c40, le_prev = 0xfffff8039d61a330}}, td_map_def_user = 0x0,
>   td_dbg_forked = 0, td_vp_reserv = 0, td_no_sleeping = 0, td_dom_rr_idx = 0, td_sigmask = {__bits = {0, 0, 0, 0}}, td_rqindex = 7 '\a', td_base_pri = 152 '\230',
>   td_priority = 152 '\230', td_pri_class = 3 '\003', td_user_pri = 177 '�', td_base_user_pri = 177 '�', td_pcb = 0xfffffe12481fdcc0, td_state = TDS_INHIBITED, td_uretoff = {tdu_retval = {
>       0, 1}, tdu_off = 0}, td_slpcallout = {c_links = {le = {le_next = 0x0, le_prev = 0xfffffe0000a156a0}, sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0, tqe_prev = 0xfffffe0000a156a0}},
>     c_time = 733781545914723, c_precision = 134217727, c_arg = 0xfffff8029ff37490, c_func = 0xffffffff809589b0 <sleepq_timeout>, c_lock = 0x0, c_flags = 272, c_cpu = 4},
>   td_frame = 0xfffffe12481fdc00, td_kstack_obj = 0xfffff80f7ec07600, td_kstack = 18446741953205739520, td_kstack_pages = 4, td_critnest = 1, td_md = {md_spinlock_count = 1,
>     md_saved_flags = 582, md_spurflt_addr = 0}, td_sched = 0xfffff8029ff378f8, td_ar = 0x0, td_lprof = {{lh_first = 0x0}, {lh_first = 0x0}}, td_dtrace = 0xfffff8001bb5e000, td_errno = 0,
>   td_vnet = 0x0, td_vnet_lpush = 0x0, td_intr_frame = 0x0, td_rfppwait_p = 0xfffff800aab78000, td_ma = 0x0, td_ma_cnt = 0}
> Current language:  auto; currently minimal
> (kgdb) p $1.td_tid
> $2 = 101724
> (kgdb) p $1.td_name
> $3 = "zfs", '\0' <repeats 16 times>
> (kgdb) thread 611
> [Switching to thread 611 (Thread 101724)]#0  sched_switch (td=0xfffff8029ff37490, newtd=<value optimized out>, flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1931
> 1931                    cpuid = PCPU_GET(cpuid);
> (kgdb) bt
> #0  sched_switch (td=0xfffff8029ff37490, newtd=<value optimized out>, flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1931
> #1  0xffffffff8091ba89 in mi_switch (flags=260, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:493
> #2  0xffffffff80958ea2 in sleepq_switch (wchan=<value optimized out>, pri=<value optimized out>) at /usr/src/sys/kern/subr_sleepqueue.c:552
> #3  0xffffffff80958d03 in sleepq_wait (wchan=0xfffff800aa101d50, pri=96) at /usr/src/sys/kern/subr_sleepqueue.c:631
> #4  0xffffffff808f092a in sleeplk (lk=<value optimized out>, flags=<value optimized out>, ilk=<value optimized out>, wmesg=<value optimized out>, pri=<value optimized out>,
>     timo=<value optimized out>) at /usr/src/sys/kern/kern_lock.c:225
> #5  0xffffffff808efff1 in __lockmgr_args (lk=0xfffff800aa101d50, flags=<value optimized out>, ilk=0xfffff800aa101d80, wmesg=<value optimized out>, pri=<value optimized out>,
>     timo=<value optimized out>, file=0xffffffff80fb8028 "/usr/src/sys/kern/vfs_subr.c", line=2135) at /usr/src/sys/kern/kern_lock.c:931
> #6  0xffffffff80832b02 in nfs_lock1 (ap=<value optimized out>) at lockmgr.h:97
> #7  0xffffffff80e4887c in VOP_LOCK1_APV (vop=<value optimized out>, a=<value optimized out>) at vnode_if.c:2082
> #8  0xffffffff809cf57a in _vn_lock (vp=0xfffff800aa101ce8, flags=<value optimized out>, file=0xffffffff80fb8028 "/usr/src/sys/kern/vfs_subr.c", line=2135) at vnode_if.h:859
> #9  0xffffffff809bf977 in vget (vp=0xfffff800aa101ce8, flags=524544, td=0xfffff8029ff37490) at /usr/src/sys/kern/vfs_subr.c:2135
> #10 0xffffffff809b3661 in vfs_hash_get (mp=0xfffff80038d3b000, hash=245677930, flags=<value optimized out>, td=0xfffff8029ff37490, vpp=0xfffffe12481fd670,
>     fn=0xffffffff8083a290 <newnfs_vncmpf>) at /usr/src/sys/kern/vfs_hash.c:88
> #11 0xffffffff80835f2d in ncl_nget (mntp=0xfffff80038d3b000, fhp=0xfffff800aa1cf0a4 "\233�\tM006\236n", fhsize=28, npp=0xfffffe12481fd6e0, lkflags=<value optimized out>)
>     at /usr/src/sys/fs/nfsclient/nfs_clnode.c:114
> #12 0xffffffff80838b6d in nfs_statfs (mp=0xfffff80038d3b000, sbp=0xfffff80038d3b0b8) at /usr/src/sys/fs/nfsclient/nfs_clvfsops.c:288
> #13 0xffffffff809b9e5e in __vfs_statfs (mp=0x0, sbp=0xfffff80038d3b0b8) at /usr/src/sys/kern/vfs_mount.c:1724
> #14 0xffffffff809c72be in kern_getfsstat (td=0xfffff8029ff37490, buf=<value optimized out>, bufsize=<value optimized out>, bufseg=UIO_USERSPACE, flags=<value optimized out>)
>     at /usr/src/sys/kern/vfs_syscalls.c:511
> #15 0xffffffff80d280fa in amd64_syscall (td=0xfffff8029ff37490, traced=0) at subr_syscall.c:133
> #16 0xffffffff80d0a64b in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:407
> #17 0x000000080254955a in ?? ()

> (kgdb) down
> #13 0xffffffff809b9e5e in __vfs_statfs (mp=0x0, sbp=0xfffff80038d3b0b8) at /usr/src/sys/kern/vfs_mount.c:1724
> 1724            error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat);
> (kgdb) p *sbp
> $5 = {f_version = 537068824, f_type = 58, f_flags = 268435472, f_bsize = 512, f_iosize = 65536, f_blocks = 3365960256, f_bfree = 3281287109, f_bavail = 3281287109, f_files = 3281308671,
>   f_ffree = 3281287109, f_syncwrites = 0, f_asyncwrites = 0, f_syncreads = 0, f_asyncreads = 0, f_spare = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, f_namemax = 255, f_owner = 0, f_fsid = {val = {
>       973143809, 58}}, f_charspare = '\0' <repeats 79 times>, f_fstypename = "nfs", '\0' <repeats 12 times>,
>   f_mntfromname = "tank:/tank/distfiles/freebsd\000\000\000\000/tank/distfiles/freebsd\000\000\000\000\000\000\000\000\000zroot/cache/ccache\000�",
>   f_mntonname = "/mnt/distfiles", '\0' <repeats 73 times>}
> (kgdb) p $1.td_proc.p_pptr.p_comm
> $17 = "ruby", '\0' <repeats 15 times>


The NFS mountpoint /mnt/distfiles is on top of a ZFS /mnt.

I also am sharing / from the client, but nothing is connecting to it.

> # cat /etc/exports
> V4: / -network 10.10.0.0/16

This zfs(1) call was initiated by my ruby snapshot script which runs 
'zfs list -o mounted' which called the getfsstat(2).


-- 
Regards,
Bryan Drewery



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?53CED455.1030701>