Date: Tue, 22 Jul 2014 14:15:01 -0700
From: Bryan Drewery <bdrewery@FreeBSD.org>
To: fs@FreeBSD.org
Subject: NFS DEADLKRES panic with getfsstat() on unresponsive mount
Message-ID: <53CED455.1030701@FreeBSD.org>
I had an NFS server go offline for about an hour and had a client panic in deadlkres after a 'zfs list -o mounted' run, which calls getfsstat(2). It seems plausible that this is a false positive. Is deadlkres known to have acceptable false positives? If so, is there a way to mark this one so that it does not cause a panic? This was r268621.
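For reference on the getfsstat(2) semantics before the kgdb output: the mode argument decides whether the kernel just copies out the cached per-mount statistics (MNT_NOWAIT) or refreshes them with a VFS_STATFS() call on every mount first (MNT_WAIT), and the refresh is what has to talk to the dead NFS server. A minimal, untested sketch of the userland side (my own illustration, not what libzfs actually does):

#include <sys/param.h>
#include <sys/ucred.h>
#include <sys/mount.h>

#include <err.h>
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    struct statfs *buf;
    int i, n;

    /* With a NULL buffer, getfsstat(2) just reports the mount count. */
    n = getfsstat(NULL, 0, MNT_NOWAIT);
    if (n < 0)
        err(1, "getfsstat");
    if ((buf = calloc(n, sizeof(*buf))) == NULL)
        err(1, "calloc");

    /*
     * MNT_NOWAIT copies out whatever statistics are cached in each
     * struct mount; changing this to MNT_WAIT forces a fresh
     * VFS_STATFS() per filesystem, which is where an unresponsive
     * NFS server can block the caller indefinitely.
     */
    n = getfsstat(buf, n * sizeof(*buf), MNT_NOWAIT);
    if (n < 0)
        err(1, "getfsstat");

    for (i = 0; i < n; i++)
        printf("%s on %s (%s)\n", buf[i].f_mntfromname,
            buf[i].f_mntonname, buf[i].f_fstypename);
    return (0);
}

Judging by the backtrace below, the zfs path is not taking the cached route: it gets all the way into VFS_STATFS()/nfs_statfs().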
Some debugging output is below from me figuring out that it was not ZFS, but my Ruby ZFS snapshot script, causing the issue:

> panic: deadlkres: possible deadlock detected for 0xfffff8029ff37490, blocked for 1800185 ticks
> (kgdb) p *(struct thread*)0xfffff8029ff37490
> $1 = {td_lock = 0xffffffff815fc7d0, td_proc = 0xfffff80f93f29000, td_plist = {tqe_next = 0x0, tqe_prev = 0xfffff80f93f29010}, td_runq = {tqe_next = 0x0, tqe_prev = 0xffffffff815d23a0},
>     td_slpq = {tqe_next = 0xfffff8039d61a000, tqe_prev = 0xfffff8015e99dc80}, td_lockq = {tqe_next = 0x0, tqe_prev = 0xfffffe1247e7c6e8}, td_hash = {le_next = 0x0,
>     le_prev = 0xfffffe00008d8ae0}, td_cpuset = 0xfffff8000e3813a8, td_sel = 0xfffff8000e6c2580, td_sleepqueue = 0x0, td_turnstile = 0xfffff800431fccc0, td_rlqe = 0xfffff80038b68398,
>     td_umtxq = 0xfffff8000e718880, td_tid = 101724, td_sigqueue = {sq_signals = {__bits = {0, 0, 0, 0}}, sq_kill = {__bits = {0, 0, 0, 0}}, sq_list = {tqh_first = 0x0,
>     tqh_last = 0xfffff8029ff37548}, sq_proc = 0xfffff80f93f29000, sq_flags = 1}, td_lend_user_pri = 255 '\377', td_flags = 8196, td_inhibitors = 2, td_pflags = 0, td_dupfd = 0,
>     td_sqqueue = 0, td_wchan = 0xfffff800aa101d50, td_wmesg = 0xffffffff80f8e15e "newnfs", td_lastcpu = 14 '\016', td_oncpu = 255 '\377', td_owepreempt = 0 '\0', td_tsqueue = 0 '\0',
>     td_locks = 0, td_rw_rlocks = 0, td_lk_slocks = 0, td_stopsched = 0, td_blocked = 0x0, td_lockname = 0x0, td_contested = {lh_first = 0x0}, td_sleeplocks = 0xffffffff8187b9e8,
>     td_intr_nesting_level = 0, td_pinned = 0, td_ucred = 0xfffff8004306e200, td_estcpu = 0, td_slptick = 171026084, td_blktick = 0, td_swvoltick = 171026084, td_cow = 69, td_ru = {
>     ru_utime = {tv_sec = 0, tv_usec = 0}, ru_stime = {tv_sec = 0, tv_usec = 0}, ru_maxrss = 3936, ru_ixrss = 21896, ru_idrss = 1904, ru_isrss = 30464, ru_minflt = 339, ru_majflt = 0,
>     ru_nswap = 0, ru_inblock = 25393, ru_oublock = 0, ru_msgsnd = 0, ru_msgrcv = 0, ru_nsignals = 0, ru_nvcsw = 3, ru_nivcsw = 23}, td_rux = {rux_runtime = 4239501908, rux_uticks = 7,
>     rux_sticks = 231, rux_iticks = 0, rux_uu = 0, rux_su = 0, rux_tu = 0}, td_incruntime = 0, td_runtime = 4239501908, td_pticks = 0, td_sticks = 0, td_iticks = 0, td_uticks = 0,
>     td_intrval = 0, td_oldsigmask = {__bits = {0, 0, 0, 0}}, td_generation = 26, td_sigstk = {ss_sp = 0x0, ss_size = 0, ss_flags = 4}, td_xsig = 0, td_profil_addr = 0, td_profil_ticks = 0,
>     td_name = "zfs", '\0' <repeats 16 times>, td_fpop = 0x0, td_dbgflags = 0, td_dbgksi = {ksi_link = {tqe_next = 0x0, tqe_prev = 0x0}, ksi_info = {si_signo = 0, si_errno = 0, si_code = 0,
>     si_pid = 0, si_uid = 0, si_status = 0, si_addr = 0x0, si_value = {sival_int = 0, sival_ptr = 0x0, sigval_int = 0, sigval_ptr = 0x0}, _reason = {_fault = {_trapno = 0}, _timer = {
>     _timerid = 0, _overrun = 0}, _mesgq = {_mqd = 0}, _poll = {_band = 0}, __spare__ = {__spare1__ = 0, __spare2__ = {0, 0, 0, 0, 0, 0, 0}}}}, ksi_flags = 0, ksi_sigq = 0x0},
>     td_ng_outbound = 0, td_osd = {osd_nslots = 2, osd_slots = 0xfffff80b2cd28070, osd_next = {le_next = 0xfffff80038623c40, le_prev = 0xfffff8039d61a330}}, td_map_def_user = 0x0,
>     td_dbg_forked = 0, td_vp_reserv = 0, td_no_sleeping = 0, td_dom_rr_idx = 0, td_sigmask = {__bits = {0, 0, 0, 0}}, td_rqindex = 7 '\a', td_base_pri = 152 '\230',
>     td_priority = 152 '\230', td_pri_class = 3 '\003', td_user_pri = 177 '\261', td_base_user_pri = 177 '\261', td_pcb = 0xfffffe12481fdcc0, td_state = TDS_INHIBITED, td_uretoff = {tdu_retval = {
>     0, 1}, tdu_off = 0}, td_slpcallout = {c_links = {le = {le_next = 0x0, le_prev = 0xfffffe0000a156a0}, sle = {sle_next = 0x0}, tqe = {tqe_next = 0x0, tqe_prev = 0xfffffe0000a156a0}},
>     c_time = 733781545914723, c_precision = 134217727, c_arg = 0xfffff8029ff37490, c_func = 0xffffffff809589b0 <sleepq_timeout>, c_lock = 0x0, c_flags = 272, c_cpu = 4},
>     td_frame = 0xfffffe12481fdc00, td_kstack_obj = 0xfffff80f7ec07600, td_kstack = 18446741953205739520, td_kstack_pages = 4, td_critnest = 1, td_md = {md_spinlock_count = 1,
>     md_saved_flags = 582, md_spurflt_addr = 0}, td_sched = 0xfffff8029ff378f8, td_ar = 0x0, td_lprof = {{lh_first = 0x0}, {lh_first = 0x0}}, td_dtrace = 0xfffff8001bb5e000, td_errno = 0,
>     td_vnet = 0x0, td_vnet_lpush = 0x0, td_intr_frame = 0x0, td_rfppwait_p = 0xfffff800aab78000, td_ma = 0x0, td_ma_cnt = 0}
> Current language: auto; currently minimal
> (kgdb) p $1.td_tid
> $2 = 101724
> (kgdb) p $1.td_name
> $3 = "zfs", '\0' <repeats 16 times>
> (kgdb) thread 611
> [Switching to thread 611 (Thread 101724)]#0 sched_switch (td=0xfffff8029ff37490, newtd=<value optimized out>, flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1931
> 1931            cpuid = PCPU_GET(cpuid);
> (kgdb) bt
> #0 sched_switch (td=0xfffff8029ff37490, newtd=<value optimized out>, flags=<value optimized out>) at /usr/src/sys/kern/sched_ule.c:1931
> #1 0xffffffff8091ba89 in mi_switch (flags=260, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:493
> #2 0xffffffff80958ea2 in sleepq_switch (wchan=<value optimized out>, pri=<value optimized out>) at /usr/src/sys/kern/subr_sleepqueue.c:552
> #3 0xffffffff80958d03 in sleepq_wait (wchan=0xfffff800aa101d50, pri=96) at /usr/src/sys/kern/subr_sleepqueue.c:631
> #4 0xffffffff808f092a in sleeplk (lk=<value optimized out>, flags=<value optimized out>, ilk=<value optimized out>, wmesg=<value optimized out>, pri=<value optimized out>,
>     timo=<value optimized out>) at /usr/src/sys/kern/kern_lock.c:225
> #5 0xffffffff808efff1 in __lockmgr_args (lk=0xfffff800aa101d50, flags=<value optimized out>, ilk=0xfffff800aa101d80, wmesg=<value optimized out>, pri=<value optimized out>,
>     timo=<value optimized out>, file=0xffffffff80fb8028 "/usr/src/sys/kern/vfs_subr.c", line=2135) at /usr/src/sys/kern/kern_lock.c:931
> #6 0xffffffff80832b02 in nfs_lock1 (ap=<value optimized out>) at lockmgr.h:97
> #7 0xffffffff80e4887c in VOP_LOCK1_APV (vop=<value optimized out>, a=<value optimized out>) at vnode_if.c:2082
> #8 0xffffffff809cf57a in _vn_lock (vp=0xfffff800aa101ce8, flags=<value optimized out>, file=0xffffffff80fb8028 "/usr/src/sys/kern/vfs_subr.c", line=2135) at vnode_if.h:859
> #9 0xffffffff809bf977 in vget (vp=0xfffff800aa101ce8, flags=524544, td=0xfffff8029ff37490) at /usr/src/sys/kern/vfs_subr.c:2135
> #10 0xffffffff809b3661 in vfs_hash_get (mp=0xfffff80038d3b000, hash=245677930, flags=<value optimized out>, td=0xfffff8029ff37490, vpp=0xfffffe12481fd670,
>     fn=0xffffffff8083a290 <newnfs_vncmpf>) at /usr/src/sys/kern/vfs_hash.c:88
> #11 0xffffffff80835f2d in ncl_nget (mntp=0xfffff80038d3b000, fhp=0xfffff800aa1cf0a4 "\233�\tM006\236n", fhsize=28, npp=0xfffffe12481fd6e0, lkflags=<value optimized out>)
>     at /usr/src/sys/fs/nfsclient/nfs_clnode.c:114
> #12 0xffffffff80838b6d in nfs_statfs (mp=0xfffff80038d3b000, sbp=0xfffff80038d3b0b8) at
>     /usr/src/sys/fs/nfsclient/nfs_clvfsops.c:288
> #13 0xffffffff809b9e5e in __vfs_statfs (mp=0x0, sbp=0xfffff80038d3b0b8) at /usr/src/sys/kern/vfs_mount.c:1724
> #14 0xffffffff809c72be in kern_getfsstat (td=0xfffff8029ff37490, buf=<value optimized out>, bufsize=<value optimized out>, bufseg=UIO_USERSPACE, flags=<value optimized out>)
>     at /usr/src/sys/kern/vfs_syscalls.c:511
> #15 0xffffffff80d280fa in amd64_syscall (td=0xfffff8029ff37490, traced=0) at subr_syscall.c:133
> #16 0xffffffff80d0a64b in Xfast_syscall () at /usr/src/sys/amd64/amd64/exception.S:407
> #17 0x000000080254955a in ?? ()
> (kgdb) down
> #13 0xffffffff809b9e5e in __vfs_statfs (mp=0x0, sbp=0xfffff80038d3b0b8) at /usr/src/sys/kern/vfs_mount.c:1724
> 1724            error = mp->mnt_op->vfs_statfs(mp, &mp->mnt_stat);
> (kgdb) p *sbp
> $5 = {f_version = 537068824, f_type = 58, f_flags = 268435472, f_bsize = 512, f_iosize = 65536, f_blocks = 3365960256, f_bfree = 3281287109, f_bavail = 3281287109, f_files = 3281308671,
>     f_ffree = 3281287109, f_syncwrites = 0, f_asyncwrites = 0, f_syncreads = 0, f_asyncreads = 0, f_spare = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, f_namemax = 255, f_owner = 0, f_fsid = {val = {
>     973143809, 58}}, f_charspare = '\0' <repeats 79 times>, f_fstypename = "nfs", '\0' <repeats 12 times>,
>     f_mntfromname = "tank:/tank/distfiles/freebsd\000\000\000\000/tank/distfiles/freebsd\000\000\000\000\000\000\000\000\000zroot/cache/ccache\000�",
>     f_mntonname = "/mnt/distfiles", '\0' <repeats 73 times>}
> (kgdb) p $1.td_proc.p_pptr.p_comm
> $17 = "ruby", '\0' <repeats 15 times>

The NFS mount point /mnt/distfiles sits on top of a ZFS filesystem mounted at /mnt. I am also sharing / from the client, but nothing is connecting to it.

> # cat /etc/exports
> V4: / -network 10.10.0.0/16

This zfs(1) call was initiated by my Ruby snapshot script, which runs 'zfs list -o mounted'; that is what called getfsstat(2).
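One more data point on the deadlkres side: 1800185 ticks is roughly 1800 seconds at hz=1000, which I believe matches the default sleep threshold. As far as I can tell the knobs are global sysctls under debug.deadlkres (slptime_threshold and blktime_threshold, in seconds, plus sleepfreq), so short of building a kernel without 'options DEADLKRES' the only workaround I see is raising the threshold globally, e.g. something like:

  # sysctl debug.deadlkres.slptime_threshold=7200

I do not see a per-lock or per-mount way to exempt this case, hence the question above.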
--
Regards,
Bryan Drewery