Skip site navigation (1)Skip section navigation (2)
Date:      Sun, 06 Oct 2013 00:56:48 +0200
From:      Andreas Longwitz <longwitz@incore.de>
To:        Andriy Gapon <avg@FreeBSD.org>
Cc:        freebsd-fs@FreeBSD.org
Subject:   Re: zfs panic during find(1) on zfs snapshot directory
Message-ID:  <52509930.2090608@incore.de>
In-Reply-To: <523621DD.7050600@FreeBSD.org>
References:  <522DF5A9.4070103@incore.de> <522E0118.5020106@FreeBSD.org> <522F25AE.1080309@incore.de> <523621DD.7050600@FreeBSD.org>

next in thread | previous in thread | raw e-mail | index | archive | help
Andriy Gapon wrote:
> 
> Please try this patch:
> http://people.freebsd.org/~avg/zfs-gfs-8.diff

Without your patch my Stable-8 crashes immediately when I run
 while true; do ls -l /prod/.zfs >/dev/null; done

Fatal trap 12: page fault while in kernel mode
cpuid = 1; apic id = 01
fault virtual address   = 0x18
fault code              = supervisor read data, page not present
instruction pointer     = 0x20:0xffffffff80e1d9cd
stack pointer           = 0x28:0xffffff82457757e0
frame pointer           = 0x28:0xffffff8245775810
code segment            = base 0x0, limit 0xfffff, type 0x1b
                        = DPL 0, pres 1, long 1, def32 0, gran 1
processor eflags        = interrupt enabled, resume, IOPL = 0
current process         = 87276 (ls)
[thread pid 87276 tid 100386 ]
Stopped at      gfs_vop_inactive+0x1d:  movl    0x18(%r13),%ecx

#10 0xffffffff805c2ba4 in calltrap () at
/usr/src/sys/amd64/amd64/exception.S:228
#11 0xffffffff80e1d9cd in gfs_vop_inactive (ap=0xffffff8245775850)   at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/gfs.c:1232
#12 0xffffffff80639638 in VOP_INACTIVE_APV (vop=0xffffffff80f3b3c0,
a=0xffffff8245775850) at vnode_if.c:1923
#13 0xffffffff80491671 in vinactive (vp=0xffffff0103ebdb10,
td=0xffffff0078da78e0) at vnode_if.h:807
#14 0xffffffff80496038 in vputx (vp=0xffffff0103ebdb10, func=2) at
/usr/src/sys/kern/vfs_subr.c:2347
#15 0xffffffff8049a4ea in kern_statat_vnhook (td=0xffffff0078da78e0,
flag=<value optimized out>,
    fd=<value optimized out>, path=<value optimized out>, pathseg=<value
optimized out>,
    sbp=0xffffff8245775a80, hook=0) at /usr/src/sys/kern/vfs_syscalls.c:2351
#16 0xffffffff8049a645 in kern_statat (td=<value optimized out>,
flag=<value optimized out>,
    fd=<value optimized out>, path=<value optimized out>, pathseg=<value
optimized out>,
    sbp=<value optimized out>) at /usr/src/sys/kern/vfs_syscalls.c:2320
#17 0xffffffff8049a70a in lstat (td=<value optimized out>,
uap=0xffffff8245775bb0)
    at /usr/src/sys/kern/vfs_syscalls.c:2383
#18 0xffffffff805db824 in amd64_syscall (td=0xffffff0078da78e0,
traced=0) at subr_syscall.c:114
#19 0xffffffff805c2e9c in Xfast_syscall () at
/usr/src/sys/amd64/amd64/exception.S:387
#20 0x000000018098f97c in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) f 11
#11 0xffffffff80e1d9cd in gfs_vop_inactive (ap=0xffffff8245775850)
    at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/gfs.c:1232
1232            gfs_file_t *fp = vp->v_data;
(kgdb) info loc
vp = (vnode_t *) 0xffffff0103ebdb10
fp = (gfs_file_t *) 0x0
(kgdb) l
1227                    struct vnode *a_vp;
1228                    struct thread *a_td;
1229            } */ *ap;
1230    {
1231            vnode_t *vp = ap->a_vp;
1232            gfs_file_t *fp = vp->v_data;
1233
1234            if (fp->gfs_type == GFS_DIR)
1235                    gfs_dir_inactive(vp);

Then I applied your patch to my system with some minor changes, because
compile of zfs.ko was not clean:

- vrecycle(vp) --> vrecycle(vp, curthread)
- vn_lock --> zfs_vnode_lock
- zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, int flags, vnode_t **vpp)
  --> zfs_fhtovp(vfs_t *vfsp, fid_t *fidp, vnode_t **vpp)
- in zfsctl_root_lookup arg list: flags --> 0

After loading a new system with your patch integrated in zfs.ko I could
run several processes
 while true; do ls -l /prod/.zfs >/dev/null; done
without problems.
Starting one process "sudo find /prod/.zfs -mtime +30" gives immediately
a panic:

panic: __lockmgr_args: unknown lockmgr request 0x0

cpuid = 2
KDB: stack backtrace:
db_trace_self_wrapper() at db_trace_self_wrapper+0x2a
kdb_backtrace() at kdb_backtrace+0x37
panic() at panic+0x1ce
__lockmgr_args() at __lockmgr_args+0x21d
vop_stdlock() at vop_stdlock+0x39
VOP_LOCK1_APV() at VOP_LOCK1_APV+0x70
_vn_lock() at _vn_lock+0x47
gfs_dir_lookup() at gfs_dir_lookup+0x15e
zfsctl_root_lookup() at zfsctl_root_lookup+0x122
zfs_dirlook() at zfs_dirlook+0x297
zfs_lookup() at zfs_lookup+0x262
zfs_freebsd_lookup() at zfs_freebsd_lookup+0x81
VOP_CACHEDLOOKUP_APV() at VOP_CACHEDLOOKUP_APV+0x62
vfs_cache_lookup() at vfs_cache_lookup+0xf5
VOP_LOOKUP_APV() at VOP_LOOKUP_APV+0x62
lookup() at lookup+0x44c
namei() at namei+0x53d
vn_open_cred() at vn_open_cred+0x391
kern_openat() at kern_openat+0x16a
amd64_syscall() at amd64_syscall+0x1f4
Xfast_syscall() at Xfast_syscall+0xfc
--- syscall (5, FreeBSD ELF64, open), rip = 0x1807359fc, rsp =
0x7fffffffe828, rbp = 0xffffffff ---
KDB: enter: panic
[thread pid 91769 tid 100298 ]
Stopped at      kdb_enter+0x3b: movq    $0,0x48aae2(%rip)

Locked vnodes
0xffffff0002d91000: tag ufs, type VDIR
    usecount 1, writecount 0, refcount 4 mountedhere 0
    flags ()
    v_object 0xffffff0002de9ca8 ref 0 pages 3
    lock type ufs: SHARED (count 1)
        ino 635904, on dev mirror/gm0p9.journal

0xffffff010018ece8: tag zfs, type VDIR
    usecount 3, writecount 0, refcount 4 mountedhere 0
    flags ()
    v_object 0xffffff0073f17d80 ref 0 pages 0
    lock type zfs: SHARED (count 1)
show lockchain
thread 100298 (pid 91769, find) running on CPU 2

#10 0xffffffff803ff367 in panic (fmt=<value optimized out>) at
/usr/src/sys/kern/kern_shutdown.c:616
#11 0xffffffff803e74ad in __lockmgr_args (lk=0xffffff0073df2270,
flags=0, ilk=0xffffff0073df2298,
    wmesg=<value optimized out>, pri=982528, timo=1166294704,
    file=0xffffffff80d1faf8
"/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/gfs.c",
    line=984) at /usr/src/sys/kern/kern_lock.c:1194
#12 0xffffffff804833e9 in vop_stdlock (ap=<value optimized out>) at
lockmgr.h:94
#13 0xffffffff8063a870 in VOP_LOCK1_APV (vop=0xffffffff8082af60,
a=0xffffff82458441f0) at vnode_if.c:2052
#14 0xffffffff804a3137 in _vn_lock (vp=0xffffff0073df21d8, flags=0,
    file=0xffffffff80d1faf8
"/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/gfs.c",
    line=984) at vnode_if.h:859
#15 0xffffffff80c1d5be in gfs_dir_lookup (dvp=0xffffff00739fc588,
nm=0xffffffff80d31cb8 "snapshot",
    vpp=0xffffff8245844a10, cr=0xffffff0002345000, flags=0,
direntflags=0x0, realpnp=0x0)
    at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/gfs.c:984
#16 0xffffffff80cab1b2 in zfsctl_root_lookup (dvp=0xffffff00739fc588,
nm=0xffffffff80d31cb8 "snapshot",
    vpp=0xffffff8245844a10, pnp=0x0, flags=0, rdir=0x0,
cr=0xffffff0002345000, ct=0x0, direntflags=0x0, realpnp=0x0) at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c:544
#17 0xffffffff80cae837 in zfs_dirlook (dzp=0xffffff0073e7d450,
name=<value optimized out>,
    vpp=0xffffff8245844a10, flags=<value optimized out>, deflg=<value
optimized out>, rpnp=0x0)    at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_dir.c:400
#18 0xffffffff80cc0ac2 in zfs_lookup (dvp=0xffffff010018ece8,
nm=0xffffff82458444a0 "..",
    vpp=0xffffff8245844a10, cnp=0xffffff8245844a38, nameiop=0,
cr=0xffffff0073e09400, td=0xffffff007367b470,    flags=0) at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:1481
#19 0xffffffff80cc12f1 in zfs_freebsd_lookup (ap=0xffffff8245844610)
    at
/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c:5839
#20 0xffffffff8063a2b2 in VOP_CACHEDLOOKUP_APV (vop=0xffffffff80d3c100,
a=0xffffff8245844610)  at vnode_if.c:193
#21 0xffffffff80481215 in vfs_cache_lookup (ap=<value optimized out>) at
vnode_if.h:80
#22 0xffffffff8063a372 in VOP_LOOKUP_APV (vop=0xffffffff80d3c100,
a=0xffffff8245844710) at vnode_if.c:126
#23 0xffffffff8048898c in lookup (ndp=0xffffff82458449e0) at vnode_if.h:54
#24 0xffffffff80489b0d in namei (ndp=0xffffff82458449e0) at
/usr/src/sys/kern/vfs_lookup.c:269
#25 0xffffffff804a2641 in vn_open_cred (ndp=0xffffff82458449e0,
flagp=0xffffff82458449dc, cmode=0,
    vn_open_flags=<value optimized out>, cred=0xffffff0073e09400,
fp=0xffffff0002c682d0)
    at /usr/src/sys/kern/vfs_vnops.c:192
#26 0xffffffff804a161a in kern_openat (td=0xffffff007367b470, fd=-100,
    path=0x18074e069 <Address 0x18074e069 out of bounds>,
pathseg=UIO_USERSPACE, flags=1,
    mode=<value optimized out>) at /usr/src/sys/kern/vfs_syscalls.c:1077
#27 0xffffffff805db824 in amd64_syscall (td=0xffffff007367b470,
traced=0) at subr_syscall.c:114
#28 0xffffffff805c2e9c in Xfast_syscall () at
/usr/src/sys/amd64/amd64/exception.S:387
#29 0x00000001807359fc in ?? ()
Previous frame inner to this frame (corrupt stack?)
(kgdb) f 11
#11 0xffffffff803e74ad in __lockmgr_args (lk=0xffffff0073df2270,
flags=0, ilk=0xffffff0073df2298,
    wmesg=<value optimized out>, pri=982528, timo=1166294704,
    file=0xffffffff80d1faf8
"/usr/src/sys/modules/zfs/../../cddl/contrib/opensolaris/uts/common/fs/gfs.c",
    line=984) at /usr/src/sys/kern/kern_lock.c:1194
1194                    panic("%s: unknown lockmgr request 0x%x\n",
__func__, op);
(kgdb) info loc
_i = <value optimized out>
class = (struct lock_class *) 0x0
iwmesg = 0xffffffff80d2d91c "zfs"
tid = 0
v = <value optimized out>
x = <value optimized out>
op = 0
realexslp = <value optimized out>
error = <value optimized out>
ipri = 80
itimo = 51
queue = <value optimized out>
wakeup_swapper = <value optimized out>
__func__ = "__lockmgr_args"
(kgdb) list
1189                    }
1190                    break;
1191            default:
1192                    if (flags & LK_INTERLOCK)
1193                            class->lc_unlock(ilk);
1194                    panic("%s: unknown lockmgr request 0x%x\n",
__func__, op);
1195            }
1196
1197            if (flags & LK_INTERLOCK)
1198                    class->lc_unlock(ilk);

If you like I can give more info from the dumps.

>> kern/180060 
> I will try to look into this issue.

Would be fine, maybe you like also to look at
http://lists.freebsd.org/pipermail/freebsd-fs/2013-August/018008.html

Regards


-- 
Andreas Longwitz




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?52509930.2090608>