Date: Thu, 06 Dec 2018 08:29:51 +0000 From: bugzilla-noreply@freebsd.org To: fs@FreeBSD.org Subject: [Bug 229958] ctld / zfs deadlock with 'zfs rename ...' Message-ID: <bug-229958-3630-L9kfgJa6wb@https.bugs.freebsd.org/bugzilla/> In-Reply-To: <bug-229958-3630@https.bugs.freebsd.org/bugzilla/> References: <bug-229958-3630@https.bugs.freebsd.org/bugzilla/>
next in thread | previous in thread | raw e-mail | index | archive | help
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=229958 --- Comment #4 from Andriy Gapon <avg@FreeBSD.org> --- (In reply to emz from comment #1) I think that the problem is with this thread: (kgdb) tid 108285 (kgdb) bt #0 sched_switch (td=0xfffff80130a82000, newtd=0xfffff8048008c000, flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2112 #1 0xffffffff80bac7c1 in mi_switch (flags=<optimized out>, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:439 #2 0xffffffff80bfa9cc in sleepq_wait (wchan=<unavailable>, pri=<unavailable>) at /usr/src/sys/kern/subr_sleepqueue.c:692 #3 0xffffffff80b3b6b2 in _cv_wait (cvp=0xfffff8001b148a98, lock=0xfffff8001b148a28) at /usr/src/sys/kern/kern_condvar.c:146 #4 0xffffffff8270f575 in txg_wait_synced (dp=0xfffff8001b148800, txg=38513569) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:659 #5 0xffffffff827448ab in zil_close (zilog=0xfffff8036f227400) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c:2975 #6 0xffffffff8277f4b5 in zvol_last_close (zv=0xfffff80535721800) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:893 #7 0xffffffff8278089e in zvol_d_close (dev=<optimized out>, flags=131075, fmt=<optimized out>, td=<optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:3138 #8 0xffffffff80a57f0a in devfs_close (ap=<optimized out>) at /usr/src/sys/fs/devfs/devfs_vnops.c:650 #9 0xffffffff811fefc8 in VOP_CLOSE_APV (vop=<optimized out>, a=0xfffffe010d6f9518) at vnode_if.c:534 #10 0xffffffff80c8068f in VOP_CLOSE (vp=<optimized out>, fflag=<optimized out>, cred=<optimized out>, td=<optimized out>) at ./vnode_if.h:225 #11 vn_close1 (vp=0xfffff803ba0705a0, flags=3, file_cred=0x0, td=0xfffff80130a82000, keep_ref=false) at /usr/src/sys/kern/vfs_vnops.c:454 #12 0xffffffff82c3b6d3 in ctl_be_block_close (be_lun=<optimized out>) at /usr/src/sys/cam/ctl/ctl_backend_block.c:2102 #13 ctl_be_block_rm 
(softc=<optimized out>, req=<optimized out>) at /usr/src/sys/cam/ctl/ctl_backend_block.c:2505 #14 ctl_be_block_ioctl (dev=<optimized out>, cmd=<optimized out>, addr=0xfffff80057162400 "block", flag=<optimized out>, td=<optimized out>) at /usr/src/sys/cam/ctl/ctl_backend_block.c:1794 #15 0xffffffff82c34ba6 in ctl_ioctl (dev=0xfffff804a017f200, cmd=<optimized out>, addr=<optimized out>, flag=3, td=0xfffff80130a82000) at /usr/src/sys/cam/ctl/ctl.c:2967 #16 0xffffffff80a5810d in devfs_ioctl (ap=0xfffffe010d6f9718) at /usr/src/sys/fs/devfs/devfs_vnops.c:807 #17 0xffffffff811ffa6e in VOP_IOCTL_APV (vop=<optimized out>, a=0xfffffe010d6f9718) at vnode_if.c:1067 #18 0xffffffff80c7f0e4 in VOP_IOCTL (vp=<optimized out>, command=<optimized out>, data=<optimized out>, fflag=<unavailable>, cred=<optimized out>, td=<optimized out>) at ./vnode_if.h:448 #19 vn_ioctl (fp=0xfffff8001bd82370, com=<optimized out>, data=0xfffff80057162400, active_cred=0xfffff801b448e400, td=<unavailable>) at /usr/src/sys/kern/vfs_vnops.c:1490 #20 0xffffffff80a586ef in devfs_ioctl_f (fp=<unavailable>, com=<unavailable>, data=<unavailable>, cred=<unavailable>, td=0xfffff80130a82000) at /usr/src/sys/fs/devfs/devfs_vnops.c:765 #21 0xffffffff80c0a36d in fo_ioctl (fp=<optimized out>, com=<optimized out>, active_cred=<unavailable>, td=<optimized out>, data=<optimized out>) at /usr/src/sys/sys/file.h:330 #22 kern_ioctl (td=0xfffff80130a82000, fd=3, com=3244876065, data=<unavailable>) at /usr/src/sys/kern/sys_generic.c:800 #23 0xffffffff80c0a08e in sys_ioctl (td=0xfffff80130a82000, uap=0xfffff80130a823c0) at /usr/src/sys/kern/sys_generic.c:712 #24 0xffffffff81079f92 in syscallenter (td=<optimized out>) at /usr/src/sys/amd64/amd64/../../kern/subr_syscall.c:135 #25 amd64_syscall (td=0xfffff80130a82000, traced=0) at /usr/src/sys/amd64/amd64/trap.c:1154 The problem is that it holds the spa_namespace_lock while 
waiting for a TXG sync: (kgdb) p spa_namespace_lock $1 = {lock_object = {lo_name = 0xffffffff828037f2 <.L.str.48+1> "spa_namespace_lock", lo_flags = 577830912, lo_data = 0, lo_witness = 0x0}, sx_lock = 18446735282727821316} (kgdb) p/x $1 $2 = {lock_object = {lo_name = 0xffffffff828037f2, lo_flags = 0x22710000, lo_data = 0x0, lo_witness = 0x0}, sx_lock = 0xfffff80130a82004} (kgdb) p ((struct thread *)0xfffff80130a82000)->td_tid $3 = 108285 (kgdb) tid 108285 Now let's take a look what the sync thread is doing: (kgdb) fr 4 #4 0xffffffff8270f575 in txg_wait_synced (dp=0xfffff8001b148800, txg=38513569) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:659 (kgdb) p dp->dp_tx.tx_sync_thread->td_tid $10 = 101227 (kgdb) tid $10 (kgdb) bt #0 sched_switch (td=0xfffff8049493c000, newtd=0xfffff8000362e580, flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2112 #1 0xffffffff80bac7c1 in mi_switch (flags=<optimized out>, newtd=0x0) at /usr/src/sys/kern/kern_synch.c:439 #2 0xffffffff80bfa9cc in sleepq_wait (wchan=<unavailable>, pri=<unavailable>) at /usr/src/sys/kern/subr_sleepqueue.c:692 #3 0xffffffff80baac3c in _sx_xlock_hard (sx=0xffffffff8282e3b8 <spa_namespace_lock>, x=<optimized out>, opts=<optimized out>) at /usr/src/sys/kern/kern_sx.c:857 #4 0xffffffff827803ab in __sx_xlock (opts=0, line=0, sx=<optimized out>, td=<optimized out>, file=<optimized out>) at /usr/src/sys/sys/sx.h:168 #5 zvol_rename_minors (oldname=0xfffff8024f1da800 "data/kvm/desktop/desktop-master-m40-lp@desktop3", newname=0xfffff80249a2a400 "data/kvm/desktop/desktop-master-m40-lp@desktop1") at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:3045 #6 0xffffffff826d32bf in dsl_dataset_rename_snapshot_sync_impl (dp=0xfffff8001b148800, hds=0xfffff80108c93000, arg=<optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c:2533 #7 0xffffffff826cf4d4 in 
dsl_dataset_rename_snapshot_sync (arg=0xfffffe010d7c6578, tx=<optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c:2557 #8 0xffffffff826d964e in dsl_sync_task_sync (dst=0xfffffe010d7c64a8, tx=0xfffff8068cdac300) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c:234 #9 0xffffffff826d7ffb in dsl_pool_sync (dp=<optimized out>, txg=<optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c:819 #10 0xffffffff82701675 in spa_sync (spa=<optimized out>, txg=<optimized out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c:8088 #11 0xffffffff8270efc8 in txg_sync_thread (arg=0xfffff8001b148800) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:543 #12 0xffffffff80b61e23 in fork_exit (callout=0xffffffff8270edc0 <txg_sync_thread>, arg=0xfffff8001b148800, frame=0xfffffe00a6d5cac0) at /usr/src/sys/kern/kern_fork.c:1057 #13 <signal handler called> So, the sync thread is blocked on the spa_namespace_lock as suspected. -- You are receiving this mail because: You are the assignee for the bug.
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bug-229958-3630-L9kfgJa6wb>