Skip site navigation (1)Skip section navigation (2)
Date:      Thu, 06 Dec 2018 08:29:51 +0000
From:      bugzilla-noreply@freebsd.org
To:        fs@FreeBSD.org
Subject:   [Bug 229958] ctld / zfs deadlock with 'zfs rename ...'
Message-ID:  <bug-229958-3630-L9kfgJa6wb@https.bugs.freebsd.org/bugzilla/>
In-Reply-To: <bug-229958-3630@https.bugs.freebsd.org/bugzilla/>
References:  <bug-229958-3630@https.bugs.freebsd.org/bugzilla/>

next in thread | previous in thread | raw e-mail | index | archive | help
https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=229958

--- Comment #4 from Andriy Gapon <avg@FreeBSD.org> ---
(In reply to emz from comment #1)
I think that the problem is with this thread:
(kgdb) tid 108285
(kgdb) bt
#0  sched_switch (td=0xfffff80130a82000, newtd=0xfffff8048008c000,
flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2112
#1  0xffffffff80bac7c1 in mi_switch (flags=<optimized out>, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:439
#2  0xffffffff80bfa9cc in sleepq_wait (wchan=<unavailable>, pri=<unavailable>)
at /usr/src/sys/kern/subr_sleepqueue.c:692
#3  0xffffffff80b3b6b2 in _cv_wait (cvp=0xfffff8001b148a98,
lock=0xfffff8001b148a28) at /usr/src/sys/kern/kern_condvar.c:146
#4  0xffffffff8270f575 in txg_wait_synced (dp=0xfffff8001b148800, txg=38513569)
at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:659
#5  0xffffffff827448ab in zil_close (zilog=0xfffff8036f227400) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c:2975
#6  0xffffffff8277f4b5 in zvol_last_close (zv=0xfffff80535721800) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:893
#7  0xffffffff8278089e in zvol_d_close (dev=<optimized out>, flags=131075,
fmt=<optimized out>, td=<optimized out>) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:3138
#8  0xffffffff80a57f0a in devfs_close (ap=<optimized out>) at
/usr/src/sys/fs/devfs/devfs_vnops.c:650
#9  0xffffffff811fefc8 in VOP_CLOSE_APV (vop=<optimized out>,
a=0xfffffe010d6f9518) at vnode_if.c:534
#10 0xffffffff80c8068f in VOP_CLOSE (vp=<optimized out>, fflag=<optimized out>,
cred=<optimized out>, td=<optimized out>) at ./vnode_if.h:225
#11 vn_close1 (vp=0xfffff803ba0705a0, flags=3, file_cred=0x0,
td=0xfffff80130a82000, keep_ref=false) at /usr/src/sys/kern/vfs_vnops.c:454
#12 0xffffffff82c3b6d3 in ctl_be_block_close (be_lun=<optimized out>) at
/usr/src/sys/cam/ctl/ctl_backend_block.c:2102
#13 ctl_be_block_rm (softc=<optimized out>, req=<optimized out>) at
/usr/src/sys/cam/ctl/ctl_backend_block.c:2505
#14 ctl_be_block_ioctl (dev=<optimized out>, cmd=<optimized out>,
addr=0xfffff80057162400 "block", flag=<optimized out>, td=<optimized out>) at
/usr/src/sys/cam/ctl/ctl_backend_block.c:1794
#15 0xffffffff82c34ba6 in ctl_ioctl (dev=0xfffff804a017f200, cmd=<optimized
out>, addr=<optimized out>, flag=3, td=0xfffff80130a82000) at
/usr/src/sys/cam/ctl/ctl.c:2967
#16 0xffffffff80a5810d in devfs_ioctl (ap=0xfffffe010d6f9718) at
/usr/src/sys/fs/devfs/devfs_vnops.c:807
#17 0xffffffff811ffa6e in VOP_IOCTL_APV (vop=<optimized out>,
a=0xfffffe010d6f9718) at vnode_if.c:1067
#18 0xffffffff80c7f0e4 in VOP_IOCTL (vp=<optimized out>, command=<optimized
out>, data=<optimized out>, fflag=<unavailable>, cred=<optimized out>,
td=<optimized out>) at ./vnode_if.h:448
#19 vn_ioctl (fp=0xfffff8001bd82370, com=<optimized out>,
data=0xfffff80057162400, active_cred=0xfffff801b448e400, td=<unavailable>) at
/usr/src/sys/kern/vfs_vnops.c:1490
#20 0xffffffff80a586ef in devfs_ioctl_f (fp=<unavailable>, com=<unavailable>,
data=<unavailable>, cred=<unavailable>, td=0xfffff80130a82000) at
/usr/src/sys/fs/devfs/devfs_vnops.c:765
#21 0xffffffff80c0a36d in fo_ioctl (fp=<optimized out>, com=<optimized out>,
active_cred=<unavailable>, td=<optimized out>, data=<optimized out>) at
/usr/src/sys/sys/file.h:330
#22 kern_ioctl (td=0xfffff80130a82000, fd=3, com=3244876065,
data=<unavailable>) at /usr/src/sys/kern/sys_generic.c:800
#23 0xffffffff80c0a08e in sys_ioctl (td=0xfffff80130a82000,
uap=0xfffff80130a823c0) at /usr/src/sys/kern/sys_generic.c:712
#24 0xffffffff81079f92 in syscallenter (td=<optimized out>) at
/usr/src/sys/amd64/amd64/../../kern/subr_syscall.c:135
#25 amd64_syscall (td=0xfffff80130a82000, traced=0) at
/usr/src/sys/amd64/amd64/trap.c:1154

The problem is that it holds the spa_namespace_lock while waiting for a TXG
sync:
(kgdb) p spa_namespace_lock
$1 = {lock_object = {lo_name = 0xffffffff828037f2 <.L.str.48+1>
"spa_namespace_lock", lo_flags = 577830912, lo_data = 0, lo_witness = 0x0},
sx_lock = 18446735282727821316}
(kgdb) p/x $1
$2 = {lock_object = {lo_name = 0xffffffff828037f2, lo_flags = 0x22710000,
lo_data = 0x0, lo_witness = 0x0}, sx_lock = 0xfffff80130a82004}
(kgdb) p ((struct thread *)0xfffff80130a82000)->td_tid
$3 = 108285
(kgdb) tid 108285

Now let's take a look at what the sync thread is doing:
(kgdb) fr 4
#4  0xffffffff8270f575 in txg_wait_synced (dp=0xfffff8001b148800, txg=38513569)
at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:659
(kgdb) p dp->dp_tx.tx_sync_thread->td_tid
$10 = 101227
(kgdb) tid $10
(kgdb) bt
#0  sched_switch (td=0xfffff8049493c000, newtd=0xfffff8000362e580,
flags=<optimized out>) at /usr/src/sys/kern/sched_ule.c:2112
#1  0xffffffff80bac7c1 in mi_switch (flags=<optimized out>, newtd=0x0) at
/usr/src/sys/kern/kern_synch.c:439
#2  0xffffffff80bfa9cc in sleepq_wait (wchan=<unavailable>, pri=<unavailable>)
at /usr/src/sys/kern/subr_sleepqueue.c:692
#3  0xffffffff80baac3c in _sx_xlock_hard (sx=0xffffffff8282e3b8
<spa_namespace_lock>, x=<optimized out>, opts=<optimized out>) at
/usr/src/sys/kern/kern_sx.c:857
#4  0xffffffff827803ab in __sx_xlock (opts=0, line=0, sx=<optimized out>,
td=<optimized out>, file=<optimized out>) at /usr/src/sys/sys/sx.h:168
#5  zvol_rename_minors (oldname=0xfffff8024f1da800
"data/kvm/desktop/desktop-master-m40-lp@desktop3", newname=0xfffff80249a2a400
"data/kvm/desktop/desktop-master-m40-lp@desktop1")
    at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c:3045
#6  0xffffffff826d32bf in dsl_dataset_rename_snapshot_sync_impl
(dp=0xfffff8001b148800, hds=0xfffff80108c93000, arg=<optimized out>) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c:2533
#7  0xffffffff826cf4d4 in dsl_dataset_rename_snapshot_sync
(arg=0xfffffe010d7c6578, tx=<optimized out>) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_dataset.c:2557
#8  0xffffffff826d964e in dsl_sync_task_sync (dst=0xfffffe010d7c64a8,
tx=0xfffff8068cdac300) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_synctask.c:234
#9  0xffffffff826d7ffb in dsl_pool_sync (dp=<optimized out>, txg=<optimized
out>) at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dsl_pool.c:819
#10 0xffffffff82701675 in spa_sync (spa=<optimized out>, txg=<optimized out>)
at /usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c:8088
#11 0xffffffff8270efc8 in txg_sync_thread (arg=0xfffff8001b148800) at
/usr/src/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c:543
#12 0xffffffff80b61e23 in fork_exit (callout=0xffffffff8270edc0
<txg_sync_thread>, arg=0xfffff8001b148800, frame=0xfffffe00a6d5cac0) at
/usr/src/sys/kern/kern_fork.c:1057
#13 <signal handler called>
#13 <signal handler called>

So, the sync thread is blocked on the spa_namespace_lock as suspected.

-- 
You are receiving this mail because:
You are the assignee for the bug.



Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?bug-229958-3630-L9kfgJa6wb>