Date: Mon, 16 Dec 2013 11:08:11 +0100 (CET) From: krichy@tvnetwork.hu To: freebsd-fs@freebsd.org Subject: kern/184677 Message-ID: <alpine.DEB.2.10.1312161059150.7004@krichy.tvnetwork.hu>
next in thread | raw e-mail | index | archive | help
[-- Attachment #1 --] Dear devs, I've attached a patch, which makes the recursive lockmgr disappear, and makes the reported bug to disappear. I dont know if I followed any guidelines well, or not, but at least it works for me. Please some ZFS/FreeBSD fs expert review it, and fix it where it needed. But unfortunately, my original problem is still not solved, maybe the same as Ryan's: http://lists.freebsd.org/pipermail/freebsd-fs/2013-December/018707.html Tracing the problem down is that zfsctl_snapdir_lookup() tries to acquire spa_namespace_lock while when finishing a zfs send -R does a zfsdev_close(), and that also holds the same mutex. And this causes a deadlock scenario. I looked at illumos's code, and for some reason they use another mutex on zfsdev_close(), which therefore may not deadlock with zfsctl_snapdir_lookup(). But I am still investigating the problem. I would like to help making ZFS more stable on freebsd also with its whole functionality. I would be very thankful if some expert would give some advice, how to solve these bugs. PJD, Steven, Xin? Thanks in advance, Kojedzinszky Richard Euronet Magyarorszag Informatikai Zrt. [-- Attachment #2 --] commit 7965e01fca79187d815b8a86570f50547d00e531 Author: Richard Kojedzinszky <krichy@cflinux.hu> Date: Mon Dec 16 09:59:57 2013 +0100 ZFS lock ordering fix diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c index 94383d6..225521a 100644 --- a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c +++ b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c @@ -81,6 +81,8 @@ traverse(vnode_t **cvpp, int lktype) * progress on this vnode. */ + vn_lock(cvp, lktype); + for (;;) { /* * Reached the end of the mount chain? @@ -89,13 +91,7 @@ traverse(vnode_t **cvpp, int lktype) if (vfsp == NULL) break; error = vfs_busy(vfsp, 0); - /* - * tvp is NULL for *cvpp vnode, which we can't unlock. - */ - if (tvp != NULL) - vput(cvp); - else - vrele(cvp); + vput(cvp); if (error) return (error); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c index 59944a1..ce43fff 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c @@ -448,7 +448,7 @@ gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm) VN_HOLD(pvp); *vpp = pvp; } - vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY); + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); return (0); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c index 28ab1fa..9a05976 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c @@ -112,6 +112,30 @@ snapentry_compare(const void *a, const void *b) return (0); } +static void +snapdir_entry_remove_free(zfsctl_snapdir_t *sdp, zfs_snapentry_t *sep) +{ + avl_remove(&sdp->sd_snaps, sep); + kmem_free(sep->se_name, strlen(sep->se_name) + 1); + kmem_free(sep, sizeof (zfs_snapentry_t)); +} + +static zfsctl_snapdir_t* +snapshot_get_snapdir(vnode_t *vp, vnode_t **dvpp) +{ + gfs_dir_t *dp = vp->v_data; + *dvpp = dp->gfsd_file.gfs_parent; + zfsctl_snapdir_t *sdp; + + VN_HOLD(*dvpp); + VOP_UNLOCK(vp, 0); + vn_lock(*dvpp, LK_SHARED | LK_RETRY | LK_CANRECURSE); + sdp = (*dvpp)->v_data; + VOP_UNLOCK(*dvpp, 0); + + return (sdp); +} + #ifdef sun vnodeops_t *zfsctl_ops_root; vnodeops_t *zfsctl_ops_snapdir; @@ -1012,7 +1036,13 @@ zfsctl_snapdir_lookup(ap) /* * The snapshot was unmounted behind our backs, * try to remount it. + * Concurrent zfsctl_snapshot_inactive() would remove our entry + * so do this ourselves, and make a fresh new mount. */ + snapdir_entry_remove_free(sdp, sep); + vput(*vpp); + /* find new place for sep entry */ + avl_find(&sdp->sd_snaps, &search, &where); VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0); goto domount; } else { @@ -1028,6 +1058,7 @@ zfsctl_snapdir_lookup(ap) return (err); } +domount: /* * The requested snapshot is not currently mounted, look it up. */ @@ -1068,7 +1099,6 @@ zfsctl_snapdir_lookup(ap) avl_insert(&sdp->sd_snaps, sep, where); dmu_objset_rele(snap, FTAG); -domount: mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) + strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(nm) + 1; mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP); @@ -1350,9 +1380,7 @@ zfsctl_snapdir_inactive(ap) */ mutex_enter(&sdp->sd_lock); while ((sep = avl_first(&sdp->sd_snaps)) != NULL) { - avl_remove(&sdp->sd_snaps, sep); - kmem_free(sep->se_name, strlen(sep->se_name) + 1); - kmem_free(sep, sizeof (zfs_snapentry_t)); + snapdir_entry_remove_free(sdp, sep); } mutex_exit(&sdp->sd_lock); gfs_dir_inactive(vp); @@ -1463,17 +1491,19 @@ zfsctl_snapshot_inactive(ap) zfs_snapentry_t *sep, *next; int locked; vnode_t *dvp; + gfs_dir_t *dp; - if (vp->v_count > 0) - goto end; - - VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0); - sdp = dvp->v_data; - VOP_UNLOCK(dvp, 0); + /* This is for accessing the real parent directly, without a possible deadlock + * with zfsctl_snapdir_lookup(). The release of lock on vp and lock on dvp provides + * the same lock order as in zfsctl_snapshot_lookup(). + */ + sdp = snapshot_get_snapdir(vp, &dvp); if (!(locked = MUTEX_HELD(&sdp->sd_lock))) mutex_enter(&sdp->sd_lock); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + ASSERT(!vn_ismntpt(vp)); sep = avl_first(&sdp->sd_snaps); @@ -1481,9 +1511,7 @@ zfsctl_snapshot_inactive(ap) next = AVL_NEXT(&sdp->sd_snaps, sep); if (sep->se_root == vp) { - avl_remove(&sdp->sd_snaps, sep); - kmem_free(sep->se_name, strlen(sep->se_name) + 1); - kmem_free(sep, sizeof (zfs_snapentry_t)); + snapdir_entry_remove_free(sdp, sep); break; } sep = next; @@ -1494,7 +1522,6 @@ zfsctl_snapshot_inactive(ap) mutex_exit(&sdp->sd_lock); VN_RELE(dvp); -end: /* * Dispose of the vnode for the snapshot mount point. * This is safe to do because once this entry has been removed @@ -1595,20 +1622,18 @@ zfsctl_snapshot_lookup(ap) static int zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap) { - zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data; - vnode_t *dvp, *vp; + vnode_t *vp = ap->a_vp; + vnode_t *dvp; zfsctl_snapdir_t *sdp; zfs_snapentry_t *sep; int error; - ASSERT(zfsvfs->z_ctldir != NULL); - error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp, - NULL, 0, NULL, kcred, NULL, NULL, NULL); - if (error != 0) - return (error); - sdp = dvp->v_data; + sdp = snapshot_get_snapdir(vp, &dvp); mutex_enter(&sdp->sd_lock); + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + sep = avl_first(&sdp->sd_snaps); while (sep != NULL) { vp = sep->se_root;
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?alpine.DEB.2.10.1312161059150.7004>
