Date: Mon, 20 Jan 2014 16:30:34 +0100 From: krichy@cflinux.hu To: Richard Kojedzinszky <krichy@cflinux.hu> Cc: freebsd-fs@freebsd.org, freebsd-security@freebsd.org Subject: Re: ZFS .zfs DoS Message-ID: <e487c315b3f98e0757716896bed69895@cflinux.hu> In-Reply-To: <alpine.BSF.2.00.1401170304070.83798@pi.nmdps.net> References: <alpine.BSF.2.00.1401170304070.83798@pi.nmdps.net>
next in thread | previous in thread | raw e-mail | index | archive | help
[-- Attachment #1 --]
Dear users,
I've worked out a patch for my known issues, please somebody test them,
and give recommendations, fixes.
Regards,
2014-01-17 03:11 időpontban Richard Kojedzinszky ezt írta:
> Dear users,
>
> For a long time now I've been investigating problems relating FreeBSD
> ZFS .zfs handling, and found that I am not enough to fix issues. Until
> fixes arrive, unfortunately a regular user can DoS a FreeBSD system
> which has ZFS filesystems with the attached script. While the script
> expects a snapshot argument to be given, actually the first test case
> does not need that, only a mounted zfs filesystem is enough. For more
> of the tests a snapshot may be needed, and later ones need root
> account also.
>
> I would recommend that until this gets rewritten or fixed at all, one
> should disable access to .zfs at all with someting like I've attached.
>
> Regards,
> Kojedzinszky Richard
[-- Attachment #2 --]
commit f56d6596b79c9ba76851ee6bea225f22cc9f0a26
Author: Richard Kojedzinszky <krichy@cflinux.hu>
Date: Fri Jan 17 22:57:33 2014 +0100
ZFS/GFS handling fixes
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
index 94383d6..4cac053 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c
@@ -81,6 +81,8 @@ traverse(vnode_t **cvpp, int lktype)
* progress on this vnode.
*/
+ vn_lock(cvp, lktype);
+
for (;;) {
/*
* Reached the end of the mount chain?
@@ -89,13 +91,7 @@ traverse(vnode_t **cvpp, int lktype)
if (vfsp == NULL)
break;
error = vfs_busy(vfsp, 0);
- /*
- * tvp is NULL for *cvpp vnode, which we can't unlock.
- */
- if (tvp != NULL)
- vput(cvp);
- else
- vrele(cvp);
+ VOP_UNLOCK(cvp, 0);
if (error)
return (error);
@@ -107,6 +103,9 @@ traverse(vnode_t **cvpp, int lktype)
vfs_unbusy(vfsp);
if (error != 0)
return (error);
+
+ VN_RELE(cvp);
+
cvp = tvp;
}
diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
index a2532f8..c302a54 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c
@@ -194,10 +194,8 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
VI_LOCK(vp);
vp->v_iflag &= ~VI_MOUNT;
VI_UNLOCK(vp);
- vrele(vp);
vfs_unbusy(mp);
vfs_mount_destroy(mp);
- *vpp = NULL;
return (error);
}
@@ -228,7 +226,7 @@ mount_snapshot(kthread_t *td, vnode_t **vpp, const char *fstype, char *fspath,
vfs_event_signal(NULL, VQ_MOUNT, 0);
if (VFS_ROOT(mp, LK_EXCLUSIVE, &mvp))
panic("mount: lost mount");
- vput(vp);
+ VOP_UNLOCK(vp, 0);
vfs_unbusy(mp);
*vpp = mvp;
return (0);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
index 59944a1..29ec454 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c
@@ -448,7 +448,6 @@ gfs_lookup_dot(vnode_t **vpp, vnode_t *dvp, vnode_t *pvp, const char *nm)
VN_HOLD(pvp);
*vpp = pvp;
}
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
return (0);
}
@@ -485,6 +484,7 @@ gfs_file_create(size_t size, vnode_t *pvp, vfs_t *vfsp, vnodeops_t *ops)
fp = kmem_zalloc(size, KM_SLEEP);
error = getnewvnode("zfs", vfsp, ops, &vp);
ASSERT(error == 0);
+ VN_LOCK_ASHARE(vp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vp->v_data = (caddr_t)fp;
@@ -496,9 +496,9 @@ gfs_file_create(size_t size, vnode_t *pvp, vfs_t *vfsp, vnodeops_t *ops)
fp->gfs_size = size;
fp->gfs_type = GFS_FILE;
- vp->v_vflag |= VV_FORCEINSMQ;
+ vp->v_vflag |= VV_FORCEINSMQ | VV_INSMQHEAD;
error = insmntque(vp, vfsp);
- vp->v_vflag &= ~VV_FORCEINSMQ;
+ vp->v_vflag &= ~(VV_FORCEINSMQ | VV_INSMQHEAD);
KASSERT(error == 0, ("insmntque() failed: error %d", error));
/*
@@ -637,12 +637,7 @@ gfs_file_inactive(vnode_t *vp)
if (fp->gfs_parent == NULL || (vp->v_flag & V_XATTRDIR))
goto found;
- /*
- * XXX cope with a FreeBSD-specific race wherein the parent's
- * snapshot data can be freed before the parent is
- */
- if ((dp = fp->gfs_parent->v_data) == NULL)
- return (NULL);
+ dp = fp->gfs_parent->v_data;
/*
* First, see if this vnode is cached in the parent.
@@ -669,37 +664,44 @@ found:
if (vp->v_flag & V_XATTRDIR)
VI_LOCK(fp->gfs_parent);
#endif
- VI_LOCK(vp);
- /*
- * Really remove this vnode
- */
- data = vp->v_data;
- if (ge != NULL) {
+ if (vp->v_count == 0 || vp->v_iflag & VI_DOOMED) {
/*
- * If this was a statically cached entry, simply set the
- * cached vnode to NULL.
+ * Really remove this vnode
*/
- ge->gfse_vnode = NULL;
- }
- VI_UNLOCK(vp);
+ data = vp->v_data;
+ if (ge != NULL) {
+ /*
+ * If this was a statically cached entry, simply set the
+ * cached vnode to NULL.
+ */
+ ge->gfse_vnode = NULL;
+ }
+#ifdef TODO
+ if (vp->v_flag & V_XATTRDIR)
+ VI_UNLOCK(fp->gfs_parent);
+#endif
- /*
- * Free vnode and release parent
- */
- if (fp->gfs_parent) {
- if (dp)
- gfs_dir_unlock(dp);
- VOP_UNLOCK(vp, 0);
- VN_RELE(fp->gfs_parent);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ /*
+ * Free vnode and release parent
+ */
+ if (fp->gfs_parent) {
+ if (dp)
+ gfs_dir_unlock(dp);
+ VN_RELE(fp->gfs_parent);
+ } else {
+ ASSERT(vp->v_vfsp != NULL);
+ VFS_RELE(vp->v_vfsp);
+ }
} else {
- ASSERT(vp->v_vfsp != NULL);
- VFS_RELE(vp->v_vfsp);
- }
+ data = NULL;
#ifdef TODO
- if (vp->v_flag & V_XATTRDIR)
- VI_UNLOCK(fp->gfs_parent);
+ if (vp->v_flag & V_XATTRDIR)
+ VI_UNLOCK(fp->gfs_parent);
#endif
+ if (dp)
+ gfs_dir_unlock(dp);
+ }
+
return (data);
}
@@ -1230,16 +1232,15 @@ gfs_vop_inactive(ap)
{
vnode_t *vp = ap->a_vp;
gfs_file_t *fp = vp->v_data;
+ void *data;
if (fp->gfs_type == GFS_DIR)
- gfs_dir_inactive(vp);
+ data = gfs_dir_inactive(vp);
else
- gfs_file_inactive(vp);
+ data = gfs_file_inactive(vp);
- VI_LOCK(vp);
- vp->v_data = NULL;
- VI_UNLOCK(vp);
- kmem_free(fp, fp->gfs_size);
+ if (data != NULL)
+ kmem_free(data, fp->gfs_size);
return (0);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
index 28ab1fa..15a55d2 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c
@@ -612,7 +612,7 @@ zfsctl_freebsd_root_lookup(ap)
err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL);
if (err == 0 && (nm[0] != '.' || nm[1] != '\0'))
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+ err = vn_lock(*vpp, ap->a_cnp->cn_lkflags);
return (err);
}
@@ -975,8 +975,11 @@ zfsctl_snapdir_lookup(ap)
ZFS_ENTER(zfsvfs);
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
+ err = 0;
+ if (nm[0] != '.' || nm[1] != '\0')
+ err = vn_lock(*vpp, ap->a_cnp->cn_lkflags);
ZFS_EXIT(zfsvfs);
- return (0);
+ return (err);
}
if (flags & FIGNORECASE) {
@@ -1004,7 +1007,7 @@ zfsctl_snapdir_lookup(ap)
if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {
*vpp = sep->se_root;
VN_HOLD(*vpp);
- err = traverse(vpp, LK_EXCLUSIVE | LK_RETRY);
+ err = traverse(vpp, ap->a_cnp->cn_lkflags);
if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
@@ -1013,6 +1016,8 @@ zfsctl_snapdir_lookup(ap)
* The snapshot was unmounted behind our backs,
* try to remount it.
*/
+ VN_HOLD(*vpp);
+ VOP_UNLOCK(*vpp, 0);
VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0);
goto domount;
} else {
@@ -1064,7 +1069,6 @@ zfsctl_snapdir_lookup(ap)
sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
(void) strcpy(sep->se_name, nm);
*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
- VN_HOLD(*vpp);
avl_insert(&sdp->sd_snaps, sep, where);
dmu_objset_rele(snap, FTAG);
@@ -1091,7 +1095,6 @@ domount:
mutex_exit(&sdp->sd_lock);
ZFS_EXIT(zfsvfs);
-#ifdef illumos
/*
* If we had an error, drop our hold on the vnode and
* zfsctl_snapshot_inactive() will clean up.
@@ -1100,10 +1103,6 @@ domount:
VN_RELE(*vpp);
*vpp = NULL;
}
-#else
- if (err != 0)
- *vpp = NULL;
-#endif
return (err);
}
@@ -1130,8 +1129,11 @@ zfsctl_shares_lookup(ap)
strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
+ error = 0;
+ if (nm[0] != '.' || nm[1] != '\0')
+ error = vn_lock(*vpp, ap->a_cnp->cn_lkflags);
ZFS_EXIT(zfsvfs);
- return (0);
+ return (error);
}
if (zfsvfs->z_shares_dir == 0) {
@@ -1344,22 +1346,15 @@ zfsctl_snapdir_inactive(ap)
vnode_t *vp = ap->a_vp;
zfsctl_snapdir_t *sdp = vp->v_data;
zfs_snapentry_t *sep;
-
- /*
- * On forced unmount we have to free snapshots from here.
- */
- mutex_enter(&sdp->sd_lock);
- while ((sep = avl_first(&sdp->sd_snaps)) != NULL) {
- avl_remove(&sdp->sd_snaps, sep);
- kmem_free(sep->se_name, strlen(sep->se_name) + 1);
- kmem_free(sep, sizeof (zfs_snapentry_t));
+ void *private;
+
+ private = gfs_dir_inactive(vp);
+ if (private != NULL) {
+ ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
+ mutex_destroy(&sdp->sd_lock);
+ avl_destroy(&sdp->sd_snaps);
+ kmem_free(private, sizeof (zfsctl_snapdir_t));
}
- mutex_exit(&sdp->sd_lock);
- gfs_dir_inactive(vp);
- ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
- mutex_destroy(&sdp->sd_lock);
- avl_destroy(&sdp->sd_snaps);
- kmem_free(sdp, sizeof (zfsctl_snapdir_t));
return (0);
}
@@ -1441,7 +1436,6 @@ zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,
&zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
- VN_HOLD(vp);
zcp = vp->v_data;
zcp->zc_id = objset;
VOP_UNLOCK(vp, 0);
@@ -1462,18 +1456,25 @@ zfsctl_snapshot_inactive(ap)
zfsctl_snapdir_t *sdp;
zfs_snapentry_t *sep, *next;
int locked;
- vnode_t *dvp;
+ gfs_dir_t *dp = vp->v_data;
+ vnode_t *dvp = dp->gfsd_file.gfs_parent;
- if (vp->v_count > 0)
- goto end;
-
- VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);
+ VN_HOLD(dvp);
+ VOP_UNLOCK(vp, 0);
sdp = dvp->v_data;
- VOP_UNLOCK(dvp, 0);
if (!(locked = MUTEX_HELD(&sdp->sd_lock)))
mutex_enter(&sdp->sd_lock);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+
+ if (vp->v_count > 0) {
+ if (!locked)
+ mutex_exit(&sdp->sd_lock);
+ VN_RELE(dvp);
+ return(0);
+ }
+
ASSERT(!vn_ismntpt(vp));
sep = avl_first(&sdp->sd_snaps);
@@ -1494,7 +1495,6 @@ zfsctl_snapshot_inactive(ap)
mutex_exit(&sdp->sd_lock);
VN_RELE(dvp);
-end:
/*
* Dispose of the vnode for the snapshot mount point.
* This is safe to do because once this entry has been removed
@@ -1588,7 +1588,7 @@ zfsctl_snapshot_lookup(ap)
error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", vpp,
NULL, 0, NULL, cr, NULL, NULL, NULL);
if (error == 0)
- vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
+ error = vn_lock(*vpp, ap->a_cnp->cn_lkflags);
return (error);
}
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
index 8eb8953..8ea7661 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c
@@ -2036,12 +2036,7 @@ zfs_umount(vfs_t *vfsp, int fflag)
*/
if (zfsvfs->z_ctldir != NULL)
zfsctl_destroy(zfsvfs);
- if (zfsvfs->z_issnap) {
- vnode_t *svp = vfsp->mnt_vnodecovered;
- if (svp->v_count >= 2)
- VN_RELE(svp);
- }
zfs_freevfs(vfsp);
return (0);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 91c64a3..29b6395 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1198,7 +1198,10 @@ insmntque1(struct vnode *vp, struct mount *mp,
}
vp->v_mount = mp;
MNT_REF(mp);
- TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
+ if (vp->v_vflag & VV_INSMQHEAD)
+ TAILQ_INSERT_HEAD(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
+ else
+ TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
("neg mount point vnode list size"));
mp->mnt_nvnodelistsize++;
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index b0cbcc0..8dfae3a 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -253,6 +253,7 @@ struct xvnode {
#define VV_DELETED 0x0400 /* should be removed */
#define VV_MD 0x0800 /* vnode backs the md device */
#define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */
+#define VV_INSMQHEAD 0x2000 /* insert instead of appending to mnt_nvnodelist */
/*
* Vnode attributes. A field value of VNOVAL represents a field whose value
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?e487c315b3f98e0757716896bed69895>
