From owner-svn-src-stable@freebsd.org Thu Mar 23 08:14:43 2017 Return-Path: Delivered-To: svn-src-stable@mailman.ysv.freebsd.org Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:1900:2254:206a::19:1]) by mailman.ysv.freebsd.org (Postfix) with ESMTP id A04EFD19AC4; Thu, 23 Mar 2017 08:14:43 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org (repo.freebsd.org [IPv6:2610:1c1:1:6068::e6a:0]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (Client did not present a certificate) by mx1.freebsd.org (Postfix) with ESMTPS id 503691786; Thu, 23 Mar 2017 08:14:43 +0000 (UTC) (envelope-from avg@FreeBSD.org) Received: from repo.freebsd.org ([127.0.1.37]) by repo.freebsd.org (8.15.2/8.15.2) with ESMTP id v2N8EgXY094466; Thu, 23 Mar 2017 08:14:42 GMT (envelope-from avg@FreeBSD.org) Received: (from avg@localhost) by repo.freebsd.org (8.15.2/8.15.2/Submit) id v2N8EfWQ094460; Thu, 23 Mar 2017 08:14:41 GMT (envelope-from avg@FreeBSD.org) Message-Id: <201703230814.v2N8EfWQ094460@repo.freebsd.org> X-Authentication-Warning: repo.freebsd.org: avg set sender to avg@FreeBSD.org using -f From: Andriy Gapon Date: Thu, 23 Mar 2017 08:14:41 +0000 (UTC) To: src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-stable@freebsd.org, svn-src-stable-11@freebsd.org Subject: svn commit: r315842 - in stable/11/sys: cddl/compat/opensolaris/kern cddl/compat/opensolaris/sys cddl/contrib/opensolaris/uts/common/fs cddl/contrib/opensolaris/uts/common/fs/zfs cddl/contrib/opens... X-SVN-Group: stable-11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit X-BeenThere: svn-src-stable@freebsd.org X-Mailman-Version: 2.1.23 Precedence: list List-Id: SVN commit messages for all the -stable branches of the src tree List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Thu, 23 Mar 2017 08:14:43 -0000 Author: avg Date: Thu Mar 23 08:14:41 2017 New Revision: 315842 URL: https://svnweb.freebsd.org/changeset/base/315842 Log: MFC r314048,r314194: reimplement zfsctl (.zfs) support Deleted: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/gfs.c stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/gfs.h Modified: stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c stable/11/sys/cddl/compat/opensolaris/sys/pathname.h stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vfsops.c stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_vnops.c stable/11/sys/conf/files stable/11/sys/modules/zfs/Makefile Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c ============================================================================== --- stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c Thu Mar 23 08:10:42 2017 (r315841) +++ stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_lookup.c Thu Mar 23 08:14:41 2017 (r315842) @@ -62,55 +62,3 @@ lookupnameat(char *dirname, enum uio_seg vn_lock(startvp, ltype | LK_RETRY); return (error); } - -int -traverse(vnode_t **cvpp, int lktype) -{ - vnode_t *cvp; - vnode_t *tvp; - vfs_t *vfsp; - int error; - - cvp = *cvpp; - tvp = NULL; - - /* - * If this vnode is mounted on, then we transparently indirect - * to the vnode which is the root of the mounted file system. - * Before we do this we must check that an unmount is not in - * progress on this vnode. - */ - - for (;;) { - /* - * Reached the end of the mount chain? - */ - vfsp = vn_mountedvfs(cvp); - if (vfsp == NULL) - break; - error = vfs_busy(vfsp, 0); - - /* - * tvp is NULL for *cvpp vnode, which we can't unlock. - */ - if (tvp != NULL) - vput(cvp); - else - vrele(cvp); - if (error) - return (error); - - /* - * The read lock must be held across the call to VFS_ROOT() to - * prevent a concurrent unmount from destroying the vfs. - */ - error = VFS_ROOT(vfsp, lktype, &tvp); - vfs_unbusy(vfsp); - if (error != 0) - return (error); - cvp = tvp; - } - - *cvpp = cvp; - return (0); -} Modified: stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c ============================================================================== --- stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c Thu Mar 23 08:10:42 2017 (r315841) +++ stable/11/sys/cddl/compat/opensolaris/kern/opensolaris_vfs.c Thu Mar 23 08:14:41 2017 (r315842) @@ -196,10 +196,17 @@ mount_snapshot(kthread_t *td, vnode_t ** td->td_ucred = cr; if (error != 0) { + /* + * Clear VI_MOUNT and decrement the use count "atomically", + * under the vnode lock. This is not strictly required, + * but makes it easier to reason about the life-cycle and + * ownership of the covered vnode. + */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); VI_LOCK(vp); vp->v_iflag &= ~VI_MOUNT; VI_UNLOCK(vp); - vrele(vp); + vput(vp); vfs_unbusy(mp); vfs_freeopts(mp->mnt_optnew); vfs_mount_destroy(mp); Modified: stable/11/sys/cddl/compat/opensolaris/sys/pathname.h ============================================================================== --- stable/11/sys/cddl/compat/opensolaris/sys/pathname.h Thu Mar 23 08:10:42 2017 (r315841) +++ stable/11/sys/cddl/compat/opensolaris/sys/pathname.h Thu Mar 23 08:14:41 2017 (r315842) @@ -34,20 +34,9 @@ #include #include -typedef struct pathname { - char *pn_buf; /* underlying storage */ - char *pn_path; /* remaining pathname */ - size_t pn_pathlen; /* remaining length */ - size_t pn_bufsize; /* total size of pn_buf */ -} pathname_t; - -#define pn_alloc(pnp) panic("pn_alloc() called") -#define pn_free(pnp) panic("pn_free() called") - int lookupname(char *, enum uio_seg, enum symfollow, vnode_t **, vnode_t **); int lookupnameat(char *, enum uio_seg, enum symfollow, vnode_t **, vnode_t **, vnode_t *); -int traverse(vnode_t **, int); #endif /* _KERNEL */ Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h Thu Mar 23 08:10:42 2017 (r315841) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_ctldir.h Thu Mar 23 08:14:41 2017 (r315842) @@ -44,7 +44,7 @@ extern "C" { void zfsctl_create(zfsvfs_t *); void zfsctl_destroy(zfsvfs_t *); -vnode_t *zfsctl_root(znode_t *); +int zfsctl_root(zfsvfs_t *, int, vnode_t **); void zfsctl_init(void); void zfsctl_fini(void); boolean_t zfsctl_is_node(vnode_t *); @@ -53,10 +53,6 @@ int zfsctl_rename_snapshot(const char *f int zfsctl_destroy_snapshot(const char *snapname, int force); int zfsctl_umount_snapshots(vfs_t *, int, cred_t *); -int zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, - int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, - int *direntflags, pathname_t *realpnp); - int zfsctl_lookup_objset(vfs_t *vfsp, uint64_t objsetid, zfsvfs_t **zfsvfsp); #define ZFSCTL_INO_ROOT 0x1 Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Thu Mar 23 08:10:42 2017 (r315841) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_vfsops.h Thu Mar 23 08:14:41 2017 (r315842) @@ -68,7 +68,7 @@ struct zfsvfs { krwlock_t z_teardown_inactive_lock; list_t z_all_znodes; /* all vnodes in the fs */ kmutex_t z_znodes_lock; /* lock for z_all_znodes */ - vnode_t *z_ctldir; /* .zfs directory pointer */ + struct zfsctl_root *z_ctldir; /* .zfs directory pointer */ boolean_t z_show_ctldir; /* expose .zfs in the root dir */ boolean_t z_issnap; /* true if this is a snapshot */ boolean_t z_vscan; /* virus scan on/off */ Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c ============================================================================== --- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c Thu Mar 23 08:10:42 2017 (r315841) +++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c Thu Mar 23 08:14:41 2017 (r315842) @@ -70,136 +70,249 @@ #include #include #include -#include #include #include +#include #include #include #include -#include +#include #include "zfs_namecheck.h" -typedef struct zfsctl_node { - gfs_dir_t zc_gfs_private; - uint64_t zc_id; - timestruc_t zc_cmtime; /* ctime and mtime, always the same */ -} zfsctl_node_t; - -typedef struct zfsctl_snapdir { - zfsctl_node_t sd_node; - kmutex_t sd_lock; - avl_tree_t sd_snaps; -} zfsctl_snapdir_t; +/* + * "Synthetic" filesystem implementation. + */ -typedef struct { - char *se_name; - vnode_t *se_root; - avl_node_t se_node; -} zfs_snapentry_t; - -static int -snapentry_compare(const void *a, const void *b) -{ - const zfs_snapentry_t *sa = a; - const zfs_snapentry_t *sb = b; - int ret = strcmp(sa->se_name, sb->se_name); - - if (ret < 0) - return (-1); - else if (ret > 0) - return (1); - else - return (0); -} - -#ifdef illumos -vnodeops_t *zfsctl_ops_root; -vnodeops_t *zfsctl_ops_snapdir; -vnodeops_t *zfsctl_ops_snapshot; -vnodeops_t *zfsctl_ops_shares; -vnodeops_t *zfsctl_ops_shares_dir; - -static const fs_operation_def_t zfsctl_tops_root[]; -static const fs_operation_def_t zfsctl_tops_snapdir[]; -static const fs_operation_def_t zfsctl_tops_snapshot[]; -static const fs_operation_def_t zfsctl_tops_shares[]; -#else -static struct vop_vector zfsctl_ops_root; -static struct vop_vector zfsctl_ops_snapdir; -static struct vop_vector zfsctl_ops_snapshot; -static struct vop_vector zfsctl_ops_shares; -static struct vop_vector zfsctl_ops_shares_dir; -#endif +/* + * Assert that A implies B. + */ +#define KASSERT_IMPLY(A, B, msg) KASSERT(!(A) || (B), (msg)); -static vnode_t *zfsctl_mknode_snapdir(vnode_t *); -static vnode_t *zfsctl_mknode_shares(vnode_t *); -static vnode_t *zfsctl_snapshot_mknode(vnode_t *, uint64_t objset); -static int zfsctl_unmount_snap(zfs_snapentry_t *, int, cred_t *); - -#ifdef illumos -static gfs_opsvec_t zfsctl_opsvec[] = { - { ".zfs", zfsctl_tops_root, &zfsctl_ops_root }, - { ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir }, - { ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot }, - { ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir }, - { ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares }, - { NULL } -}; -#endif +static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes"); + +typedef struct sfs_node { + char sn_name[ZFS_MAX_DATASET_NAME_LEN]; + uint64_t sn_parent_id; + uint64_t sn_id; +} sfs_node_t; /* - * Root directory elements. We only have two entries - * snapshot and shares. + * Check the parent's ID as well as the node's to account for a chance + * that IDs originating from different domains (snapshot IDs, artifical + * IDs, znode IDs) may clash. */ -static gfs_dirent_t zfsctl_root_entries[] = { - { "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE }, - { "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE }, - { NULL } -}; +static int +sfs_compare_ids(struct vnode *vp, void *arg) +{ + sfs_node_t *n1 = vp->v_data; + sfs_node_t *n2 = arg; + bool equal; + + equal = n1->sn_id == n2->sn_id && + n1->sn_parent_id == n2->sn_parent_id; + + /* Zero means equality. */ + return (!equal); +} + +static int +sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id, + uint64_t id, struct vnode **vpp) +{ + sfs_node_t search; + int err; + + search.sn_id = id; + search.sn_parent_id = parent_id; + err = vfs_hash_get(mp, (u_int)id, flags, curthread, vpp, + sfs_compare_ids, &search); + return (err); +} + +static int +sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id, + uint64_t id, struct vnode **vpp) +{ + int err; + + KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data")); + err = vfs_hash_insert(vp, (u_int)id, flags, curthread, vpp, + sfs_compare_ids, vp->v_data); + return (err); +} + +static void +sfs_vnode_remove(struct vnode *vp) +{ + vfs_hash_remove(vp); +} + +typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg); + +static int +sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id, + const char *tag, struct vop_vector *vops, + sfs_vnode_setup_fn setup, void *arg, + struct vnode **vpp) +{ + struct vnode *vp; + int error; + + error = sfs_vnode_get(mp, flags, parent_id, id, vpp); + if (error != 0 || *vpp != NULL) { + KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, + "sfs vnode with no data"); + return (error); + } + + /* Allocate a new vnode/inode. */ + error = getnewvnode(tag, mp, vops, &vp); + if (error != 0) { + *vpp = NULL; + return (error); + } + + /* + * Exclusively lock the vnode vnode while it's being constructed. + */ + lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); + error = insmntque(vp, mp); + if (error != 0) { + *vpp = NULL; + return (error); + } + + setup(vp, arg); + + error = sfs_vnode_insert(vp, flags, parent_id, id, vpp); + if (error != 0 || *vpp != NULL) { + KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL, + "sfs vnode with no data"); + return (error); + } + + *vpp = vp; + return (0); +} + +static void +sfs_print_node(sfs_node_t *node) +{ + printf("\tname = %s\n", node->sn_name); + printf("\tparent_id = %ju\n", (uintmax_t)node->sn_parent_id); + printf("\tid = %ju\n", (uintmax_t)node->sn_id); +} + +static sfs_node_t * +sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id) +{ + struct sfs_node *node; + + KASSERT(strlen(name) < sizeof(node->sn_name), + ("sfs node name is too long")); + KASSERT(size >= sizeof(*node), ("sfs node size is too small")); + node = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO); + strlcpy(node->sn_name, name, sizeof(node->sn_name)); + node->sn_parent_id = parent_id; + node->sn_id = id; + + return (node); +} + +static void +sfs_destroy_node(sfs_node_t *node) +{ + free(node, M_SFSNODES); +} + +static void * +sfs_reclaim_vnode(vnode_t *vp) +{ + sfs_node_t *node; + void *data; + + sfs_vnode_remove(vp); + data = vp->v_data; + vp->v_data = NULL; + return (data); +} + +static int +sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap, + uio_t *uio, off_t *offp) +{ + struct dirent entry; + int error; + + /* Reset ncookies for subsequent use of vfs_read_dirent. */ + if (ap->a_ncookies != NULL) + *ap->a_ncookies = 0; + + if (uio->uio_resid < sizeof(entry)) + return (SET_ERROR(EINVAL)); + + if (uio->uio_offset < 0) + return (SET_ERROR(EINVAL)); + if (uio->uio_offset == 0) { + entry.d_fileno = id; + entry.d_type = DT_DIR; + entry.d_name[0] = '.'; + entry.d_name[1] = '\0'; + entry.d_namlen = 1; + entry.d_reclen = sizeof(entry); + error = vfs_read_dirent(ap, &entry, uio->uio_offset); + if (error != 0) + return (SET_ERROR(error)); + } + + if (uio->uio_offset < sizeof(entry)) + return (SET_ERROR(EINVAL)); + if (uio->uio_offset == sizeof(entry)) { + entry.d_fileno = parent_id; + entry.d_type = DT_DIR; + entry.d_name[0] = '.'; + entry.d_name[1] = '.'; + entry.d_name[2] = '\0'; + entry.d_namlen = 2; + entry.d_reclen = sizeof(entry); + error = vfs_read_dirent(ap, &entry, uio->uio_offset); + if (error != 0) + return (SET_ERROR(error)); + } -/* include . and .. in the calculation */ -#define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \ - sizeof (gfs_dirent_t)) + 1) + if (offp != NULL) + *offp = 2 * sizeof(entry); + return (0); +} /* - * Initialize the various GFS pieces we'll need to create and manipulate .zfs - * directories. This is called from the ZFS init routine, and initializes the - * vnode ops vectors that we'll be using. + * .zfs inode namespace + * + * We need to generate unique inode numbers for all files and directories + * within the .zfs pseudo-filesystem. We use the following scheme: + * + * ENTRY ZFSCTL_INODE + * .zfs 1 + * .zfs/snapshot 2 + * .zfs/snapshot/ objectid(snap) */ +#define ZFSCTL_INO_SNAP(id) (id) + +static struct vop_vector zfsctl_ops_root; +static struct vop_vector zfsctl_ops_snapdir; +static struct vop_vector zfsctl_ops_snapshot; +static struct vop_vector zfsctl_ops_shares_dir; + void zfsctl_init(void) { -#ifdef illumos - VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0); -#endif } void zfsctl_fini(void) { -#ifdef illumos - /* - * Remove vfsctl vnode ops - */ - if (zfsctl_ops_root) - vn_freevnodeops(zfsctl_ops_root); - if (zfsctl_ops_snapdir) - vn_freevnodeops(zfsctl_ops_snapdir); - if (zfsctl_ops_snapshot) - vn_freevnodeops(zfsctl_ops_snapshot); - if (zfsctl_ops_shares) - vn_freevnodeops(zfsctl_ops_shares); - if (zfsctl_ops_shares_dir) - vn_freevnodeops(zfsctl_ops_shares_dir); - - zfsctl_ops_root = NULL; - zfsctl_ops_snapdir = NULL; - zfsctl_ops_snapshot = NULL; - zfsctl_ops_shares = NULL; - zfsctl_ops_shares_dir = NULL; -#endif /* illumos */ } boolean_t @@ -208,106 +321,114 @@ zfsctl_is_node(vnode_t *vp) return (vn_matchops(vp, zfsctl_ops_root) || vn_matchops(vp, zfsctl_ops_snapdir) || vn_matchops(vp, zfsctl_ops_snapshot) || - vn_matchops(vp, zfsctl_ops_shares) || vn_matchops(vp, zfsctl_ops_shares_dir)); } -/* - * Return the inode number associated with the 'snapshot' or - * 'shares' directory. - */ -/* ARGSUSED */ -static ino64_t -zfsctl_root_inode_cb(vnode_t *vp, int index) -{ - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - - ASSERT(index < 2); - - if (index == 0) - return (ZFSCTL_INO_SNAPDIR); +typedef struct zfsctl_root { + sfs_node_t node; + sfs_node_t *snapdir; + timestruc_t cmtime; +} zfsctl_root_t; - return (zfsvfs->z_shares_dir); -} /* - * Create the '.zfs' directory. This directory is cached as part of the VFS - * structure. This results in a hold on the vfs_t. The code in zfs_umount() - * therefore checks against a vfs_count of 2 instead of 1. This reference - * is removed when the ctldir is destroyed in the unmount. + * Create the '.zfs' directory. */ void zfsctl_create(zfsvfs_t *zfsvfs) { - vnode_t *vp, *rvp; - zfsctl_node_t *zcp; + zfsctl_root_t *dot_zfs; + sfs_node_t *snapdir; + vnode_t *rvp; uint64_t crtime[2]; ASSERT(zfsvfs->z_ctldir == NULL); - vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs, - &zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries, - zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL); - zcp = vp->v_data; - zcp->zc_id = ZFSCTL_INO_ROOT; + snapdir = sfs_alloc_node(sizeof(*snapdir), "snapshot", ZFSCTL_INO_ROOT, + ZFSCTL_INO_SNAPDIR); + dot_zfs = (zfsctl_root_t *)sfs_alloc_node(sizeof(*dot_zfs), ".zfs", 0, + ZFSCTL_INO_ROOT); + dot_zfs->snapdir = snapdir; VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0); VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs), - &crtime, sizeof (crtime))); - ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime); - VN_URELE(rvp); - - /* - * We're only faking the fact that we have a root of a filesystem for - * the sake of the GFS interfaces. Undo the flag manipulation it did - * for us. - */ - vp->v_vflag &= ~VV_ROOT; + &crtime, sizeof(crtime))); + ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime); + vput(rvp); - zfsvfs->z_ctldir = vp; - - VOP_UNLOCK(vp, 0); + zfsvfs->z_ctldir = dot_zfs; } /* * Destroy the '.zfs' directory. Only called when the filesystem is unmounted. - * There might still be more references if we were force unmounted, but only - * new zfs_inactive() calls can occur and they don't reference .zfs + * The nodes must not have any associated vnodes by now as they should be + * vflush-ed. */ void zfsctl_destroy(zfsvfs_t *zfsvfs) { - VN_RELE(zfsvfs->z_ctldir); + sfs_destroy_node(zfsvfs->z_ctldir->snapdir); + sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir); zfsvfs->z_ctldir = NULL; } -/* - * Given a root znode, retrieve the associated .zfs directory. - * Add a hold to the vnode and return it. - */ -vnode_t * -zfsctl_root(znode_t *zp) +static int +zfsctl_fs_root_vnode(struct mount *mp, void *arg __unused, int flags, + struct vnode **vpp) +{ + return (VFS_ROOT(mp, flags, vpp)); +} + +static void +zfsctl_common_vnode_setup(vnode_t *vp, void *arg) { - ASSERT(zfs_has_ctldir(zp)); - VN_HOLD(zp->z_zfsvfs->z_ctldir); - return (zp->z_zfsvfs->z_ctldir); + ASSERT_VOP_ELOCKED(vp, __func__); + + /* We support shared locking. */ + VN_LOCK_ASHARE(vp); + vp->v_type = VDIR; + vp->v_data = arg; } static int -zfsctl_common_print(ap) - struct vop_print_args /* { - struct vnode *a_vp; - } */ *ap; +zfsctl_root_vnode(struct mount *mp, void *arg __unused, int flags, + struct vnode **vpp) { - vnode_t *vp = ap->a_vp; - gfs_file_t *fp = vp->v_data; + void *node; + int err; - printf(" parent = %p\n", fp->gfs_parent); - printf(" type = %d\n", fp->gfs_type); - printf(" index = %d\n", fp->gfs_index); - printf(" ino = %ju\n", (uintmax_t)fp->gfs_ino); - return (0); + node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir; + err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root, + zfsctl_common_vnode_setup, node, vpp); + return (err); +} + +static int +zfsctl_snapdir_vnode(struct mount *mp, void *arg __unused, int flags, + struct vnode **vpp) +{ + void *node; + int err; + + node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir->snapdir; + err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs", + &zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp); + return (err); +} + +/* + * Given a root znode, retrieve the associated .zfs directory. + * Add a hold to the vnode and return it. + */ +int +zfsctl_root(zfsvfs_t *zfsvfs, int flags, vnode_t **vpp) +{ + vnode_t *vp; + int error; + + error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp); + return (error); } /* @@ -350,18 +471,8 @@ zfsctl_common_access(ap) { accmode_t accmode = ap->a_accmode; -#ifdef TODO - if (flags & V_ACE_MASK) { - if (accmode & ACE_ALL_WRITE_PERMS) - return (SET_ERROR(EACCES)); - } else { -#endif - if (accmode & VWRITE) - return (SET_ERROR(EACCES)); -#ifdef TODO - } -#endif - + if (accmode & VWRITE) + return (SET_ERROR(EACCES)); return (0); } @@ -372,6 +483,9 @@ static void zfsctl_common_getattr(vnode_t *vp, vattr_t *vap) { timestruc_t now; + sfs_node_t *node; + + node = vp->v_data; vap->va_uid = 0; vap->va_gid = 0; @@ -394,6 +508,11 @@ zfsctl_common_getattr(vnode_t *vp, vattr vap->va_atime = now; /* FreeBSD: Reset chflags(2) flags. */ vap->va_flags = 0; + + vap->va_nodeid = node->sn_id; + + /* At least '.' and '..'. */ + vap->va_nlink = 2; } /*ARGSUSED*/ @@ -406,81 +525,46 @@ zfsctl_common_fid(ap) { vnode_t *vp = ap->a_vp; fid_t *fidp = (void *)ap->a_fid; - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - zfsctl_node_t *zcp = vp->v_data; - uint64_t object = zcp->zc_id; + sfs_node_t *node = vp->v_data; + uint64_t object = node->sn_id; zfid_short_t *zfid; int i; - ZFS_ENTER(zfsvfs); - -#ifdef illumos - if (fidp->fid_len < SHORT_FID_LEN) { - fidp->fid_len = SHORT_FID_LEN; - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOSPC)); - } -#endif - zfid = (zfid_short_t *)fidp; - zfid->zf_len = SHORT_FID_LEN; - for (i = 0; i < sizeof (zfid->zf_object); i++) + for (i = 0; i < sizeof(zfid->zf_object); i++) zfid->zf_object[i] = (uint8_t)(object >> (8 * i)); - /* .zfs znodes always have a generation number of 0 */ - for (i = 0; i < sizeof (zfid->zf_gen); i++) + /* .zfs nodes always have a generation number of 0 */ + for (i = 0; i < sizeof(zfid->zf_gen); i++) zfid->zf_gen[i] = 0; - ZFS_EXIT(zfsvfs); return (0); } - -/*ARGSUSED*/ static int -zfsctl_shares_fid(ap) - struct vop_fid_args /* { +zfsctl_common_reclaim(ap) + struct vop_reclaim_args /* { struct vnode *a_vp; - struct fid *a_fid; + struct thread *a_td; } */ *ap; { - vnode_t *vp = ap->a_vp; - fid_t *fidp = (void *)ap->a_fid; - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - znode_t *dzp; - int error; - - ZFS_ENTER(zfsvfs); - - if (zfsvfs->z_shares_dir == 0) { - ZFS_EXIT(zfsvfs); - return (SET_ERROR(ENOTSUP)); - } - - if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) { - error = VOP_FID(ZTOV(dzp), fidp); - VN_RELE(ZTOV(dzp)); - } + vnode_t *vp = ap->a_vp; - ZFS_EXIT(zfsvfs); - return (error); + (void) sfs_reclaim_vnode(vp); + return (0); } -/* - * .zfs inode namespace - * - * We need to generate unique inode numbers for all files and directories - * within the .zfs pseudo-filesystem. We use the following scheme: - * - * ENTRY ZFSCTL_INODE - * .zfs 1 - * .zfs/snapshot 2 - * .zfs/snapshot/ objectid(snap) - */ - -#define ZFSCTL_INO_SNAP(id) (id) +static int +zfsctl_common_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + sfs_print_node(ap->a_vp->v_data); + return (0); +} /* * Get root directory attributes. @@ -496,156 +580,132 @@ zfsctl_root_getattr(ap) { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; - zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; - zfsctl_node_t *zcp = vp->v_data; - - ZFS_ENTER(zfsvfs); - vap->va_nodeid = ZFSCTL_INO_ROOT; - vap->va_nlink = vap->va_size = NROOT_ENTRIES; - vap->va_mtime = vap->va_ctime = zcp->zc_cmtime; - vap->va_birthtime = vap->va_ctime; + zfsctl_root_t *node = vp->v_data; zfsctl_common_getattr(vp, vap); - ZFS_EXIT(zfsvfs); - + vap->va_ctime = node->cmtime; + vap->va_mtime = vap->va_ctime; + vap->va_birthtime = vap->va_ctime; + vap->va_nlink += 1; /* snapdir */ + vap->va_size = vap->va_nlink; return (0); } /* - * Special case the handling of "..". + * When we lookup "." we still can be asked to lock it + * differently, can't we? */ -/* ARGSUSED */ int -zfsctl_root_lookup(vnode_t *dvp, char *nm, vnode_t **vpp, pathname_t *pnp, - int flags, vnode_t *rdir, cred_t *cr, caller_context_t *ct, - int *direntflags, pathname_t *realpnp) +zfsctl_relock_dot(vnode_t *dvp, int ltype) { - zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data; - int err; - - /* - * No extended attributes allowed under .zfs - */ - if (flags & LOOKUP_XATTR) - return (SET_ERROR(EINVAL)); - - ZFS_ENTER(zfsvfs); - - if (strcmp(nm, "..") == 0) { -#ifdef illumos - err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp); -#else - /* - * NB: can not use VFS_ROOT here as it would acquire - * the vnode lock of the parent (root) vnode while - * holding the child's (.zfs) lock. - */ - znode_t *rootzp; - - err = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp); - if (err == 0) - *vpp = ZTOV(rootzp); -#endif - } else { - err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir, - cr, ct, direntflags, realpnp); + vref(dvp); + if (ltype != VOP_ISLOCKED(dvp)) { + if (ltype == LK_EXCLUSIVE) + vn_lock(dvp, LK_UPGRADE | LK_RETRY); + else /* if (ltype == LK_SHARED) */ + vn_lock(dvp, LK_DOWNGRADE | LK_RETRY); + + /* Relock for the "." case may left us with reclaimed vnode. */ + if ((dvp->v_iflag & VI_DOOMED) != 0) { + vrele(dvp); + return (SET_ERROR(ENOENT)); + } } - - ZFS_EXIT(zfsvfs); - - return (err); + return (0); } -static int -zfsctl_freebsd_root_lookup(ap) +/* + * Special case the handling of "..". + */ +int +zfsctl_root_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { + struct componentname *cnp = ap->a_cnp; vnode_t *dvp = ap->a_dvp; vnode_t **vpp = ap->a_vpp; cred_t *cr = ap->a_cnp->cn_cred; int flags = ap->a_cnp->cn_flags; int lkflags = ap->a_cnp->cn_lkflags; int nameiop = ap->a_cnp->cn_nameiop; - char nm[NAME_MAX + 1]; int err; + int ltype; - if ((flags & ISLASTCN) && (nameiop == RENAME || nameiop == CREATE)) - return (EOPNOTSUPP); + ASSERT(dvp->v_type == VDIR); - ASSERT(ap->a_cnp->cn_namelen < sizeof(nm)); - strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1); -relookup: - err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL); - if (err == 0 && (nm[0] != '.' || nm[1] != '\0')) { - if (flags & ISDOTDOT) { - VOP_UNLOCK(dvp, 0); - err = vn_lock(*vpp, lkflags); - if (err != 0) { - vrele(*vpp); - *vpp = NULL; - } - vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); - } else { - err = vn_lock(*vpp, LK_EXCLUSIVE); - if (err != 0) { - VERIFY3S(err, ==, ENOENT); - goto relookup; - } - } - } - return (err); -} + if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP) + return (SET_ERROR(ENOTSUP)); -static int -zfsctl_root_print(ap) - struct vop_print_args /* { + if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') { + err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK); + if (err == 0) + *vpp = dvp; + } else if ((flags & ISDOTDOT) != 0) { + err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL, + lkflags, vpp); + } else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) { + err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp); + } else { + err = SET_ERROR(ENOENT); + } + if (err != 0) + *vpp = NULL; + return (err); +} + +static int +zfsctl_root_readdir(ap) + struct vop_readdir_args /* { struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + int *ncookies; + u_long **a_cookies; } */ *ap; { - printf(" .zfs node\n"); - zfsctl_common_print(ap); + struct dirent entry; + vnode_t *vp = ap->a_vp; + zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data; + zfsctl_root_t *node = vp->v_data; + uio_t *uio = ap->a_uio; + int *eofp = ap->a_eofflag; + off_t dots_offset; + int error; + + ASSERT(vp->v_type == VDIR); + + error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, uio, *** DIFF OUTPUT TRUNCATED AT 1000 LINES ***