Date:      Fri, 20 Apr 2012 06:50:44 +0000 (UTC)
From:      Kirk McKusick <mckusick@FreeBSD.org>
To:        src-committers@freebsd.org, svn-src-all@freebsd.org, svn-src-head@freebsd.org
Subject:   svn commit: r234482 - in head/sys: fs/msdosfs fs/nfsserver kern sys
Message-ID:  <201204200650.q3K6oiqO026673@svn.freebsd.org>

Author: mckusick
Date: Fri Apr 20 06:50:44 2012
New Revision: 234482
URL: http://svn.freebsd.org/changeset/base/234482

Log:
  This change creates a new list of active vnodes associated with
  a mount point. Active vnodes are those with a non-zero use or hold
  count, i.e., those vnodes that are not on the free list. Note that
  this list is in addition to the list of all the vnodes associated
  with a mount point.
  
  To avoid adding another set of linkage pointers to the vnode
  structure, the active list uses the existing linkage pointers
  used by the free list (previously named v_freelist, now renamed
  v_actfreelist).
  
  This update adds the MNT_VNODE_FOREACH_ACTIVE interface that loops
  over just the active vnodes associated with a mount point (typically
  less than 1% of the vnodes associated with the mount point).
  
  Reviewed by: kib
  Tested by:   Peter Holm
  MFC after:   2 weeks

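As a rough usage sketch (not part of this commit; the routine name
example_sync and its per-vnode work are hypothetical), a filesystem
sync routine might walk only the active vnodes as follows. The
iterator hands back each vnode with its interlock held, so the loop
body must either VI_UNLOCK() it or pass LK_INTERLOCK to vget():

static int
example_sync(struct mount *mp, int waitfor)
{
	struct thread *td = curthread;
	struct vnode *mvp, *vp;
	int error;

loop:
	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		/* Skip vnodes with nothing to do; drop the interlock. */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		/* vget() consumes the held interlock via LK_INTERLOCK. */
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
		if (error != 0) {
			if (error == ENOENT) {
				/* Vnode was reclaimed; drop the list marker
				   and restart the scan. */
				MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp);
				goto loop;
			}
			continue;
		}
		/* ... per-vnode work, e.g. VOP_FSYNC(vp, waitfor, td) ... */
		vput(vp);
	}
	return (0);
}
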
Modified:
  head/sys/fs/msdosfs/msdosfs_vfsops.c
  head/sys/fs/nfsserver/nfs_nfsdport.c
  head/sys/kern/vfs_mount.c
  head/sys/kern/vfs_subr.c
  head/sys/sys/mount.h
  head/sys/sys/vnode.h

Modified: head/sys/fs/msdosfs/msdosfs_vfsops.c
==============================================================================
--- head/sys/fs/msdosfs/msdosfs_vfsops.c	Fri Apr 20 04:40:39 2012	(r234481)
+++ head/sys/fs/msdosfs/msdosfs_vfsops.c	Fri Apr 20 06:50:44 2012	(r234482)
@@ -834,7 +834,7 @@ msdosfs_unmount(struct mount *mp, int mn
 		vn_printf(vp,
 		    "msdosfs_umount(): just before calling VOP_CLOSE()\n");
 		printf("freef %p, freeb %p, mount %p\n",
-		    TAILQ_NEXT(vp, v_freelist), vp->v_freelist.tqe_prev,
+		    TAILQ_NEXT(vp, v_actfreelist), vp->v_actfreelist.tqe_prev,
 		    vp->v_mount);
 		printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
 		    TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),

Modified: head/sys/fs/nfsserver/nfs_nfsdport.c
==============================================================================
--- head/sys/fs/nfsserver/nfs_nfsdport.c	Fri Apr 20 04:40:39 2012	(r234481)
+++ head/sys/fs/nfsserver/nfs_nfsdport.c	Fri Apr 20 06:50:44 2012	(r234482)
@@ -2907,12 +2907,14 @@ nfsd_mntinit(void)
 	inited = 1;
 	nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
 	TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
+	TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
 	nfsv4root_mnt.mnt_export = NULL;
 	TAILQ_INIT(&nfsv4root_opt);
 	TAILQ_INIT(&nfsv4root_newopt);
 	nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
 	nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
 	nfsv4root_mnt.mnt_nvnodelistsize = 0;
+	nfsv4root_mnt.mnt_activevnodelistsize = 0;
 }
 
 /*

Modified: head/sys/kern/vfs_mount.c
==============================================================================
--- head/sys/kern/vfs_mount.c	Fri Apr 20 04:40:39 2012	(r234481)
+++ head/sys/kern/vfs_mount.c	Fri Apr 20 06:50:44 2012	(r234482)
@@ -461,6 +461,8 @@ vfs_mount_alloc(struct vnode *vp, struct
 	    __rangeof(struct mount, mnt_startzero, mnt_endzero));
 	TAILQ_INIT(&mp->mnt_nvnodelist);
 	mp->mnt_nvnodelistsize = 0;
+	TAILQ_INIT(&mp->mnt_activevnodelist);
+	mp->mnt_activevnodelistsize = 0;
 	mp->mnt_ref = 0;
 	(void) vfs_busy(mp, MBF_NOWAIT);
 	mp->mnt_op = vfsp->vfc_vfsops;
@@ -514,6 +516,8 @@ vfs_mount_destroy(struct mount *mp)
 	}
 	if (mp->mnt_nvnodelistsize != 0)
 		panic("vfs_mount_destroy: nonzero nvnodelistsize");
+	if (mp->mnt_activevnodelistsize != 0)
+		panic("vfs_mount_destroy: nonzero activevnodelistsize");
 	if (mp->mnt_lockref != 0)
 		panic("vfs_mount_destroy: nonzero lock refcount");
 	MNT_IUNLOCK(mp);

Modified: head/sys/kern/vfs_subr.c
==============================================================================
--- head/sys/kern/vfs_subr.c	Fri Apr 20 04:40:39 2012	(r234481)
+++ head/sys/kern/vfs_subr.c	Fri Apr 20 06:50:44 2012	(r234482)
@@ -775,12 +775,16 @@ vnlru_free(int count)
 			break;
 		VNASSERT(vp->v_op != NULL, vp,
 		    ("vnlru_free: vnode already reclaimed."));
-		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		KASSERT((vp->v_iflag & VI_FREE) != 0,
+		    ("Removing vnode not on freelist"));
+		KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+		    ("Mangling active vnode"));
+		TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
 		/*
 		 * Don't recycle if we can't get the interlock.
 		 */
 		if (!VI_TRYLOCK(vp)) {
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
 			continue;
 		}
 		VNASSERT(VCANRECYCLE(vp), vp,
@@ -1035,12 +1039,26 @@ static void
 delmntque(struct vnode *vp)
 {
 	struct mount *mp;
+	int active;
 
 	mp = vp->v_mount;
 	if (mp == NULL)
 		return;
 	MNT_ILOCK(mp);
+	VI_LOCK(vp);
+	KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize,
+	    ("Active vnode list size %d > Vnode list size %d",
+	     mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize));
+	active = vp->v_iflag & VI_ACTIVE;
+	vp->v_iflag &= ~VI_ACTIVE;
+	if (active) {
+		mtx_lock(&vnode_free_list_mtx);
+		TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
+		mp->mnt_activevnodelistsize--;
+		mtx_unlock(&vnode_free_list_mtx);
+	}
 	vp->v_mount = NULL;
+	VI_UNLOCK(vp);
 	VNASSERT(mp->mnt_nvnodelistsize > 0, vp,
 		("bad mount point vnode list size"));
 	TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
@@ -1080,13 +1098,24 @@ insmntque1(struct vnode *vp, struct moun
 		ASSERT_VOP_ELOCKED(vp,
 		    "insmntque: mp-safe fs and non-locked vp");
 #endif
+	/*
+	 * We acquire the vnode interlock early to ensure that the
+	 * vnode cannot be recycled by another process releasing a
+	 * holdcnt on it before we get it on both the vnode list
+	 * and the active vnode list. The mount mutex protects only
+	 * manipulation of the vnode list and the vnode freelist
+	 * mutex protects only manipulation of the active vnode list.
+	 * Hence the need to hold the vnode interlock throughout.
+	 */
 	MNT_ILOCK(mp);
+	VI_LOCK(vp);
 	if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 &&
 	    ((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 ||
 	     mp->mnt_nvnodelistsize == 0)) {
 		locked = VOP_ISLOCKED(vp);
 		if (!locked || (locked == LK_EXCLUSIVE &&
 		     (vp->v_vflag & VV_FORCEINSMQ) == 0)) {
+			VI_UNLOCK(vp);
 			MNT_IUNLOCK(mp);
 			if (dtr != NULL)
 				dtr(vp, dtr_arg);
@@ -1099,6 +1128,14 @@ insmntque1(struct vnode *vp, struct moun
 	VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
 		("neg mount point vnode list size"));
 	mp->mnt_nvnodelistsize++;
+	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+	    ("Activating already active vnode"));
+	vp->v_iflag |= VI_ACTIVE;
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
+	mp->mnt_activevnodelistsize++;
+	mtx_unlock(&vnode_free_list_mtx);
+	VI_UNLOCK(vp);
 	MNT_IUNLOCK(mp);
 	return (0);
 }
@@ -2309,6 +2346,7 @@ vhold(struct vnode *vp)
 void
 vholdl(struct vnode *vp)
 {
+	struct mount *mp;
 
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 	vp->v_holdcnt++;
@@ -2318,12 +2356,19 @@ vholdl(struct vnode *vp)
 	VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free"));
 	VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed."));
 	/*
-	 * Remove a vnode from the free list and mark it as in use.
+	 * Remove a vnode from the free list, mark it as in use,
+	 * and put it on the active list.
 	 */
 	mtx_lock(&vnode_free_list_mtx);
-	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+	TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
 	freevnodes--;
 	vp->v_iflag &= ~(VI_FREE|VI_AGE);
+	KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+	    ("Activating already active vnode"));
+	vp->v_iflag |= VI_ACTIVE;
+	mp = vp->v_mount;
+	TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
+	mp->mnt_activevnodelistsize++;
 	mtx_unlock(&vnode_free_list_mtx);
 }
 
@@ -2348,6 +2393,8 @@ void
 vdropl(struct vnode *vp)
 {
 	struct bufobj *bo;
+	struct mount *mp;
+	int active;
 
 	ASSERT_VI_LOCKED(vp, "vdropl");
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
@@ -2360,19 +2407,28 @@ vdropl(struct vnode *vp)
 	}
 	if ((vp->v_iflag & VI_DOOMED) == 0) {
 		/*
-		 * Mark a vnode as free, putting it up for recycling.
+		 * Mark a vnode as free: remove it from its active list
+		 * and put it up for recycling on the freelist.
 		 */
-		mtx_lock(&vnode_free_list_mtx);
 		VNASSERT(vp->v_op != NULL, vp,
 		    ("vdropl: vnode already reclaimed."));
 		VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
 		    ("vnode already free"));
 		VNASSERT(VSHOULDFREE(vp), vp,
 		    ("vdropl: freeing when we shouldn't"));
+		active = vp->v_iflag & VI_ACTIVE;
+		vp->v_iflag &= ~VI_ACTIVE;
+		mp = vp->v_mount;
+		mtx_lock(&vnode_free_list_mtx);
+		if (active) {
+			TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
+			    v_actfreelist);
+			mp->mnt_activevnodelistsize--;
+		}
 		if (vp->v_iflag & VI_AGE) {
-			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_actfreelist);
 		} else {
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
 		}
 		freevnodes++;
 		vp->v_iflag &= ~VI_AGE;
@@ -3010,6 +3066,8 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	db_printf("    mnt_ref = %d\n", mp->mnt_ref);
 	db_printf("    mnt_gen = %d\n", mp->mnt_gen);
 	db_printf("    mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
+	db_printf("    mnt_activevnodelistsize = %d\n",
+	    mp->mnt_activevnodelistsize);
 	db_printf("    mnt_writeopcount = %d\n", mp->mnt_writeopcount);
 	db_printf("    mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
 	db_printf("    mnt_iosize_max = %d\n", mp->mnt_iosize_max);
@@ -3019,15 +3077,23 @@ DB_SHOW_COMMAND(mount, db_show_mount)
 	    mp->mnt_secondary_accwrites);
 	db_printf("    mnt_gjprovider = %s\n",
 	    mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
-	db_printf("\n");
 
-	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+	db_printf("\n\nList of active vnodes\n");
+	TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
 		if (vp->v_type != VMARKER) {
 			vn_printf(vp, "vnode ");
 			if (db_pager_quit)
 				break;
 		}
 	}
+	db_printf("\n\nList of inactive vnodes\n");
+	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+		if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) {
+			vn_printf(vp, "vnode ");
+			if (db_pager_quit)
+				break;
+		}
+	}
 }
 #endif	/* DDB */
 
@@ -4558,3 +4624,100 @@ __mnt_vnode_markerfree_all(struct vnode 
 	free(*mvp, M_VNODE_MARKER);
 	*mvp = NULL;
 }
+
+/*
+ * These are helper functions for filesystems to traverse their
+ * active vnodes.  See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
+ */
+struct vnode *
+__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+{
+	struct vnode *vp, *nvp;
+
+	if (should_yield())
+		kern_yield(PRI_UNCHANGED);
+	MNT_ILOCK(mp);
+	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+	vp = TAILQ_NEXT(*mvp, v_actfreelist);
+	while (vp != NULL) {
+		VI_LOCK(vp);
+		if (vp->v_mount == mp && vp->v_type != VMARKER &&
+		    (vp->v_iflag & VI_DOOMED) == 0)
+			break;
+		nvp = TAILQ_NEXT(vp, v_actfreelist);
+		VI_UNLOCK(vp);
+		vp = nvp;
+	}
+
+	/* Check if we are done */
+	if (vp == NULL) {
+		__mnt_vnode_markerfree_active(mvp, mp);
+		/* MNT_IUNLOCK(mp); -- done in above function */
+		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
+		return (NULL);
+	}
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
+	TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
+	mtx_unlock(&vnode_free_list_mtx);
+	MNT_IUNLOCK(mp);
+	return (vp);
+}
+
+struct vnode *
+__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
+{
+	struct vnode *vp, *nvp;
+
+	*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
+	MNT_ILOCK(mp);
+	MNT_REF(mp);
+	(*mvp)->v_type = VMARKER;
+
+	vp = TAILQ_NEXT(*mvp, v_actfreelist);
+	while (vp != NULL) {
+		VI_LOCK(vp);
+		if (vp->v_mount == mp && vp->v_type != VMARKER &&
+		    (vp->v_iflag & VI_DOOMED) == 0)
+			break;
+		nvp = TAILQ_NEXT(vp, v_actfreelist);
+		VI_UNLOCK(vp);
+		vp = nvp;
+	}
+
+	/* Check if we are done */
+	if (vp == NULL) {
+		MNT_REL(mp);
+		MNT_IUNLOCK(mp);
+		free(*mvp, M_VNODE_MARKER);
+		*mvp = NULL;
+		return (NULL);
+	}
+	(*mvp)->v_mount = mp;
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
+	mtx_unlock(&vnode_free_list_mtx);
+	MNT_IUNLOCK(mp);
+	return (vp);
+}
+
+void
+__mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
+{
+
+	if (*mvp == NULL) {
+		MNT_IUNLOCK(mp);
+		return;
+	}
+
+	mtx_assert(MNT_MTX(mp), MA_OWNED);
+
+	KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+	mtx_lock(&vnode_free_list_mtx);
+	TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
+	mtx_unlock(&vnode_free_list_mtx);
+	MNT_REL(mp);
+	MNT_IUNLOCK(mp);
+	free(*mvp, M_VNODE_MARKER);
+	*mvp = NULL;
+}

Modified: head/sys/sys/mount.h
==============================================================================
--- head/sys/sys/mount.h	Fri Apr 20 04:40:39 2012	(r234481)
+++ head/sys/sys/mount.h	Fri Apr 20 06:50:44 2012	(r234482)
@@ -164,6 +164,8 @@ struct mount {
 	int		mnt_ref;		/* (i) Reference count */
 	struct vnodelst	mnt_nvnodelist;		/* (i) list of vnodes */
 	int		mnt_nvnodelistsize;	/* (i) # of vnodes */
+	struct vnodelst	mnt_activevnodelist;	/* (i) list of active vnodes */
+	int		mnt_activevnodelistsize;/* (i) # of active vnodes */
 	int		mnt_writeopcount;	/* (i) write syscalls pending */
 	int		mnt_kern_flag;		/* (i) kernel only flags */
 	uint64_t	mnt_flag;		/* (i) flags shared with user */
@@ -207,6 +209,25 @@ void          __mnt_vnode_markerfree_all
 	} while (0)
 
 /*
+ * Definitions for MNT_VNODE_FOREACH_ACTIVE.
+ */
+struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
+struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
+void          __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
+
+#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \
+	for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \
+		(vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
+
+#define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp)				\
+	do {								\
+		MNT_ILOCK(mp);						\
+		__mnt_vnode_markerfree_active(&(mvp), (mp));		\
+		/* MNT_IUNLOCK(mp); -- done in above function */	\
+		mtx_assert(MNT_MTX(mp), MA_NOTOWNED);			\
+	} while (0)
+
+/*
  * Definitions for MNT_VNODE_FOREACH.
  *
  * This interface has been deprecated in favor of MNT_VNODE_FOREACH_ALL.

Modified: head/sys/sys/vnode.h
==============================================================================
--- head/sys/sys/vnode.h	Fri Apr 20 04:40:39 2012	(r234481)
+++ head/sys/sys/vnode.h	Fri Apr 20 06:50:44 2012	(r234482)
@@ -156,7 +156,7 @@ struct vnode {
 	/*
 	 * The machinery of being a vnode
 	 */
-	TAILQ_ENTRY(vnode) v_freelist;		/* f vnode freelist */
+	TAILQ_ENTRY(vnode) v_actfreelist;	/* f vnode active/free lists */
 	struct bufobj	v_bufobj;		/* * Buffer cache object */
 
 	/*
@@ -232,6 +232,7 @@ struct xvnode {
 #define	VI_AGE		0x0040	/* Insert vnode at head of free list */
 #define	VI_DOOMED	0x0080	/* This vnode is being recycled */
 #define	VI_FREE		0x0100	/* This vnode is on the freelist */
+#define	VI_ACTIVE	0x0200	/* This vnode is on the active list */
 #define	VI_DOINGINACT	0x0800	/* VOP_INACTIVE is in progress */
 #define	VI_OWEINACT	0x1000	/* Need to call inactive */
 


